lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "apint"
  16 #include "llvm/ADT/APInt.h"
  17 #include "llvm/ADT/FoldingSet.h"
  18 #include "llvm/ADT/SmallString.h"
  19 #include "llvm/Support/Debug.h"
  20 #include "llvm/Support/MathExtras.h"
  21 #include "llvm/Support/raw_ostream.h"
  22 #include <cmath>
  23 #include <limits>
  24 #include <cstring>
  25 #include <cstdlib>
  26 using namespace llvm;
  27
  28 /// A utility function for allocating memory, checking for allocation failures,
  29 /// and ensuring the contents are zeroed.
  30 inline static uint64_t* getClearedMemory(unsigned numWords) {
  31   uint64_t * result = new uint64_t[numWords];
  32   assert(result && "APInt memory allocation fails!");
  33   memset(result, 0, numWords * sizeof(uint64_t));
  34   return result;
  35 }
  36
  37 /// A utility function for allocating memory and checking for allocation
  38 /// failure.  The content is not zeroed.
  39 inline static uint64_t* getMemory(unsigned numWords) {
  40   uint64_t * result = new uint64_t[numWords];
  41   assert(result && "APInt memory allocation fails!");
  42   return result;
  43 }
  44
  45 void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
  46   pVal = getClearedMemory(getNumWords());
  47   pVal[0] = val;
  48   if (isSigned && int64_t(val) < 0)
  49     for (unsigned i = 1; i < getNumWords(); ++i)
  50       pVal[i] = -1ULL;
  51 }
  52
  53 void APInt::initSlowCase(const APInt& that) {
  54   pVal = getMemory(getNumWords());
  55   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
  56 }
  57
  58
  59 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
  60   : BitWidth(numBits), VAL(0) {
  61   assert(BitWidth && "bitwidth too small");
  62   assert(bigVal && "Null pointer detected!");
  63   if (isSingleWord())
  64     VAL = bigVal[0];
  65   else {
  66     // Get memory, cleared to 0
  67     pVal = getClearedMemory(getNumWords());
  68     // Calculate the number of words to copy
  69     unsigned words = std::min<unsigned>(numWords, getNumWords());
  70     // Copy the words from bigVal to pVal
  71     memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
  72   }
  73   // Make sure unused high bits are cleared
  74   clearUnusedBits();
  75 }
  76
  77 APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen,
  78              uint8_t radix)
  79   : BitWidth(numbits), VAL(0) {
  80   assert(BitWidth && "bitwidth too small");
  81   fromString(numbits, StrStart, slen, radix);
  82 }
  83
  84 APInt& APInt::AssignSlowCase(const APInt& RHS) {
  85   // Don't do anything for X = X
  86   if (this == &RHS)
  87     return *this;
  88
  89   if (BitWidth == RHS.getBitWidth()) {
  90     // assume same bit-width single-word case is already handled
  91     assert(!isSingleWord());
  92     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
  93     return *this;
  94   }
  95
  96   if (isSingleWord()) {
  97     // assume case where both are single words is already handled
  98     assert(!RHS.isSingleWord());
  99     VAL = 0;
 100     pVal = getMemory(RHS.getNumWords());
 101     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 102   } else if (getNumWords() == RHS.getNumWords())
 103     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 104   else if (RHS.isSingleWord()) {
 105     delete [] pVal;
 106     VAL = RHS.VAL;
 107   } else {
 108     delete [] pVal;
 109     pVal = getMemory(RHS.getNumWords());
 110     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 111   }
 112   BitWidth = RHS.BitWidth;
 113   return clearUnusedBits();
 114 }
 115
 116 APInt& APInt::operator=(uint64_t RHS) {
 117   if (isSingleWord())
 118     VAL = RHS;
 119   else {
 120     pVal[0] = RHS;
 121     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
 122   }
 123   return clearUnusedBits();
 124 }
 125
 126 /// Profile - This method 'profiles' an APInt for use with FoldingSet.
 127 void APInt::Profile(FoldingSetNodeID& ID) const {
 128   ID.AddInteger(BitWidth);
 129
 130   if (isSingleWord()) {
 131     ID.AddInteger(VAL);
 132     return;
 133   }
 134
 135   unsigned NumWords = getNumWords();
 136   for (unsigned i = 0; i < NumWords; ++i)
 137     ID.AddInteger(pVal[i]);
 138 }
 139
 140 /// add_1 - This function adds a single "digit" integer, y, to the multiple
 141 /// "digit" integer array,  x[]. x[] is modified to reflect the addition and
 142 /// 1 is returned if there is a carry out, otherwise 0 is returned.
 143 /// @returns the carry of the addition.
 144 static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 145   for (unsigned i = 0; i < len; ++i) {
 146     dest[i] = y + x[i];
 147     if (dest[i] < y)
 148       y = 1; // Carry one to next digit.
 149     else {
 150       y = 0; // No need to carry so exit early
 151       break;
 152     }
 153   }
 154   return y;
 155 }
 156
 157 /// @brief Prefix increment operator. Increments the APInt by one.
 158 APInt& APInt::operator++() {
 159   if (isSingleWord())
 160     ++VAL;
 161   else
 162     add_1(pVal, pVal, getNumWords(), 1);
 163   return clearUnusedBits();
 164 }
 165
 166 /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
 167 /// the multi-digit integer array, x[], propagating the borrowed 1 value until
 168 /// no further borrowing is neeeded or it runs out of "digits" in x.  The result
 169 /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
 170 /// In other words, if y > x then this function returns 1, otherwise 0.
 171 /// @returns the borrow out of the subtraction
 172 static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
 173   for (unsigned i = 0; i < len; ++i) {
 174     uint64_t X = x[i];
 175     x[i] -= y;
 176     if (y > X)
 177       y = 1;  // We have to "borrow 1" from next "digit"
 178     else {
 179       y = 0;  // No need to borrow
 180       break;  // Remaining digits are unchanged so exit early
 181     }
 182   }
 183   return bool(y);
 184 }
 185
 186 /// @brief Prefix decrement operator. Decrements the APInt by one.
 187 APInt& APInt::operator--() {
 188   if (isSingleWord())
 189     --VAL;
 190   else
 191     sub_1(pVal, getNumWords(), 1);
 192   return clearUnusedBits();
 193 }
 194
 195 /// add - This function adds the integer array x to the integer array Y and
 196 /// places the result in dest.
 197 /// @returns the carry out from the addition
 198 /// @brief General addition of 64-bit integer arrays
 199 static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 200                 unsigned len) {
 201   bool carry = false;
 202   for (unsigned i = 0; i< len; ++i) {
 203     uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
 204     dest[i] = x[i] + y[i] + carry;
 205     carry = dest[i] < limit || (carry && dest[i] == limit);
 206   }
 207   return carry;
 208 }
 209
 210 /// Adds the RHS APint to this APInt.
 211 /// @returns this, after addition of RHS.
 212 /// @brief Addition assignment operator.
 213 APInt& APInt::operator+=(const APInt& RHS) {
 214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 215   if (isSingleWord())
 216     VAL += RHS.VAL;
 217   else {
 218     add(pVal, pVal, RHS.pVal, getNumWords());
 219   }
 220   return clearUnusedBits();
 221 }
 222
 223 /// Subtracts the integer array y from the integer array x
 224 /// @returns returns the borrow out.
 225 /// @brief Generalized subtraction of 64-bit integer arrays.
 226 static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 227                 unsigned len) {
 228   bool borrow = false;
 229   for (unsigned i = 0; i < len; ++i) {
 230     uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
 231     borrow = y[i] > x_tmp || (borrow && x[i] == 0);
 232     dest[i] = x_tmp - y[i];
 233   }
 234   return borrow;
 235 }
 236
 237 /// Subtracts the RHS APInt from this APInt
 238 /// @returns this, after subtraction
 239 /// @brief Subtraction assignment operator.
 240 APInt& APInt::operator-=(const APInt& RHS) {
 241   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 242   if (isSingleWord())
 243     VAL -= RHS.VAL;
 244   else
 245     sub(pVal, pVal, RHS.pVal, getNumWords());
 246   return clearUnusedBits();
 247 }
 248
 249 /// Multiplies an integer array, x by a a uint64_t integer and places the result
 250 /// into dest.
 251 /// @returns the carry out of the multiplication.
 252 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 253 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 254   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 255   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 256   uint64_t carry = 0;
 257
 258   // For each digit of x.
 259   for (unsigned i = 0; i < len; ++i) {
 260     // Split x into high and low words
 261     uint64_t lx = x[i] & 0xffffffffULL;
 262     uint64_t hx = x[i] >> 32;
 263     // hasCarry - A flag to indicate if there is a carry to the next digit.
 264     // hasCarry == 0, no carry
 265     // hasCarry == 1, has carry
 266     // hasCarry == 2, no carry and the calculation result == 0.
 267     uint8_t hasCarry = 0;
 268     dest[i] = carry + lx * ly;
 269     // Determine if the add above introduces carry.
 270     hasCarry = (dest[i] < carry) ? 1 : 0;
 271     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 272     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 273     // (2^32 - 1) + 2^32 = 2^64.
 274     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 275
 276     carry += (lx * hy) & 0xffffffffULL;
 277     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 278     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 279             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 280   }
 281   return carry;
 282 }
 283
 284 /// Multiplies integer array x by integer array y and stores the result into
 285 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 286 /// @brief Generalized multiplicate of integer arrays.
 287 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
 288                 unsigned ylen) {
 289   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 290   for (unsigned i = 1; i < ylen; ++i) {
 291     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 292     uint64_t carry = 0, lx = 0, hx = 0;
 293     for (unsigned j = 0; j < xlen; ++j) {
 294       lx = x[j] & 0xffffffffULL;
 295       hx = x[j] >> 32;
 296       // hasCarry - A flag to indicate if has carry.
 297       // hasCarry == 0, no carry
 298       // hasCarry == 1, has carry
 299       // hasCarry == 2, no carry and the calculation result == 0.
 300       uint8_t hasCarry = 0;
 301       uint64_t resul = carry + lx * ly;
 302       hasCarry = (resul < carry) ? 1 : 0;
 303       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 304       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 305
 306       carry += (lx * hy) & 0xffffffffULL;
 307       resul = (carry << 32) | (resul & 0xffffffffULL);
 308       dest[i+j] += resul;
 309       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 310               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 311               ((lx * hy) >> 32) + hx * hy;
 312     }
 313     dest[i+xlen] = carry;
 314   }
 315 }
 316
 317 APInt& APInt::operator*=(const APInt& RHS) {
 318   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 319   if (isSingleWord()) {
 320     VAL *= RHS.VAL;
 321     clearUnusedBits();
 322     return *this;
 323   }
 324
 325   // Get some bit facts about LHS and check for zero
 326   unsigned lhsBits = getActiveBits();
 327   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 328   if (!lhsWords)
 329     // 0 * X ===> 0
 330     return *this;
 331
 332   // Get some bit facts about RHS and check for zero
 333   unsigned rhsBits = RHS.getActiveBits();
 334   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 335   if (!rhsWords) {
 336     // X * 0 ===> 0
 337     clear();
 338     return *this;
 339   }
 340
 341   // Allocate space for the result
 342   unsigned destWords = rhsWords + lhsWords;
 343   uint64_t *dest = getMemory(destWords);
 344
 345   // Perform the long multiply
 346   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 347
 348   // Copy result back into *this
 349   clear();
 350   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 351   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 352
 353   // delete dest array and return
 354   delete[] dest;
 355   return *this;
 356 }
 357
 358 APInt& APInt::operator&=(const APInt& RHS) {
 359   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 360   if (isSingleWord()) {
 361     VAL &= RHS.VAL;
 362     return *this;
 363   }
 364   unsigned numWords = getNumWords();
 365   for (unsigned i = 0; i < numWords; ++i)
 366     pVal[i] &= RHS.pVal[i];
 367   return *this;
 368 }
 369
 370 APInt& APInt::operator|=(const APInt& RHS) {
 371   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 372   if (isSingleWord()) {
 373     VAL |= RHS.VAL;
 374     return *this;
 375   }
 376   unsigned numWords = getNumWords();
 377   for (unsigned i = 0; i < numWords; ++i)
 378     pVal[i] |= RHS.pVal[i];
 379   return *this;
 380 }
 381
 382 APInt& APInt::operator^=(const APInt& RHS) {
 383   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 384   if (isSingleWord()) {
 385     VAL ^= RHS.VAL;
 386     this->clearUnusedBits();
 387     return *this;
 388   }
 389   unsigned numWords = getNumWords();
 390   for (unsigned i = 0; i < numWords; ++i)
 391     pVal[i] ^= RHS.pVal[i];
 392   return clearUnusedBits();
 393 }
 394
 395 APInt APInt::AndSlowCase(const APInt& RHS) const {
 396   unsigned numWords = getNumWords();
 397   uint64_t* val = getMemory(numWords);
 398   for (unsigned i = 0; i < numWords; ++i)
 399     val[i] = pVal[i] & RHS.pVal[i];
 400   return APInt(val, getBitWidth());
 401 }
 402
 403 APInt APInt::OrSlowCase(const APInt& RHS) const {
 404   unsigned numWords = getNumWords();
 405   uint64_t *val = getMemory(numWords);
 406   for (unsigned i = 0; i < numWords; ++i)
 407     val[i] = pVal[i] | RHS.pVal[i];
 408   return APInt(val, getBitWidth());
 409 }
 410
 411 APInt APInt::XorSlowCase(const APInt& RHS) const {
 412   unsigned numWords = getNumWords();
 413   uint64_t *val = getMemory(numWords);
 414   for (unsigned i = 0; i < numWords; ++i)
 415     val[i] = pVal[i] ^ RHS.pVal[i];
 416
 417   // 0^0==1 so clear the high bits in case they got set.
 418   return APInt(val, getBitWidth()).clearUnusedBits();
 419 }
 420
 421 bool APInt::operator !() const {
 422   if (isSingleWord())
 423     return !VAL;
 424
 425   for (unsigned i = 0; i < getNumWords(); ++i)
 426     if (pVal[i])
 427       return false;
 428   return true;
 429 }
 430
 431 APInt APInt::operator*(const APInt& RHS) const {
 432   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 433   if (isSingleWord())
 434     return APInt(BitWidth, VAL * RHS.VAL);
 435   APInt Result(*this);
 436   Result *= RHS;
 437   return Result.clearUnusedBits();
 438 }
 439
 440 APInt APInt::operator+(const APInt& RHS) const {
 441   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 442   if (isSingleWord())
 443     return APInt(BitWidth, VAL + RHS.VAL);
 444   APInt Result(BitWidth, 0);
 445   add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 446   return Result.clearUnusedBits();
 447 }
 448
 449 APInt APInt::operator-(const APInt& RHS) const {
 450   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 451   if (isSingleWord())
 452     return APInt(BitWidth, VAL - RHS.VAL);
 453   APInt Result(BitWidth, 0);
 454   sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 455   return Result.clearUnusedBits();
 456 }
 457
 458 bool APInt::operator[](unsigned bitPosition) const {
 459   return (maskBit(bitPosition) &
 460           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
 461 }
 462
 463 bool APInt::EqualSlowCase(const APInt& RHS) const {
 464   // Get some facts about the number of bits used in the two operands.
 465   unsigned n1 = getActiveBits();
 466   unsigned n2 = RHS.getActiveBits();
 467
 468   // If the number of bits isn't the same, they aren't equal
 469   if (n1 != n2)
 470     return false;
 471
 472   // If the number of bits fits in a word, we only need to compare the low word.
 473   if (n1 <= APINT_BITS_PER_WORD)
 474     return pVal[0] == RHS.pVal[0];
 475
 476   // Otherwise, compare everything
 477   for (int i = whichWord(n1 - 1); i >= 0; --i)
 478     if (pVal[i] != RHS.pVal[i])
 479       return false;
 480   return true;
 481 }
 482
 483 bool APInt::EqualSlowCase(uint64_t Val) const {
 484   unsigned n = getActiveBits();
 485   if (n <= APINT_BITS_PER_WORD)
 486     return pVal[0] == Val;
 487   else
 488     return false;
 489 }
 490
 491 bool APInt::ult(const APInt& RHS) const {
 492   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 493   if (isSingleWord())
 494     return VAL < RHS.VAL;
 495
 496   // Get active bit length of both operands
 497   unsigned n1 = getActiveBits();
 498   unsigned n2 = RHS.getActiveBits();
 499
 500   // If magnitude of LHS is less than RHS, return true.
 501   if (n1 < n2)
 502     return true;
 503
 504   // If magnitude of RHS is greather than LHS, return false.
 505   if (n2 < n1)
 506     return false;
 507
 508   // If they bot fit in a word, just compare the low order word
 509   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
 510     return pVal[0] < RHS.pVal[0];
 511
 512   // Otherwise, compare all words
 513   unsigned topWord = whichWord(std::max(n1,n2)-1);
 514   for (int i = topWord; i >= 0; --i) {
 515     if (pVal[i] > RHS.pVal[i])
 516       return false;
 517     if (pVal[i] < RHS.pVal[i])
 518       return true;
 519   }
 520   return false;
 521 }
 522
 523 bool APInt::slt(const APInt& RHS) const {
 524   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 525   if (isSingleWord()) {
 526     int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 527     int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
 528     return lhsSext < rhsSext;
 529   }
 530
 531   APInt lhs(*this);
 532   APInt rhs(RHS);
 533   bool lhsNeg = isNegative();
 534   bool rhsNeg = rhs.isNegative();
 535   if (lhsNeg) {
 536     // Sign bit is set so perform two's complement to make it positive
 537     lhs.flip();
 538     lhs++;
 539   }
 540   if (rhsNeg) {
 541     // Sign bit is set so perform two's complement to make it positive
 542     rhs.flip();
 543     rhs++;
 544   }
 545
 546   // Now we have unsigned values to compare so do the comparison if necessary
 547   // based on the negativeness of the values.
 548   if (lhsNeg)
 549     if (rhsNeg)
 550       return lhs.ugt(rhs);
 551     else
 552       return true;
 553   else if (rhsNeg)
 554     return false;
 555   else
 556     return lhs.ult(rhs);
 557 }
 558
 559 APInt& APInt::set(unsigned bitPosition) {
 560   if (isSingleWord())
 561     VAL |= maskBit(bitPosition);
 562   else
 563     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 564   return *this;
 565 }
 566
 567 /// Set the given bit to 0 whose position is given as "bitPosition".
 568 /// @brief Set a given bit to 0.
 569 APInt& APInt::clear(unsigned bitPosition) {
 570   if (isSingleWord())
 571     VAL &= ~maskBit(bitPosition);
 572   else
 573     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 574   return *this;
 575 }
 576
 577 /// @brief Toggle every bit to its opposite value.
 578
 579 /// Toggle a given bit to its opposite value whose position is given
 580 /// as "bitPosition".
 581 /// @brief Toggles a given bit to its opposite value.
 582 APInt& APInt::flip(unsigned bitPosition) {
 583   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 584   if ((*this)[bitPosition]) clear(bitPosition);
 585   else set(bitPosition);
 586   return *this;
 587 }
 588
 589 unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) {
 590   assert(str != 0 && "Invalid value string");
 591   assert(slen > 0 && "Invalid string length");
 592
 593   // Each computation below needs to know if its negative
 594   unsigned isNegative = str[0] == '-';
 595   if (isNegative) {
 596     slen--;
 597     str++;
 598   }
 599   // For radixes of power-of-two values, the bits required is accurately and
 600   // easily computed
 601   if (radix == 2)
 602     return slen + isNegative;
 603   if (radix == 8)
 604     return slen * 3 + isNegative;
 605   if (radix == 16)
 606     return slen * 4 + isNegative;
 607
 608   // Otherwise it must be radix == 10, the hard case
 609   assert(radix == 10 && "Invalid radix");
 610
 611   // This is grossly inefficient but accurate. We could probably do something
 612   // with a computation of roughly slen*64/20 and then adjust by the value of
 613   // the first few digits. But, I'm not sure how accurate that could be.
 614
 615   // Compute a sufficient number of bits that is always large enough but might
 616   // be too large. This avoids the assertion in the constructor.
 617   unsigned sufficient = slen*64/18;
 618
 619   // Convert to the actual binary value.
 620   APInt tmp(sufficient, str, slen, radix);
 621
 622   // Compute how many bits are required.
 623   return isNegative + tmp.logBase2() + 1;
 624 }
 625
 626 // From http://www.burtleburtle.net, byBob Jenkins.
 627 // When targeting x86, both GCC and LLVM seem to recognize this as a
 628 // rotate instruction.
 629 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
 630
 631 // From http://www.burtleburtle.net, by Bob Jenkins.
 632 #define mix(a,b,c) \
 633   { \
 634     a -= c;  a ^= rot(c, 4);  c += b; \
 635     b -= a;  b ^= rot(a, 6);  a += c; \
 636     c -= b;  c ^= rot(b, 8);  b += a; \
 637     a -= c;  a ^= rot(c,16);  c += b; \
 638     b -= a;  b ^= rot(a,19);  a += c; \
 639     c -= b;  c ^= rot(b, 4);  b += a; \
 640   }
 641
 642 // From http://www.burtleburtle.net, by Bob Jenkins.
 643 #define final(a,b,c) \
 644   { \
 645     c ^= b; c -= rot(b,14); \
 646     a ^= c; a -= rot(c,11); \
 647     b ^= a; b -= rot(a,25); \
 648     c ^= b; c -= rot(b,16); \
 649     a ^= c; a -= rot(c,4);  \
 650     b ^= a; b -= rot(a,14); \
 651     c ^= b; c -= rot(b,24); \
 652   }
 653
 654 // hashword() was adapted from http://www.burtleburtle.net, by Bob
 655 // Jenkins.  k is a pointer to an array of uint32_t values; length is
 656 // the length of the key, in 32-bit chunks.  This version only handles
 657 // keys that are a multiple of 32 bits in size.
 658 static inline uint32_t hashword(const uint64_t *k64, size_t length)
 659 {
 660   const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
 661   uint32_t a,b,c;
 662
 663   /* Set up the internal state */
 664   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
 665
 666   /*------------------------------------------------- handle most of the key */
 667   while (length > 3)
 668     {
 669       a += k[0];
 670       b += k[1];
 671       c += k[2];
 672       mix(a,b,c);
 673       length -= 3;
 674       k += 3;
 675     }
 676
 677   /*------------------------------------------- handle the last 3 uint32_t's */
 678   switch(length)                     /* all the case statements fall through */
 679     {
 680     case 3 : c+=k[2];
 681     case 2 : b+=k[1];
 682     case 1 : a+=k[0];
 683       final(a,b,c);
 684     case 0:     /* case 0: nothing left to add */
 685       break;
 686     }
 687   /*------------------------------------------------------ report the result */
 688   return c;
 689 }
 690
 691 // hashword8() was adapted from http://www.burtleburtle.net, by Bob
 692 // Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
 693 // targeting x86 (32 or 64 bit), both LLVM and GCC compile this
 694 // function into about 35 instructions when inlined.
 695 static inline uint32_t hashword8(const uint64_t k64)
 696 {
 697   uint32_t a,b,c;
 698   a = b = c = 0xdeadbeef + 4;
 699   b += k64 >> 32;
 700   a += k64 & 0xffffffff;
 701   final(a,b,c);
 702   return c;
 703 }
 704 #undef final
 705 #undef mix
 706 #undef rot
 707
 708 uint64_t APInt::getHashValue() const {
 709   uint64_t hash;
 710   if (isSingleWord())
 711     hash = hashword8(VAL);
 712   else
 713     hash = hashword(pVal, getNumWords()*2);
 714   return hash;
 715 }
 716
 717 /// HiBits - This function returns the high "numBits" bits of this APInt.
 718 APInt APInt::getHiBits(unsigned numBits) const {
 719   return APIntOps::lshr(*this, BitWidth - numBits);
 720 }
 721
 722 /// LoBits - This function returns the low "numBits" bits of this APInt.
 723 APInt APInt::getLoBits(unsigned numBits) const {
 724   return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
 725                         BitWidth - numBits);
 726 }
 727
 728 bool APInt::isPowerOf2() const {
 729   return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
 730 }
 731
 732 unsigned APInt::countLeadingZerosSlowCase() const {
 733   unsigned Count = 0;
 734   for (unsigned i = getNumWords(); i > 0u; --i) {
 735     if (pVal[i-1] == 0)
 736       Count += APINT_BITS_PER_WORD;
 737     else {
 738       Count += CountLeadingZeros_64(pVal[i-1]);
 739       break;
 740     }
 741   }
 742   unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
 743   if (remainder)
 744     Count -= APINT_BITS_PER_WORD - remainder;
 745   return std::min(Count, BitWidth);
 746 }
 747
 748 static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
 749   unsigned Count = 0;
 750   if (skip)
 751     V <<= skip;
 752   while (V && (V & (1ULL << 63))) {
 753     Count++;
 754     V <<= 1;
 755   }
 756   return Count;
 757 }
 758
 759 unsigned APInt::countLeadingOnes() const {
 760   if (isSingleWord())
 761     return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
 762
 763   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 764   unsigned shift;
 765   if (!highWordBits) {
 766     highWordBits = APINT_BITS_PER_WORD;
 767     shift = 0;
 768   } else {
 769     shift = APINT_BITS_PER_WORD - highWordBits;
 770   }
 771   int i = getNumWords() - 1;
 772   unsigned Count = countLeadingOnes_64(pVal[i], shift);
 773   if (Count == highWordBits) {
 774     for (i--; i >= 0; --i) {
 775       if (pVal[i] == -1ULL)
 776         Count += APINT_BITS_PER_WORD;
 777       else {
 778         Count += countLeadingOnes_64(pVal[i], 0);
 779         break;
 780       }
 781     }
 782   }
 783   return Count;
 784 }
 785
 786 unsigned APInt::countTrailingZeros() const {
 787   if (isSingleWord())
 788     return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
 789   unsigned Count = 0;
 790   unsigned i = 0;
 791   for (; i < getNumWords() && pVal[i] == 0; ++i)
 792     Count += APINT_BITS_PER_WORD;
 793   if (i < getNumWords())
 794     Count += CountTrailingZeros_64(pVal[i]);
 795   return std::min(Count, BitWidth);
 796 }
 797
 798 unsigned APInt::countTrailingOnesSlowCase() const {
 799   unsigned Count = 0;
 800   unsigned i = 0;
 801   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
 802     Count += APINT_BITS_PER_WORD;
 803   if (i < getNumWords())
 804     Count += CountTrailingOnes_64(pVal[i]);
 805   return std::min(Count, BitWidth);
 806 }
 807
 808 unsigned APInt::countPopulationSlowCase() const {
 809   unsigned Count = 0;
 810   for (unsigned i = 0; i < getNumWords(); ++i)
 811     Count += CountPopulation_64(pVal[i]);
 812   return Count;
 813 }
 814
 815 APInt APInt::byteSwap() const {
 816   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 817   if (BitWidth == 16)
 818     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
 819   else if (BitWidth == 32)
 820     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
 821   else if (BitWidth == 48) {
 822     unsigned Tmp1 = unsigned(VAL >> 16);
 823     Tmp1 = ByteSwap_32(Tmp1);
 824     uint16_t Tmp2 = uint16_t(VAL);
 825     Tmp2 = ByteSwap_16(Tmp2);
 826     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 827   } else if (BitWidth == 64)
 828     return APInt(BitWidth, ByteSwap_64(VAL));
 829   else {
 830     APInt Result(BitWidth, 0);
 831     char *pByte = (char*)Result.pVal;
 832     for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
 833       char Tmp = pByte[i];
 834       pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
 835       pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
 836     }
 837     return Result;
 838   }
 839 }
 840
 841 APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
 842                                             const APInt& API2) {
 843   APInt A = API1, B = API2;
 844   while (!!B) {
 845     APInt T = B;
 846     B = APIntOps::urem(A, B);
 847     A = T;
 848   }
 849   return A;
 850 }
 851
 852 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 853   union {
 854     double D;
 855     uint64_t I;
 856   } T;
 857   T.D = Double;
 858
 859   // Get the sign bit from the highest order bit
 860   bool isNeg = T.I >> 63;
 861
 862   // Get the 11-bit exponent and adjust for the 1023 bit bias
 863   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 864
 865   // If the exponent is negative, the value is < 0 so just return 0.
 866   if (exp < 0)
 867     return APInt(width, 0u);
 868
 869   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 870   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 871
 872   // If the exponent doesn't shift all bits out of the mantissa
 873   if (exp < 52)
 874     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 875                     APInt(width, mantissa >> (52 - exp));
 876
 877   // If the client didn't provide enough bits for us to shift the mantissa into
 878   // then the result is undefined, just return 0
 879   if (width <= exp - 52)
 880     return APInt(width, 0);
 881
 882   // Otherwise, we have to shift the mantissa bits up to the right location
 883   APInt Tmp(width, mantissa);
 884   Tmp = Tmp.shl((unsigned)exp - 52);
 885   return isNeg ? -Tmp : Tmp;
 886 }
 887
 888 /// RoundToDouble - This function convert this APInt to a double.
 889 /// The layout for double is as following (IEEE Standard 754):
 890 ///  --------------------------------------
 891 /// |  Sign    Exponent    Fraction    Bias |
 892 /// |-------------------------------------- |
 893 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 894 ///  --------------------------------------
 895 double APInt::roundToDouble(bool isSigned) const {
 896
 897   // Handle the simple case where the value is contained in one uint64_t.
 898   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 899     if (isSigned) {
 900       int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 901       return double(sext);
 902     } else
 903       return double(VAL);
 904   }
 905
 906   // Determine if the value is negative.
 907   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 908
 909   // Construct the absolute value if we're negative.
 910   APInt Tmp(isNeg ? -(*this) : (*this));
 911
 912   // Figure out how many bits we're using.
 913   unsigned n = Tmp.getActiveBits();
 914
 915   // The exponent (without bias normalization) is just the number of bits
 916   // we are using. Note that the sign bit is gone since we constructed the
 917   // absolute value.
 918   uint64_t exp = n;
 919
 920   // Return infinity for exponent overflow
 921   if (exp > 1023) {
 922     if (!isSigned || !isNeg)
 923       return std::numeric_limits<double>::infinity();
 924     else
 925       return -std::numeric_limits<double>::infinity();
 926   }
 927   exp += 1023; // Increment for 1023 bias
 928
 929   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 930   // extract the high 52 bits from the correct words in pVal.
 931   uint64_t mantissa;
 932   unsigned hiWord = whichWord(n-1);
 933   if (hiWord == 0) {
 934     mantissa = Tmp.pVal[0];
 935     if (n > 52)
 936       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 937   } else {
 938     assert(hiWord > 0 && "huh?");
 939     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 940     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 941     mantissa = hibits | lobits;
 942   }
 943
 944   // The leading bit of mantissa is implicit, so get rid of it.
 945   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 946   union {
 947     double D;
 948     uint64_t I;
 949   } T;
 950   T.I = sign | (exp << 52) | mantissa;
 951   return T.D;
 952 }
 953
 954 // Truncate to new width.
 955 APInt &APInt::trunc(unsigned width) {
 956   assert(width < BitWidth && "Invalid APInt Truncate request");
 957   assert(width && "Can't truncate to 0 bits");
 958   unsigned wordsBefore = getNumWords();
 959   BitWidth = width;
 960   unsigned wordsAfter = getNumWords();
 961   if (wordsBefore != wordsAfter) {
 962     if (wordsAfter == 1) {
 963       uint64_t *tmp = pVal;
 964       VAL = pVal[0];
 965       delete [] tmp;
 966     } else {
 967       uint64_t *newVal = getClearedMemory(wordsAfter);
 968       for (unsigned i = 0; i < wordsAfter; ++i)
 969         newVal[i] = pVal[i];
 970       delete [] pVal;
 971       pVal = newVal;
 972     }
 973   }
 974   return clearUnusedBits();
 975 }
 976
 977 // Sign extend to a new width.
 978 APInt &APInt::sext(unsigned width) {
 979   assert(width > BitWidth && "Invalid APInt SignExtend request");
 980   // If the sign bit isn't set, this is the same as zext.
 981   if (!isNegative()) {
 982     zext(width);
 983     return *this;
 984   }
 985
 986   // The sign bit is set. First, get some facts
 987   unsigned wordsBefore = getNumWords();
 988   unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
 989   BitWidth = width;
 990   unsigned wordsAfter = getNumWords();
 991
 992   // Mask the high order word appropriately
 993   if (wordsBefore == wordsAfter) {
 994     unsigned newWordBits = width % APINT_BITS_PER_WORD;
 995     // The extension is contained to the wordsBefore-1th word.
 996     uint64_t mask = ~0ULL;
 997     if (newWordBits)
 998       mask >>= APINT_BITS_PER_WORD - newWordBits;
 999     mask <<= wordBits;
1000     if (wordsBefore == 1)
1001       VAL |= mask;
1002     else
1003       pVal[wordsBefore-1] |= mask;
1004     return clearUnusedBits();
1005   }
1006
1007   uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1008   uint64_t *newVal = getMemory(wordsAfter);
1009   if (wordsBefore == 1)
1010     newVal[0] = VAL | mask;
1011   else {
1012     for (unsigned i = 0; i < wordsBefore; ++i)
1013       newVal[i] = pVal[i];
1014     newVal[wordsBefore-1] |= mask;
1015   }
1016   for (unsigned i = wordsBefore; i < wordsAfter; i++)
1017     newVal[i] = -1ULL;
1018   if (wordsBefore != 1)
1019     delete [] pVal;
1020   pVal = newVal;
1021   return clearUnusedBits();
1022 }
1023
1024 //  Zero extend to a new width.
1025 APInt &APInt::zext(unsigned width) {
1026   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1027   unsigned wordsBefore = getNumWords();
1028   BitWidth = width;
1029   unsigned wordsAfter = getNumWords();
1030   if (wordsBefore != wordsAfter) {
1031     uint64_t *newVal = getClearedMemory(wordsAfter);
1032     if (wordsBefore == 1)
1033       newVal[0] = VAL;
1034     else
1035       for (unsigned i = 0; i < wordsBefore; ++i)
1036         newVal[i] = pVal[i];
1037     if (wordsBefore != 1)
1038       delete [] pVal;
1039     pVal = newVal;
1040   }
1041   return *this;
1042 }
1043
1044 APInt &APInt::zextOrTrunc(unsigned width) {
1045   if (BitWidth < width)
1046     return zext(width);
1047   if (BitWidth > width)
1048     return trunc(width);
1049   return *this;
1050 }
1051
1052 APInt &APInt::sextOrTrunc(unsigned width) {
1053   if (BitWidth < width)
1054     return sext(width);
1055   if (BitWidth > width)
1056     return trunc(width);
1057   return *this;
1058 }
1059
1060 /// Arithmetic right-shift this APInt by shiftAmt.
1061 /// @brief Arithmetic right-shift function.
1062 APInt APInt::ashr(const APInt &shiftAmt) const {
1063   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1064 }
1065
1066 /// Arithmetic right-shift this APInt by shiftAmt.
1067 /// @brief Arithmetic right-shift function.
1068 APInt APInt::ashr(unsigned shiftAmt) const {
1069   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1070   // Handle a degenerate case
1071   if (shiftAmt == 0)
1072     return *this;
1073
1074   // Handle single word shifts with built-in ashr
1075   if (isSingleWord()) {
1076     if (shiftAmt == BitWidth)
1077       return APInt(BitWidth, 0); // undefined
1078     else {
1079       unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1080       return APInt(BitWidth,
1081         (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1082     }
1083   }
1084
1085   // If all the bits were shifted out, the result is, technically, undefined.
1086   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1087   // issues in the algorithm below.
1088   if (shiftAmt == BitWidth) {
1089     if (isNegative())
1090       return APInt(BitWidth, -1ULL, true);
1091     else
1092       return APInt(BitWidth, 0);
1093   }
1094
1095   // Create some space for the result.
1096   uint64_t * val = new uint64_t[getNumWords()];
1097
1098   // Compute some values needed by the following shift algorithms
1099   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1100   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1101   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1102   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1103   if (bitsInWord == 0)
1104     bitsInWord = APINT_BITS_PER_WORD;
1105
1106   // If we are shifting whole words, just move whole words
1107   if (wordShift == 0) {
1108     // Move the words containing significant bits
1109     for (unsigned i = 0; i <= breakWord; ++i)
1110       val[i] = pVal[i+offset]; // move whole word
1111
1112     // Adjust the top significant word for sign bit fill, if negative
1113     if (isNegative())
1114       if (bitsInWord < APINT_BITS_PER_WORD)
1115         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1116   } else {
1117     // Shift the low order words
1118     for (unsigned i = 0; i < breakWord; ++i) {
1119       // This combines the shifted corresponding word with the low bits from
1120       // the next word (shifted into this word's high bits).
1121       val[i] = (pVal[i+offset] >> wordShift) |
1122                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1123     }
1124
1125     // Shift the break word. In this case there are no bits from the next word
1126     // to include in this word.
1127     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1128
1129     // Deal with sign extenstion in the break word, and possibly the word before
1130     // it.
1131     if (isNegative()) {
1132       if (wordShift > bitsInWord) {
1133         if (breakWord > 0)
1134           val[breakWord-1] |=
1135             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1136         val[breakWord] |= ~0ULL;
1137       } else
1138         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1139     }
1140   }
1141
1142   // Remaining words are 0 or -1, just assign them.
1143   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1144   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1145     val[i] = fillValue;
1146   return APInt(val, BitWidth).clearUnusedBits();
1147 }
1148
1149 /// Logical right-shift this APInt by shiftAmt.
1150 /// @brief Logical right-shift function.
1151 APInt APInt::lshr(const APInt &shiftAmt) const {
1152   return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1153 }
1154
1155 /// Logical right-shift this APInt by shiftAmt.
1156 /// @brief Logical right-shift function.
1157 APInt APInt::lshr(unsigned shiftAmt) const {
1158   if (isSingleWord()) {
1159     if (shiftAmt == BitWidth)
1160       return APInt(BitWidth, 0);
1161     else
1162       return APInt(BitWidth, this->VAL >> shiftAmt);
1163   }
1164
1165   // If all the bits were shifted out, the result is 0. This avoids issues
1166   // with shifting by the size of the integer type, which produces undefined
1167   // results. We define these "undefined results" to always be 0.
1168   if (shiftAmt == BitWidth)
1169     return APInt(BitWidth, 0);
1170
1171   // If none of the bits are shifted out, the result is *this. This avoids
1172   // issues with shifting by the size of the integer type, which produces
1173   // undefined results in the code below. This is also an optimization.
1174   if (shiftAmt == 0)
1175     return *this;
1176
1177   // Create some space for the result.
1178   uint64_t * val = new uint64_t[getNumWords()];
1179
1180   // If we are shifting less than a word, compute the shift with a simple carry
1181   if (shiftAmt < APINT_BITS_PER_WORD) {
1182     uint64_t carry = 0;
1183     for (int i = getNumWords()-1; i >= 0; --i) {
1184       val[i] = (pVal[i] >> shiftAmt) | carry;
1185       carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1186     }
1187     return APInt(val, BitWidth).clearUnusedBits();
1188   }
1189
1190   // Compute some values needed by the remaining shift algorithms
1191   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1192   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1193
1194   // If we are shifting whole words, just move whole words
1195   if (wordShift == 0) {
1196     for (unsigned i = 0; i < getNumWords() - offset; ++i)
1197       val[i] = pVal[i+offset];
1198     for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1199       val[i] = 0;
1200     return APInt(val,BitWidth).clearUnusedBits();
1201   }
1202
1203   // Shift the low order words
1204   unsigned breakWord = getNumWords() - offset -1;
1205   for (unsigned i = 0; i < breakWord; ++i)
1206     val[i] = (pVal[i+offset] >> wordShift) |
1207              (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1208   // Shift the break word.
1209   val[breakWord] = pVal[breakWord+offset] >> wordShift;
1210
1211   // Remaining words are 0
1212   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1213     val[i] = 0;
1214   return APInt(val, BitWidth).clearUnusedBits();
1215 }
1216
1217 /// Left-shift this APInt by shiftAmt.
1218 /// @brief Left-shift function.
1219 APInt APInt::shl(const APInt &shiftAmt) const {
1220   // It's undefined behavior in C to shift by BitWidth or greater.
1221   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1222 }
1223
1224 APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1225   // If all the bits were shifted out, the result is 0. This avoids issues
1226   // with shifting by the size of the integer type, which produces undefined
1227   // results. We define these "undefined results" to always be 0.
1228   if (shiftAmt == BitWidth)
1229     return APInt(BitWidth, 0);
1230
1231   // If none of the bits are shifted out, the result is *this. This avoids a
1232   // lshr by the words size in the loop below which can produce incorrect
1233   // results. It also avoids the expensive computation below for a common case.
1234   if (shiftAmt == 0)
1235     return *this;
1236
1237   // Create some space for the result.
1238   uint64_t * val = new uint64_t[getNumWords()];
1239
1240   // If we are shifting less than a word, do it the easy way
1241   if (shiftAmt < APINT_BITS_PER_WORD) {
1242     uint64_t carry = 0;
1243     for (unsigned i = 0; i < getNumWords(); i++) {
1244       val[i] = pVal[i] << shiftAmt | carry;
1245       carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1246     }
1247     return APInt(val, BitWidth).clearUnusedBits();
1248   }
1249
1250   // Compute some values needed by the remaining shift algorithms
1251   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1252   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1253
1254   // If we are shifting whole words, just move whole words
1255   if (wordShift == 0) {
1256     for (unsigned i = 0; i < offset; i++)
1257       val[i] = 0;
1258     for (unsigned i = offset; i < getNumWords(); i++)
1259       val[i] = pVal[i-offset];
1260     return APInt(val,BitWidth).clearUnusedBits();
1261   }
1262
1263   // Copy whole words from this to Result.
1264   unsigned i = getNumWords() - 1;
1265   for (; i > offset; --i)
1266     val[i] = pVal[i-offset] << wordShift |
1267              pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1268   val[offset] = pVal[0] << wordShift;
1269   for (i = 0; i < offset; ++i)
1270     val[i] = 0;
1271   return APInt(val, BitWidth).clearUnusedBits();
1272 }
1273
1274 APInt APInt::rotl(const APInt &rotateAmt) const {
1275   return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1276 }
1277
1278 APInt APInt::rotl(unsigned rotateAmt) const {
1279   if (rotateAmt == 0)
1280     return *this;
1281   // Don't get too fancy, just use existing shift/or facilities
1282   APInt hi(*this);
1283   APInt lo(*this);
1284   hi.shl(rotateAmt);
1285   lo.lshr(BitWidth - rotateAmt);
1286   return hi | lo;
1287 }
1288
1289 APInt APInt::rotr(const APInt &rotateAmt) const {
1290   return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1291 }
1292
1293 APInt APInt::rotr(unsigned rotateAmt) const {
1294   if (rotateAmt == 0)
1295     return *this;
1296   // Don't get too fancy, just use existing shift/or facilities
1297   APInt hi(*this);
1298   APInt lo(*this);
1299   lo.lshr(rotateAmt);
1300   hi.shl(BitWidth - rotateAmt);
1301   return hi | lo;
1302 }
1303
1304 // Square Root - this method computes and returns the square root of "this".
1305 // Three mechanisms are used for computation. For small values (<= 5 bits),
1306 // a table lookup is done. This gets some performance for common cases. For
1307 // values using less than 52 bits, the value is converted to double and then
1308 // the libc sqrt function is called. The result is rounded and then converted
1309 // back to a uint64_t which is then used to construct the result. Finally,
1310 // the Babylonian method for computing square roots is used.
1311 APInt APInt::sqrt() const {
1312
1313   // Determine the magnitude of the value.
1314   unsigned magnitude = getActiveBits();
1315
1316   // Use a fast table for some small values. This also gets rid of some
1317   // rounding errors in libc sqrt for small values.
1318   if (magnitude <= 5) {
1319     static const uint8_t results[32] = {
1320       /*     0 */ 0,
1321       /*  1- 2 */ 1, 1,
1322       /*  3- 6 */ 2, 2, 2, 2,
1323       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1324       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1325       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1326       /*    31 */ 6
1327     };
1328     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1329   }
1330
1331   // If the magnitude of the value fits in less than 52 bits (the precision of
1332   // an IEEE double precision floating point value), then we can use the
1333   // libc sqrt function which will probably use a hardware sqrt computation.
1334   // This should be faster than the algorithm below.
1335   if (magnitude < 52) {
1336 #ifdef _MSC_VER
1337     // Amazingly, VC++ doesn't have round().
1338     return APInt(BitWidth,
1339                  uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1340 #else
1341     return APInt(BitWidth,
1342                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1343 #endif
1344   }
1345
1346   // Okay, all the short cuts are exhausted. We must compute it. The following
1347   // is a classical Babylonian method for computing the square root. This code
1348   // was adapted to APINt from a wikipedia article on such computations.
1349   // See http://www.wikipedia.org/ and go to the page named
1350   // Calculate_an_integer_square_root.
1351   unsigned nbits = BitWidth, i = 4;
1352   APInt testy(BitWidth, 16);
1353   APInt x_old(BitWidth, 1);
1354   APInt x_new(BitWidth, 0);
1355   APInt two(BitWidth, 2);
1356
1357   // Select a good starting value using binary logarithms.
1358   for (;; i += 2, testy = testy.shl(2))
1359     if (i >= nbits || this->ule(testy)) {
1360       x_old = x_old.shl(i / 2);
1361       break;
1362     }
1363
1364   // Use the Babylonian method to arrive at the integer square root:
1365   for (;;) {
1366     x_new = (this->udiv(x_old) + x_old).udiv(two);
1367     if (x_old.ule(x_new))
1368       break;
1369     x_old = x_new;
1370   }
1371
1372   // Make sure we return the closest approximation
1373   // NOTE: The rounding calculation below is correct. It will produce an
1374   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1375   // determined to be a rounding issue with pari/gp as it begins to use a
1376   // floating point representation after 192 bits. There are no discrepancies
1377   // between this algorithm and pari/gp for bit widths < 192 bits.
1378   APInt square(x_old * x_old);
1379   APInt nextSquare((x_old + 1) * (x_old +1));
1380   if (this->ult(square))
1381     return x_old;
1382   else if (this->ule(nextSquare)) {
1383     APInt midpoint((nextSquare - square).udiv(two));
1384     APInt offset(*this - square);
1385     if (offset.ult(midpoint))
1386       return x_old;
1387     else
1388       return x_old + 1;
1389   } else
1390     assert(0 && "Error in APInt::sqrt computation");
1391   return x_old + 1;
1392 }
1393
1394 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1395 /// iterative extended Euclidean algorithm is used to solve for this value,
1396 /// however we simplify it to speed up calculating only the inverse, and take
1397 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1398 /// (potentially large) APInts around.
1399 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1400   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1401
1402   // Using the properties listed at the following web page (accessed 06/21/08):
1403   //   http://www.numbertheory.org/php/euclid.html
1404   // (especially the properties numbered 3, 4 and 9) it can be proved that
1405   // BitWidth bits suffice for all the computations in the algorithm implemented
1406   // below. More precisely, this number of bits suffice if the multiplicative
1407   // inverse exists, but may not suffice for the general extended Euclidean
1408   // algorithm.
1409
1410   APInt r[2] = { modulo, *this };
1411   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1412   APInt q(BitWidth, 0);
1413
1414   unsigned i;
1415   for (i = 0; r[i^1] != 0; i ^= 1) {
1416     // An overview of the math without the confusing bit-flipping:
1417     // q = r[i-2] / r[i-1]
1418     // r[i] = r[i-2] % r[i-1]
1419     // t[i] = t[i-2] - t[i-1] * q
1420     udivrem(r[i], r[i^1], q, r[i]);
1421     t[i] -= t[i^1] * q;
1422   }
1423
1424   // If this APInt and the modulo are not coprime, there is no multiplicative
1425   // inverse, so return 0. We check this by looking at the next-to-last
1426   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1427   // algorithm.
1428   if (r[i] != 1)
1429     return APInt(BitWidth, 0);
1430
1431   // The next-to-last t is the multiplicative inverse.  However, we are
1432   // interested in a positive inverse. Calcuate a positive one from a negative
1433   // one if necessary. A simple addition of the modulo suffices because
1434   // abs(t[i]) is known to be less than *this/2 (see the link above).
1435   return t[i].isNegative() ? t[i] + modulo : t[i];
1436 }
1437
1438 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1439 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1440 /// variables here have the same names as in the algorithm. Comments explain
1441 /// the algorithm and any deviation from it.
1442 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1443                      unsigned m, unsigned n) {
1444   assert(u && "Must provide dividend");
1445   assert(v && "Must provide divisor");
1446   assert(q && "Must provide quotient");
1447   assert(u != v && u != q && v != q && "Must us different memory");
1448   assert(n>1 && "n must be > 1");
1449
1450   // Knuth uses the value b as the base of the number system. In our case b
1451   // is 2^31 so we just set it to -1u.
1452   uint64_t b = uint64_t(1) << 32;
1453
1454 #if 0
1455   DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
1456   DEBUG(cerr << "KnuthDiv: original:");
1457   DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1458   DEBUG(cerr << " by");
1459   DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1460   DEBUG(cerr << '\n');
1461 #endif
1462   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1463   // u and v by d. Note that we have taken Knuth's advice here to use a power
1464   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1465   // 2 allows us to shift instead of multiply and it is easy to determine the
1466   // shift amount from the leading zeros.  We are basically normalizing the u
1467   // and v so that its high bits are shifted to the top of v's range without
1468   // overflow. Note that this can require an extra word in u so that u must
1469   // be of length m+n+1.
1470   unsigned shift = CountLeadingZeros_32(v[n-1]);
1471   unsigned v_carry = 0;
1472   unsigned u_carry = 0;
1473   if (shift) {
1474     for (unsigned i = 0; i < m+n; ++i) {
1475       unsigned u_tmp = u[i] >> (32 - shift);
1476       u[i] = (u[i] << shift) | u_carry;
1477       u_carry = u_tmp;
1478     }
1479     for (unsigned i = 0; i < n; ++i) {
1480       unsigned v_tmp = v[i] >> (32 - shift);
1481       v[i] = (v[i] << shift) | v_carry;
1482       v_carry = v_tmp;
1483     }
1484   }
1485   u[m+n] = u_carry;
1486 #if 0
1487   DEBUG(cerr << "KnuthDiv:   normal:");
1488   DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1489   DEBUG(cerr << " by");
1490   DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1491   DEBUG(cerr << '\n');
1492 #endif
1493
1494   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1495   int j = m;
1496   do {
1497     DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
1498     // D3. [Calculate q'.].
1499     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1500     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1501     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1502     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1503     // on v[n-2] determines at high speed most of the cases in which the trial
1504     // value qp is one too large, and it eliminates all cases where qp is two
1505     // too large.
1506     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1507     DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
1508     uint64_t qp = dividend / v[n-1];
1509     uint64_t rp = dividend % v[n-1];
1510     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1511       qp--;
1512       rp += v[n-1];
1513       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1514         qp--;
1515     }
1516     DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1517
1518     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1519     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1520     // consists of a simple multiplication by a one-place number, combined with
1521     // a subtraction.
1522     bool isNeg = false;
1523     for (unsigned i = 0; i < n; ++i) {
1524       uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1525       uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1526       bool borrow = subtrahend > u_tmp;
1527       DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp
1528                  << ", subtrahend == " << subtrahend
1529                  << ", borrow = " << borrow << '\n');
1530
1531       uint64_t result = u_tmp - subtrahend;
1532       unsigned k = j + i;
1533       u[k++] = (unsigned)(result & (b-1)); // subtract low word
1534       u[k++] = (unsigned)(result >> 32);   // subtract high word
1535       while (borrow && k <= m+n) { // deal with borrow to the left
1536         borrow = u[k] == 0;
1537         u[k]--;
1538         k++;
1539       }
1540       isNeg |= borrow;
1541       DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1542                     u[j+i+1] << '\n');
1543     }
1544     DEBUG(cerr << "KnuthDiv: after subtraction:");
1545     DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1546     DEBUG(cerr << '\n');
1547     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1548     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1549     // true value plus b**(n+1), namely as the b's complement of
1550     // the true value, and a "borrow" to the left should be remembered.
1551     //
1552     if (isNeg) {
1553       bool carry = true;  // true because b's complement is "complement + 1"
1554       for (unsigned i = 0; i <= m+n; ++i) {
1555         u[i] = ~u[i] + carry; // b's complement
1556         carry = carry && u[i] == 0;
1557       }
1558     }
1559     DEBUG(cerr << "KnuthDiv: after complement:");
1560     DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1561     DEBUG(cerr << '\n');
1562
1563     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1564     // negative, go to step D6; otherwise go on to step D7.
1565     q[j] = (unsigned)qp;
1566     if (isNeg) {
1567       // D6. [Add back]. The probability that this step is necessary is very
1568       // small, on the order of only 2/b. Make sure that test data accounts for
1569       // this possibility. Decrease q[j] by 1
1570       q[j]--;
1571       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1572       // A carry will occur to the left of u[j+n], and it should be ignored
1573       // since it cancels with the borrow that occurred in D4.
1574       bool carry = false;
1575       for (unsigned i = 0; i < n; i++) {
1576         unsigned limit = std::min(u[j+i],v[i]);
1577         u[j+i] += v[i] + carry;
1578         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1579       }
1580       u[j+n] += carry;
1581     }
1582     DEBUG(cerr << "KnuthDiv: after correction:");
1583     DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]);
1584     DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');
1585
1586   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1587   } while (--j >= 0);
1588
1589   DEBUG(cerr << "KnuthDiv: quotient:");
1590   DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]);
1591   DEBUG(cerr << '\n');
1592
1593   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1594   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1595   // compute the remainder (urem uses this).
1596   if (r) {
1597     // The value d is expressed by the "shift" value above since we avoided
1598     // multiplication by d by using a shift left. So, all we have to do is
1599     // shift right here. In order to mak
1600     if (shift) {
1601       unsigned carry = 0;
1602       DEBUG(cerr << "KnuthDiv: remainder:");
1603       for (int i = n-1; i >= 0; i--) {
1604         r[i] = (u[i] >> shift) | carry;
1605         carry = u[i] << (32 - shift);
1606         DEBUG(cerr << " " << r[i]);
1607       }
1608     } else {
1609       for (int i = n-1; i >= 0; i--) {
1610         r[i] = u[i];
1611         DEBUG(cerr << " " << r[i]);
1612       }
1613     }
1614     DEBUG(cerr << '\n');
1615   }
1616 #if 0
1617   DEBUG(cerr << std::setbase(10) << '\n');
1618 #endif
1619 }
1620
1621 void APInt::divide(const APInt LHS, unsigned lhsWords,
1622                    const APInt &RHS, unsigned rhsWords,
1623                    APInt *Quotient, APInt *Remainder)
1624 {
1625   assert(lhsWords >= rhsWords && "Fractional result");
1626
1627   // First, compose the values into an array of 32-bit words instead of
1628   // 64-bit words. This is a necessity of both the "short division" algorithm
1629   // and the the Knuth "classical algorithm" which requires there to be native
1630   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1631   // can't use 64-bit operands here because we don't have native results of
1632   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1633   // work on large-endian machines.
1634   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1635   unsigned n = rhsWords * 2;
1636   unsigned m = (lhsWords * 2) - n;
1637
1638   // Allocate space for the temporary values we need either on the stack, if
1639   // it will fit, or on the heap if it won't.
1640   unsigned SPACE[128];
1641   unsigned *U = 0;
1642   unsigned *V = 0;
1643   unsigned *Q = 0;
1644   unsigned *R = 0;
1645   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1646     U = &SPACE[0];
1647     V = &SPACE[m+n+1];
1648     Q = &SPACE[(m+n+1) + n];
1649     if (Remainder)
1650       R = &SPACE[(m+n+1) + n + (m+n)];
1651   } else {
1652     U = new unsigned[m + n + 1];
1653     V = new unsigned[n];
1654     Q = new unsigned[m+n];
1655     if (Remainder)
1656       R = new unsigned[n];
1657   }
1658
1659   // Initialize the dividend
1660   memset(U, 0, (m+n+1)*sizeof(unsigned));
1661   for (unsigned i = 0; i < lhsWords; ++i) {
1662     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1663     U[i * 2] = (unsigned)(tmp & mask);
1664     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1665   }
1666   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1667
1668   // Initialize the divisor
1669   memset(V, 0, (n)*sizeof(unsigned));
1670   for (unsigned i = 0; i < rhsWords; ++i) {
1671     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1672     V[i * 2] = (unsigned)(tmp & mask);
1673     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1674   }
1675
1676   // initialize the quotient and remainder
1677   memset(Q, 0, (m+n) * sizeof(unsigned));
1678   if (Remainder)
1679     memset(R, 0, n * sizeof(unsigned));
1680
1681   // Now, adjust m and n for the Knuth division. n is the number of words in
1682   // the divisor. m is the number of words by which the dividend exceeds the
1683   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1684   // contain any zero words or the Knuth algorithm fails.
1685   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1686     n--;
1687     m++;
1688   }
1689   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1690     m--;
1691
1692   // If we're left with only a single word for the divisor, Knuth doesn't work
1693   // so we implement the short division algorithm here. This is much simpler
1694   // and faster because we are certain that we can divide a 64-bit quantity
1695   // by a 32-bit quantity at hardware speed and short division is simply a
1696   // series of such operations. This is just like doing short division but we
1697   // are using base 2^32 instead of base 10.
1698   assert(n != 0 && "Divide by zero?");
1699   if (n == 1) {
1700     unsigned divisor = V[0];
1701     unsigned remainder = 0;
1702     for (int i = m+n-1; i >= 0; i--) {
1703       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1704       if (partial_dividend == 0) {
1705         Q[i] = 0;
1706         remainder = 0;
1707       } else if (partial_dividend < divisor) {
1708         Q[i] = 0;
1709         remainder = (unsigned)partial_dividend;
1710       } else if (partial_dividend == divisor) {
1711         Q[i] = 1;
1712         remainder = 0;
1713       } else {
1714         Q[i] = (unsigned)(partial_dividend / divisor);
1715         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1716       }
1717     }
1718     if (R)
1719       R[0] = remainder;
1720   } else {
1721     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1722     // case n > 1.
1723     KnuthDiv(U, V, Q, R, m, n);
1724   }
1725
1726   // If the caller wants the quotient
1727   if (Quotient) {
1728     // Set up the Quotient value's memory.
1729     if (Quotient->BitWidth != LHS.BitWidth) {
1730       if (Quotient->isSingleWord())
1731         Quotient->VAL = 0;
1732       else
1733         delete [] Quotient->pVal;
1734       Quotient->BitWidth = LHS.BitWidth;
1735       if (!Quotient->isSingleWord())
1736         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1737     } else
1738       Quotient->clear();
1739
1740     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1741     // order words.
1742     if (lhsWords == 1) {
1743       uint64_t tmp =
1744         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1745       if (Quotient->isSingleWord())
1746         Quotient->VAL = tmp;
1747       else
1748         Quotient->pVal[0] = tmp;
1749     } else {
1750       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1751       for (unsigned i = 0; i < lhsWords; ++i)
1752         Quotient->pVal[i] =
1753           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1754     }
1755   }
1756
1757   // If the caller wants the remainder
1758   if (Remainder) {
1759     // Set up the Remainder value's memory.
1760     if (Remainder->BitWidth != RHS.BitWidth) {
1761       if (Remainder->isSingleWord())
1762         Remainder->VAL = 0;
1763       else
1764         delete [] Remainder->pVal;
1765       Remainder->BitWidth = RHS.BitWidth;
1766       if (!Remainder->isSingleWord())
1767         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1768     } else
1769       Remainder->clear();
1770
1771     // The remainder is in R. Reconstitute the remainder into Remainder's low
1772     // order words.
1773     if (rhsWords == 1) {
1774       uint64_t tmp =
1775         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1776       if (Remainder->isSingleWord())
1777         Remainder->VAL = tmp;
1778       else
1779         Remainder->pVal[0] = tmp;
1780     } else {
1781       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1782       for (unsigned i = 0; i < rhsWords; ++i)
1783         Remainder->pVal[i] =
1784           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1785     }
1786   }
1787
1788   // Clean up the memory we allocated.
1789   if (U != &SPACE[0]) {
1790     delete [] U;
1791     delete [] V;
1792     delete [] Q;
1793     delete [] R;
1794   }
1795 }
1796
1797 APInt APInt::udiv(const APInt& RHS) const {
1798   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1799
1800   // First, deal with the easy case
1801   if (isSingleWord()) {
1802     assert(RHS.VAL != 0 && "Divide by zero?");
1803     return APInt(BitWidth, VAL / RHS.VAL);
1804   }
1805
1806   // Get some facts about the LHS and RHS number of bits and words
1807   unsigned rhsBits = RHS.getActiveBits();
1808   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1809   assert(rhsWords && "Divided by zero???");
1810   unsigned lhsBits = this->getActiveBits();
1811   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1812
1813   // Deal with some degenerate cases
1814   if (!lhsWords)
1815     // 0 / X ===> 0
1816     return APInt(BitWidth, 0);
1817   else if (lhsWords < rhsWords || this->ult(RHS)) {
1818     // X / Y ===> 0, iff X < Y
1819     return APInt(BitWidth, 0);
1820   } else if (*this == RHS) {
1821     // X / X ===> 1
1822     return APInt(BitWidth, 1);
1823   } else if (lhsWords == 1 && rhsWords == 1) {
1824     // All high words are zero, just use native divide
1825     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1826   }
1827
1828   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1829   APInt Quotient(1,0); // to hold result.
1830   divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1831   return Quotient;
1832 }
1833
1834 APInt APInt::urem(const APInt& RHS) const {
1835   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1836   if (isSingleWord()) {
1837     assert(RHS.VAL != 0 && "Remainder by zero?");
1838     return APInt(BitWidth, VAL % RHS.VAL);
1839   }
1840
1841   // Get some facts about the LHS
1842   unsigned lhsBits = getActiveBits();
1843   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1844
1845   // Get some facts about the RHS
1846   unsigned rhsBits = RHS.getActiveBits();
1847   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1848   assert(rhsWords && "Performing remainder operation by zero ???");
1849
1850   // Check the degenerate cases
1851   if (lhsWords == 0) {
1852     // 0 % Y ===> 0
1853     return APInt(BitWidth, 0);
1854   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1855     // X % Y ===> X, iff X < Y
1856     return *this;
1857   } else if (*this == RHS) {
1858     // X % X == 0;
1859     return APInt(BitWidth, 0);
1860   } else if (lhsWords == 1) {
1861     // All high words are zero, just use native remainder
1862     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1863   }
1864
1865   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1866   APInt Remainder(1,0);
1867   divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1868   return Remainder;
1869 }
1870
1871 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1872                     APInt &Quotient, APInt &Remainder) {
1873   // Get some size facts about the dividend and divisor
1874   unsigned lhsBits  = LHS.getActiveBits();
1875   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1876   unsigned rhsBits  = RHS.getActiveBits();
1877   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1878
1879   // Check the degenerate cases
1880   if (lhsWords == 0) {
1881     Quotient = 0;                // 0 / Y ===> 0
1882     Remainder = 0;               // 0 % Y ===> 0
1883     return;
1884   }
1885
1886   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1887     Quotient = 0;               // X / Y ===> 0, iff X < Y
1888     Remainder = LHS;            // X % Y ===> X, iff X < Y
1889     return;
1890   }
1891
1892   if (LHS == RHS) {
1893     Quotient  = 1;              // X / X ===> 1
1894     Remainder = 0;              // X % X ===> 0;
1895     return;
1896   }
1897
1898   if (lhsWords == 1 && rhsWords == 1) {
1899     // There is only one word to consider so use the native versions.
1900     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1901     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1902     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1903     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1904     return;
1905   }
1906
1907   // Okay, lets do it the long way
1908   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1909 }
1910
1911 void APInt::fromString(unsigned numbits, const char *str, unsigned slen,
1912                        uint8_t radix) {
1913   // Check our assumptions here
1914   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
1915          "Radix should be 2, 8, 10, or 16!");
1916   assert(str && "String is null?");
1917   bool isNeg = str[0] == '-';
1918   if (isNeg)
1919     str++, slen--;
1920   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1921   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1922   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1923   assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
1924
1925   // Allocate memory
1926   if (!isSingleWord())
1927     pVal = getClearedMemory(getNumWords());
1928
1929   // Figure out if we can shift instead of multiply
1930   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1931
1932   // Set up an APInt for the digit to add outside the loop so we don't
1933   // constantly construct/destruct it.
1934   APInt apdigit(getBitWidth(), 0);
1935   APInt apradix(getBitWidth(), radix);
1936
1937   // Enter digit traversal loop
1938   for (unsigned i = 0; i < slen; i++) {
1939     // Get a digit
1940     unsigned digit = 0;
1941     char cdigit = str[i];
1942     if (radix == 16) {
1943       if (!isxdigit(cdigit))
1944         assert(0 && "Invalid hex digit in string");
1945       if (isdigit(cdigit))
1946         digit = cdigit - '0';
1947       else if (cdigit >= 'a')
1948         digit = cdigit - 'a' + 10;
1949       else if (cdigit >= 'A')
1950         digit = cdigit - 'A' + 10;
1951       else
1952         assert(0 && "huh? we shouldn't get here");
1953     } else if (isdigit(cdigit)) {
1954       digit = cdigit - '0';
1955       assert((radix == 10 ||
1956               (radix == 8 && digit != 8 && digit != 9) ||
1957               (radix == 2 && (digit == 0 || digit == 1))) &&
1958              "Invalid digit in string for given radix");
1959     } else {
1960       assert(0 && "Invalid character in digit string");
1961     }
1962
1963     // Shift or multiply the value by the radix
1964     if (slen > 1) {
1965       if (shift)
1966         *this <<= shift;
1967       else
1968         *this *= apradix;
1969     }
1970
1971     // Add in the digit we just interpreted
1972     if (apdigit.isSingleWord())
1973       apdigit.VAL = digit;
1974     else
1975       apdigit.pVal[0] = digit;
1976     *this += apdigit;
1977   }
1978   // If its negative, put it in two's complement form
1979   if (isNeg) {
1980     (*this)--;
1981     this->flip();
1982   }
1983 }
1984
1985 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
1986                      bool Signed) const {
1987   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
1988          "Radix should be 2, 8, 10, or 16!");
1989
1990   // First, check for a zero value and just short circuit the logic below.
1991   if (*this == 0) {
1992     Str.push_back('0');
1993     return;
1994   }
1995
1996   static const char Digits[] = "0123456789ABCDEF";
1997
1998   if (isSingleWord()) {
1999     char Buffer[65];
2000     char *BufPtr = Buffer+65;
2001
2002     uint64_t N;
2003     if (Signed) {
2004       int64_t I = getSExtValue();
2005       if (I < 0) {
2006         Str.push_back('-');
2007         I = -I;
2008       }
2009       N = I;
2010     } else {
2011       N = getZExtValue();
2012     }
2013
2014     while (N) {
2015       *--BufPtr = Digits[N % Radix];
2016       N /= Radix;
2017     }
2018     Str.append(BufPtr, Buffer+65);
2019     return;
2020   }
2021
2022   APInt Tmp(*this);
2023
2024   if (Signed && isNegative()) {
2025     // They want to print the signed version and it is a negative value
2026     // Flip the bits and add one to turn it into the equivalent positive
2027     // value and put a '-' in the result.
2028     Tmp.flip();
2029     Tmp++;
2030     Str.push_back('-');
2031   }
2032
2033   // We insert the digits backward, then reverse them to get the right order.
2034   unsigned StartDig = Str.size();
2035
2036   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2037   // because the number of bits per digit (1, 3 and 4 respectively) divides
2038   // equaly.  We just shift until the value is zero.
2039   if (Radix != 10) {
2040     // Just shift tmp right for each digit width until it becomes zero
2041     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2042     unsigned MaskAmt = Radix - 1;
2043
2044     while (Tmp != 0) {
2045       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2046       Str.push_back(Digits[Digit]);
2047       Tmp = Tmp.lshr(ShiftAmt);
2048     }
2049   } else {
2050     APInt divisor(4, 10);
2051     while (Tmp != 0) {
2052       APInt APdigit(1, 0);
2053       APInt tmp2(Tmp.getBitWidth(), 0);
2054       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2055              &APdigit);
2056       unsigned Digit = (unsigned)APdigit.getZExtValue();
2057       assert(Digit < Radix && "divide failed");
2058       Str.push_back(Digits[Digit]);
2059       Tmp = tmp2;
2060     }
2061   }
2062
2063   // Reverse the digits before returning.
2064   std::reverse(Str.begin()+StartDig, Str.end());
2065 }
2066
2067 /// toString - This returns the APInt as a std::string.  Note that this is an
2068 /// inefficient method.  It is better to pass in a SmallVector/SmallString
2069 /// to the methods above.
2070 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2071   SmallString<40> S;
2072   toString(S, Radix, Signed);
2073   return S.c_str();
2074 }
2075
2076
2077 void APInt::dump() const {
2078   SmallString<40> S, U;
2079   this->toStringUnsigned(U);
2080   this->toStringSigned(S);
2081   fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str());
2082 }
2083
2084 void APInt::print(raw_ostream &OS, bool isSigned) const {
2085   SmallString<40> S;
2086   this->toString(S, 10, isSigned);
2087   OS << S.c_str();
2088 }
2089
2090 // This implements a variety of operations on a representation of
2091 // arbitrary precision, two's-complement, bignum integer values.
2092
2093 /* Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2094    and unrestricting assumption.  */
2095 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2096 COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2097
2098 /* Some handy functions local to this file.  */
2099 namespace {
2100
2101   /* Returns the integer part with the least significant BITS set.
2102      BITS cannot be zero.  */
2103   static inline integerPart
2104   lowBitMask(unsigned int bits)
2105   {
2106     assert (bits != 0 && bits <= integerPartWidth);
2107
2108     return ~(integerPart) 0 >> (integerPartWidth - bits);
2109   }
2110
2111   /* Returns the value of the lower half of PART.  */
2112   static inline integerPart
2113   lowHalf(integerPart part)
2114   {
2115     return part & lowBitMask(integerPartWidth / 2);
2116   }
2117
2118   /* Returns the value of the upper half of PART.  */
2119   static inline integerPart
2120   highHalf(integerPart part)
2121   {
2122     return part >> (integerPartWidth / 2);
2123   }
2124
2125   /* Returns the bit number of the most significant set bit of a part.
2126      If the input number has no bits set -1U is returned.  */
2127   static unsigned int
2128   partMSB(integerPart value)
2129   {
2130     unsigned int n, msb;
2131
2132     if (value == 0)
2133       return -1U;
2134
2135     n = integerPartWidth / 2;
2136
2137     msb = 0;
2138     do {
2139       if (value >> n) {
2140         value >>= n;
2141         msb += n;
2142       }
2143
2144       n >>= 1;
2145     } while (n);
2146
2147     return msb;
2148   }
2149
2150   /* Returns the bit number of the least significant set bit of a
2151      part.  If the input number has no bits set -1U is returned.  */
2152   static unsigned int
2153   partLSB(integerPart value)
2154   {
2155     unsigned int n, lsb;
2156
2157     if (value == 0)
2158       return -1U;
2159
2160     lsb = integerPartWidth - 1;
2161     n = integerPartWidth / 2;
2162
2163     do {
2164       if (value << n) {
2165         value <<= n;
2166         lsb -= n;
2167       }
2168
2169       n >>= 1;
2170     } while (n);
2171
2172     return lsb;
2173   }
2174 }
2175
2176 /* Sets the least significant part of a bignum to the input value, and
2177    zeroes out higher parts.  */
2178 void
2179 APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2180 {
2181   unsigned int i;
2182
2183   assert (parts > 0);
2184
2185   dst[0] = part;
2186   for(i = 1; i < parts; i++)
2187     dst[i] = 0;
2188 }
2189
2190 /* Assign one bignum to another.  */
2191 void
2192 APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2193 {
2194   unsigned int i;
2195
2196   for(i = 0; i < parts; i++)
2197     dst[i] = src[i];
2198 }
2199
2200 /* Returns true if a bignum is zero, false otherwise.  */
2201 bool
2202 APInt::tcIsZero(const integerPart *src, unsigned int parts)
2203 {
2204   unsigned int i;
2205
2206   for(i = 0; i < parts; i++)
2207     if (src[i])
2208       return false;
2209
2210   return true;
2211 }
2212
2213 /* Extract the given bit of a bignum; returns 0 or 1.  */
2214 int
2215 APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2216 {
2217   return(parts[bit / integerPartWidth]
2218          & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2219 }
2220
2221 /* Set the given bit of a bignum.  */
2222 void
2223 APInt::tcSetBit(integerPart *parts, unsigned int bit)
2224 {
2225   parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2226 }
2227
2228 /* Returns the bit number of the least significant set bit of a
2229    number.  If the input number has no bits set -1U is returned.  */
2230 unsigned int
2231 APInt::tcLSB(const integerPart *parts, unsigned int n)
2232 {
2233   unsigned int i, lsb;
2234
2235   for(i = 0; i < n; i++) {
2236       if (parts[i] != 0) {
2237           lsb = partLSB(parts[i]);
2238
2239           return lsb + i * integerPartWidth;
2240       }
2241   }
2242
2243   return -1U;
2244 }
2245
2246 /* Returns the bit number of the most significant set bit of a number.
2247    If the input number has no bits set -1U is returned.  */
2248 unsigned int
2249 APInt::tcMSB(const integerPart *parts, unsigned int n)
2250 {
2251   unsigned int msb;
2252
2253   do {
2254       --n;
2255
2256       if (parts[n] != 0) {
2257           msb = partMSB(parts[n]);
2258
2259           return msb + n * integerPartWidth;
2260       }
2261   } while (n);
2262
2263   return -1U;
2264 }
2265
2266 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2267    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2268    the least significant bit of DST.  All high bits above srcBITS in
2269    DST are zero-filled.  */
2270 void
2271 APInt::tcExtract(integerPart *dst, unsigned int dstCount, const integerPart *src,
2272                  unsigned int srcBits, unsigned int srcLSB)
2273 {
2274   unsigned int firstSrcPart, dstParts, shift, n;
2275
2276   dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2277   assert (dstParts <= dstCount);
2278
2279   firstSrcPart = srcLSB / integerPartWidth;
2280   tcAssign (dst, src + firstSrcPart, dstParts);
2281
2282   shift = srcLSB % integerPartWidth;
2283   tcShiftRight (dst, dstParts, shift);
2284
2285   /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2286      in DST.  If this is less that srcBits, append the rest, else
2287      clear the high bits.  */
2288   n = dstParts * integerPartWidth - shift;
2289   if (n < srcBits) {
2290     integerPart mask = lowBitMask (srcBits - n);
2291     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2292                           << n % integerPartWidth);
2293   } else if (n > srcBits) {
2294     if (srcBits % integerPartWidth)
2295       dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2296   }
2297
2298   /* Clear high parts.  */
2299   while (dstParts < dstCount)
2300     dst[dstParts++] = 0;
2301 }
2302
2303 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2304 integerPart
2305 APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2306              integerPart c, unsigned int parts)
2307 {
2308   unsigned int i;
2309
2310   assert(c <= 1);
2311
2312   for(i = 0; i < parts; i++) {
2313     integerPart l;
2314
2315     l = dst[i];
2316     if (c) {
2317       dst[i] += rhs[i] + 1;
2318       c = (dst[i] <= l);
2319     } else {
2320       dst[i] += rhs[i];
2321       c = (dst[i] < l);
2322     }
2323   }
2324
2325   return c;
2326 }
2327
2328 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2329 integerPart
2330 APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2331                   integerPart c, unsigned int parts)
2332 {
2333   unsigned int i;
2334
2335   assert(c <= 1);
2336
2337   for(i = 0; i < parts; i++) {
2338     integerPart l;
2339
2340     l = dst[i];
2341     if (c) {
2342       dst[i] -= rhs[i] + 1;
2343       c = (dst[i] >= l);
2344     } else {
2345       dst[i] -= rhs[i];
2346       c = (dst[i] > l);
2347     }
2348   }
2349
2350   return c;
2351 }
2352
2353 /* Negate a bignum in-place.  */
2354 void
2355 APInt::tcNegate(integerPart *dst, unsigned int parts)
2356 {
2357   tcComplement(dst, parts);
2358   tcIncrement(dst, parts);
2359 }
2360
2361 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2362     DST  = SRC * MULTIPLIER + CARRY   if add is false
2363
2364     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2365     they must start at the same point, i.e. DST == SRC.
2366
2367     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2368     returned.  Otherwise DST is filled with the least significant
2369     DSTPARTS parts of the result, and if all of the omitted higher
2370     parts were zero return zero, otherwise overflow occurred and
2371     return one.  */
2372 int
2373 APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2374                       integerPart multiplier, integerPart carry,
2375                       unsigned int srcParts, unsigned int dstParts,
2376                       bool add)
2377 {
2378   unsigned int i, n;
2379
2380   /* Otherwise our writes of DST kill our later reads of SRC.  */
2381   assert(dst <= src || dst >= src + srcParts);
2382   assert(dstParts <= srcParts + 1);
2383
2384   /* N loops; minimum of dstParts and srcParts.  */
2385   n = dstParts < srcParts ? dstParts: srcParts;
2386
2387   for(i = 0; i < n; i++) {
2388     integerPart low, mid, high, srcPart;
2389
2390       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2391
2392          This cannot overflow, because
2393
2394          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2395
2396          which is less than n^2.  */
2397
2398     srcPart = src[i];
2399
2400     if (multiplier == 0 || srcPart == 0)        {
2401       low = carry;
2402       high = 0;
2403     } else {
2404       low = lowHalf(srcPart) * lowHalf(multiplier);
2405       high = highHalf(srcPart) * highHalf(multiplier);
2406
2407       mid = lowHalf(srcPart) * highHalf(multiplier);
2408       high += highHalf(mid);
2409       mid <<= integerPartWidth / 2;
2410       if (low + mid < low)
2411         high++;
2412       low += mid;
2413
2414       mid = highHalf(srcPart) * lowHalf(multiplier);
2415       high += highHalf(mid);
2416       mid <<= integerPartWidth / 2;
2417       if (low + mid < low)
2418         high++;
2419       low += mid;
2420
2421       /* Now add carry.  */
2422       if (low + carry < low)
2423         high++;
2424       low += carry;
2425     }
2426
2427     if (add) {
2428       /* And now DST[i], and store the new low part there.  */
2429       if (low + dst[i] < low)
2430         high++;
2431       dst[i] += low;
2432     } else
2433       dst[i] = low;
2434
2435     carry = high;
2436   }
2437
2438   if (i < dstParts) {
2439     /* Full multiplication, there is no overflow.  */
2440     assert(i + 1 == dstParts);
2441     dst[i] = carry;
2442     return 0;
2443   } else {
2444     /* We overflowed if there is carry.  */
2445     if (carry)
2446       return 1;
2447
2448     /* We would overflow if any significant unwritten parts would be
2449        non-zero.  This is true if any remaining src parts are non-zero
2450        and the multiplier is non-zero.  */
2451     if (multiplier)
2452       for(; i < srcParts; i++)
2453         if (src[i])
2454           return 1;
2455
2456     /* We fitted in the narrow destination.  */
2457     return 0;
2458   }
2459 }
2460
2461 /* DST = LHS * RHS, where DST has the same width as the operands and
2462    is filled with the least significant parts of the result.  Returns
2463    one if overflow occurred, otherwise zero.  DST must be disjoint
2464    from both operands.  */
2465 int
2466 APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2467                   const integerPart *rhs, unsigned int parts)
2468 {
2469   unsigned int i;
2470   int overflow;
2471
2472   assert(dst != lhs && dst != rhs);
2473
2474   overflow = 0;
2475   tcSet(dst, 0, parts);
2476
2477   for(i = 0; i < parts; i++)
2478     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2479                                parts - i, true);
2480
2481   return overflow;
2482 }
2483
2484 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2485    operands.  No overflow occurs.  DST must be disjoint from both
2486    operands.  Returns the number of parts required to hold the
2487    result.  */
2488 unsigned int
2489 APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2490                       const integerPart *rhs, unsigned int lhsParts,
2491                       unsigned int rhsParts)
2492 {
2493   /* Put the narrower number on the LHS for less loops below.  */
2494   if (lhsParts > rhsParts) {
2495     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2496   } else {
2497     unsigned int n;
2498
2499     assert(dst != lhs && dst != rhs);
2500
2501     tcSet(dst, 0, rhsParts);
2502
2503     for(n = 0; n < lhsParts; n++)
2504       tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2505
2506     n = lhsParts + rhsParts;
2507
2508     return n - (dst[n - 1] == 0);
2509   }
2510 }
2511
2512 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2513    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2514    set REMAINDER to the remainder, return zero.  i.e.
2515
2516    OLD_LHS = RHS * LHS + REMAINDER
2517
2518    SCRATCH is a bignum of the same size as the operands and result for
2519    use by the routine; its contents need not be initialized and are
2520    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2521 */
2522 int
2523 APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2524                 integerPart *remainder, integerPart *srhs,
2525                 unsigned int parts)
2526 {
2527   unsigned int n, shiftCount;
2528   integerPart mask;
2529
2530   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2531
2532   shiftCount = tcMSB(rhs, parts) + 1;
2533   if (shiftCount == 0)
2534     return true;
2535
2536   shiftCount = parts * integerPartWidth - shiftCount;
2537   n = shiftCount / integerPartWidth;
2538   mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2539
2540   tcAssign(srhs, rhs, parts);
2541   tcShiftLeft(srhs, parts, shiftCount);
2542   tcAssign(remainder, lhs, parts);
2543   tcSet(lhs, 0, parts);
2544
2545   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2546      the total.  */
2547   for(;;) {
2548       int compare;
2549
2550       compare = tcCompare(remainder, srhs, parts);
2551       if (compare >= 0) {
2552         tcSubtract(remainder, srhs, 0, parts);
2553         lhs[n] |= mask;
2554       }
2555
2556       if (shiftCount == 0)
2557         break;
2558       shiftCount--;
2559       tcShiftRight(srhs, parts, 1);
2560       if ((mask >>= 1) == 0)
2561         mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2562   }
2563
2564   return false;
2565 }
2566
2567 /* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2568    There are no restrictions on COUNT.  */
2569 void
2570 APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2571 {
2572   if (count) {
2573     unsigned int jump, shift;
2574
2575     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2576     jump = count / integerPartWidth;
2577     shift = count % integerPartWidth;
2578
2579     while (parts > jump) {
2580       integerPart part;
2581
2582       parts--;
2583
2584       /* dst[i] comes from the two parts src[i - jump] and, if we have
2585          an intra-part shift, src[i - jump - 1].  */
2586       part = dst[parts - jump];
2587       if (shift) {
2588         part <<= shift;
2589         if (parts >= jump + 1)
2590           part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2591       }
2592
2593       dst[parts] = part;
2594     }
2595
2596     while (parts > 0)
2597       dst[--parts] = 0;
2598   }
2599 }
2600
2601 /* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2602    zero.  There are no restrictions on COUNT.  */
2603 void
2604 APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2605 {
2606   if (count) {
2607     unsigned int i, jump, shift;
2608
2609     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2610     jump = count / integerPartWidth;
2611     shift = count % integerPartWidth;
2612
2613     /* Perform the shift.  This leaves the most significant COUNT bits
2614        of the result at zero.  */
2615     for(i = 0; i < parts; i++) {
2616       integerPart part;
2617
2618       if (i + jump >= parts) {
2619         part = 0;
2620       } else {
2621         part = dst[i + jump];
2622         if (shift) {
2623           part >>= shift;
2624           if (i + jump + 1 < parts)
2625             part |= dst[i + jump + 1] << (integerPartWidth - shift);
2626         }
2627       }
2628
2629       dst[i] = part;
2630     }
2631   }
2632 }
2633
2634 /* Bitwise and of two bignums.  */
2635 void
2636 APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2637 {
2638   unsigned int i;
2639
2640   for(i = 0; i < parts; i++)
2641     dst[i] &= rhs[i];
2642 }
2643
2644 /* Bitwise inclusive or of two bignums.  */
2645 void
2646 APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2647 {
2648   unsigned int i;
2649
2650   for(i = 0; i < parts; i++)
2651     dst[i] |= rhs[i];
2652 }
2653
2654 /* Bitwise exclusive or of two bignums.  */
2655 void
2656 APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2657 {
2658   unsigned int i;
2659
2660   for(i = 0; i < parts; i++)
2661     dst[i] ^= rhs[i];
2662 }
2663
2664 /* Complement a bignum in-place.  */
2665 void
2666 APInt::tcComplement(integerPart *dst, unsigned int parts)
2667 {
2668   unsigned int i;
2669
2670   for(i = 0; i < parts; i++)
2671     dst[i] = ~dst[i];
2672 }
2673
2674 /* Comparison (unsigned) of two bignums.  */
2675 int
2676 APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2677                  unsigned int parts)
2678 {
2679   while (parts) {
2680       parts--;
2681       if (lhs[parts] == rhs[parts])
2682         continue;
2683
2684       if (lhs[parts] > rhs[parts])
2685         return 1;
2686       else
2687         return -1;
2688     }
2689
2690   return 0;
2691 }
2692
2693 /* Increment a bignum in-place, return the carry flag.  */
2694 integerPart
2695 APInt::tcIncrement(integerPart *dst, unsigned int parts)
2696 {
2697   unsigned int i;
2698
2699   for(i = 0; i < parts; i++)
2700     if (++dst[i] != 0)
2701       break;
2702
2703   return i == parts;
2704 }
2705
2706 /* Set the least significant BITS bits of a bignum, clear the
2707    rest.  */
2708 void
2709 APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2710                                  unsigned int bits)
2711 {
2712   unsigned int i;
2713
2714   i = 0;
2715   while (bits > integerPartWidth) {
2716     dst[i++] = ~(integerPart) 0;
2717     bits -= integerPartWidth;
2718   }
2719
2720   if (bits)
2721     dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2722
2723   while (i < parts)
2724     dst[i++] = 0;
2725 }