lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "apint"
  16 #include "llvm/ADT/APInt.h"
  17 #include "llvm/ADT/StringRef.h"
  18 #include "llvm/ADT/FoldingSet.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/Support/Debug.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include "llvm/Support/MathExtras.h"
  23 #include "llvm/Support/raw_ostream.h"
  24 #include <cmath>
  25 #include <limits>
  26 #include <cstring>
  27 #include <cstdlib>
  28 using namespace llvm;
  29
  30 /// A utility function for allocating memory, checking for allocation failures,
  31 /// and ensuring the contents are zeroed.
  32 inline static uint64_t* getClearedMemory(unsigned numWords) {
  33   uint64_t * result = new uint64_t[numWords];
  34   assert(result && "APInt memory allocation fails!");
  35   memset(result, 0, numWords * sizeof(uint64_t));
  36   return result;
  37 }
  38
  39 /// A utility function for allocating memory and checking for allocation
  40 /// failure.  The content is not zeroed.
  41 inline static uint64_t* getMemory(unsigned numWords) {
  42   uint64_t * result = new uint64_t[numWords];
  43   assert(result && "APInt memory allocation fails!");
  44   return result;
  45 }
  46
  47 /// A utility function that converts a character to a digit.
  48 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  49   unsigned r;
  50
  51   if (radix == 16) {
  52     r = cdigit - '0';
  53     if (r <= 9)
  54       return r;
  55
  56     r = cdigit - 'A';
  57     if (r <= 5)
  58       return r + 10;
  59
  60     r = cdigit - 'a';
  61     if (r <= 5)
  62       return r + 10;
  63   }
  64
  65   r = cdigit - '0';
  66   if (r < radix)
  67     return r;
  68
  69   return -1U;
  70 }
  71
  72
  73 void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
  74   pVal = getClearedMemory(getNumWords());
  75   pVal[0] = val;
  76   if (isSigned && int64_t(val) < 0)
  77     for (unsigned i = 1; i < getNumWords(); ++i)
  78       pVal[i] = -1ULL;
  79 }
  80
  81 void APInt::initSlowCase(const APInt& that) {
  82   pVal = getMemory(getNumWords());
  83   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
  84 }
  85
  86
  87 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
  88   : BitWidth(numBits), VAL(0) {
  89   assert(BitWidth && "Bitwidth too small");
  90   assert(bigVal && "Null pointer detected!");
  91   if (isSingleWord())
  92     VAL = bigVal[0];
  93   else {
  94     // Get memory, cleared to 0
  95     pVal = getClearedMemory(getNumWords());
  96     // Calculate the number of words to copy
  97     unsigned words = std::min<unsigned>(numWords, getNumWords());
  98     // Copy the words from bigVal to pVal
  99     memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
 100   }
 101   // Make sure unused high bits are cleared
 102   clearUnusedBits();
 103 }
 104
 105 APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
 106   : BitWidth(numbits), VAL(0) {
 107   assert(BitWidth && "Bitwidth too small");
 108   fromString(numbits, Str, radix);
 109 }
 110
 111 APInt& APInt::AssignSlowCase(const APInt& RHS) {
 112   // Don't do anything for X = X
 113   if (this == &RHS)
 114     return *this;
 115
 116   if (BitWidth == RHS.getBitWidth()) {
 117     // assume same bit-width single-word case is already handled
 118     assert(!isSingleWord());
 119     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
 120     return *this;
 121   }
 122
 123   if (isSingleWord()) {
 124     // assume case where both are single words is already handled
 125     assert(!RHS.isSingleWord());
 126     VAL = 0;
 127     pVal = getMemory(RHS.getNumWords());
 128     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 129   } else if (getNumWords() == RHS.getNumWords())
 130     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 131   else if (RHS.isSingleWord()) {
 132     delete [] pVal;
 133     VAL = RHS.VAL;
 134   } else {
 135     delete [] pVal;
 136     pVal = getMemory(RHS.getNumWords());
 137     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 138   }
 139   BitWidth = RHS.BitWidth;
 140   return clearUnusedBits();
 141 }
 142
 143 APInt& APInt::operator=(uint64_t RHS) {
 144   if (isSingleWord())
 145     VAL = RHS;
 146   else {
 147     pVal[0] = RHS;
 148     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
 149   }
 150   return clearUnusedBits();
 151 }
 152
 153 /// Profile - This method 'profiles' an APInt for use with FoldingSet.
 154 void APInt::Profile(FoldingSetNodeID& ID) const {
 155   ID.AddInteger(BitWidth);
 156
 157   if (isSingleWord()) {
 158     ID.AddInteger(VAL);
 159     return;
 160   }
 161
 162   unsigned NumWords = getNumWords();
 163   for (unsigned i = 0; i < NumWords; ++i)
 164     ID.AddInteger(pVal[i]);
 165 }
 166
 167 /// add_1 - This function adds a single "digit" integer, y, to the multiple
 168 /// "digit" integer array,  x[]. x[] is modified to reflect the addition and
 169 /// 1 is returned if there is a carry out, otherwise 0 is returned.
 170 /// @returns the carry of the addition.
 171 static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 172   for (unsigned i = 0; i < len; ++i) {
 173     dest[i] = y + x[i];
 174     if (dest[i] < y)
 175       y = 1; // Carry one to next digit.
 176     else {
 177       y = 0; // No need to carry so exit early
 178       break;
 179     }
 180   }
 181   return y;
 182 }
 183
 184 /// @brief Prefix increment operator. Increments the APInt by one.
 185 APInt& APInt::operator++() {
 186   if (isSingleWord())
 187     ++VAL;
 188   else
 189     add_1(pVal, pVal, getNumWords(), 1);
 190   return clearUnusedBits();
 191 }
 192
 193 /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
 194 /// the multi-digit integer array, x[], propagating the borrowed 1 value until
 195 /// no further borrowing is neeeded or it runs out of "digits" in x.  The result
 196 /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
 197 /// In other words, if y > x then this function returns 1, otherwise 0.
 198 /// @returns the borrow out of the subtraction
 199 static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
 200   for (unsigned i = 0; i < len; ++i) {
 201     uint64_t X = x[i];
 202     x[i] -= y;
 203     if (y > X)
 204       y = 1;  // We have to "borrow 1" from next "digit"
 205     else {
 206       y = 0;  // No need to borrow
 207       break;  // Remaining digits are unchanged so exit early
 208     }
 209   }
 210   return bool(y);
 211 }
 212
 213 /// @brief Prefix decrement operator. Decrements the APInt by one.
 214 APInt& APInt::operator--() {
 215   if (isSingleWord())
 216     --VAL;
 217   else
 218     sub_1(pVal, getNumWords(), 1);
 219   return clearUnusedBits();
 220 }
 221
 222 /// add - This function adds the integer array x to the integer array Y and
 223 /// places the result in dest.
 224 /// @returns the carry out from the addition
 225 /// @brief General addition of 64-bit integer arrays
 226 static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 227                 unsigned len) {
 228   bool carry = false;
 229   for (unsigned i = 0; i< len; ++i) {
 230     uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
 231     dest[i] = x[i] + y[i] + carry;
 232     carry = dest[i] < limit || (carry && dest[i] == limit);
 233   }
 234   return carry;
 235 }
 236
 237 /// Adds the RHS APint to this APInt.
 238 /// @returns this, after addition of RHS.
 239 /// @brief Addition assignment operator.
 240 APInt& APInt::operator+=(const APInt& RHS) {
 241   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 242   if (isSingleWord())
 243     VAL += RHS.VAL;
 244   else {
 245     add(pVal, pVal, RHS.pVal, getNumWords());
 246   }
 247   return clearUnusedBits();
 248 }
 249
 250 /// Subtracts the integer array y from the integer array x
 251 /// @returns returns the borrow out.
 252 /// @brief Generalized subtraction of 64-bit integer arrays.
 253 static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 254                 unsigned len) {
 255   bool borrow = false;
 256   for (unsigned i = 0; i < len; ++i) {
 257     uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
 258     borrow = y[i] > x_tmp || (borrow && x[i] == 0);
 259     dest[i] = x_tmp - y[i];
 260   }
 261   return borrow;
 262 }
 263
 264 /// Subtracts the RHS APInt from this APInt
 265 /// @returns this, after subtraction
 266 /// @brief Subtraction assignment operator.
 267 APInt& APInt::operator-=(const APInt& RHS) {
 268   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 269   if (isSingleWord())
 270     VAL -= RHS.VAL;
 271   else
 272     sub(pVal, pVal, RHS.pVal, getNumWords());
 273   return clearUnusedBits();
 274 }
 275
 276 /// Multiplies an integer array, x by a a uint64_t integer and places the result
 277 /// into dest.
 278 /// @returns the carry out of the multiplication.
 279 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 280 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 281   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 282   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 283   uint64_t carry = 0;
 284
 285   // For each digit of x.
 286   for (unsigned i = 0; i < len; ++i) {
 287     // Split x into high and low words
 288     uint64_t lx = x[i] & 0xffffffffULL;
 289     uint64_t hx = x[i] >> 32;
 290     // hasCarry - A flag to indicate if there is a carry to the next digit.
 291     // hasCarry == 0, no carry
 292     // hasCarry == 1, has carry
 293     // hasCarry == 2, no carry and the calculation result == 0.
 294     uint8_t hasCarry = 0;
 295     dest[i] = carry + lx * ly;
 296     // Determine if the add above introduces carry.
 297     hasCarry = (dest[i] < carry) ? 1 : 0;
 298     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 299     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 300     // (2^32 - 1) + 2^32 = 2^64.
 301     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 302
 303     carry += (lx * hy) & 0xffffffffULL;
 304     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 305     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 306             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 307   }
 308   return carry;
 309 }
 310
 311 /// Multiplies integer array x by integer array y and stores the result into
 312 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 313 /// @brief Generalized multiplicate of integer arrays.
 314 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
 315                 unsigned ylen) {
 316   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 317   for (unsigned i = 1; i < ylen; ++i) {
 318     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 319     uint64_t carry = 0, lx = 0, hx = 0;
 320     for (unsigned j = 0; j < xlen; ++j) {
 321       lx = x[j] & 0xffffffffULL;
 322       hx = x[j] >> 32;
 323       // hasCarry - A flag to indicate if has carry.
 324       // hasCarry == 0, no carry
 325       // hasCarry == 1, has carry
 326       // hasCarry == 2, no carry and the calculation result == 0.
 327       uint8_t hasCarry = 0;
 328       uint64_t resul = carry + lx * ly;
 329       hasCarry = (resul < carry) ? 1 : 0;
 330       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 331       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 332
 333       carry += (lx * hy) & 0xffffffffULL;
 334       resul = (carry << 32) | (resul & 0xffffffffULL);
 335       dest[i+j] += resul;
 336       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 337               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 338               ((lx * hy) >> 32) + hx * hy;
 339     }
 340     dest[i+xlen] = carry;
 341   }
 342 }
 343
 344 APInt& APInt::operator*=(const APInt& RHS) {
 345   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 346   if (isSingleWord()) {
 347     VAL *= RHS.VAL;
 348     clearUnusedBits();
 349     return *this;
 350   }
 351
 352   // Get some bit facts about LHS and check for zero
 353   unsigned lhsBits = getActiveBits();
 354   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 355   if (!lhsWords)
 356     // 0 * X ===> 0
 357     return *this;
 358
 359   // Get some bit facts about RHS and check for zero
 360   unsigned rhsBits = RHS.getActiveBits();
 361   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 362   if (!rhsWords) {
 363     // X * 0 ===> 0
 364     clear();
 365     return *this;
 366   }
 367
 368   // Allocate space for the result
 369   unsigned destWords = rhsWords + lhsWords;
 370   uint64_t *dest = getMemory(destWords);
 371
 372   // Perform the long multiply
 373   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 374
 375   // Copy result back into *this
 376   clear();
 377   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 378   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 379
 380   // delete dest array and return
 381   delete[] dest;
 382   return *this;
 383 }
 384
 385 APInt& APInt::operator&=(const APInt& RHS) {
 386   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 387   if (isSingleWord()) {
 388     VAL &= RHS.VAL;
 389     return *this;
 390   }
 391   unsigned numWords = getNumWords();
 392   for (unsigned i = 0; i < numWords; ++i)
 393     pVal[i] &= RHS.pVal[i];
 394   return *this;
 395 }
 396
 397 APInt& APInt::operator|=(const APInt& RHS) {
 398   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 399   if (isSingleWord()) {
 400     VAL |= RHS.VAL;
 401     return *this;
 402   }
 403   unsigned numWords = getNumWords();
 404   for (unsigned i = 0; i < numWords; ++i)
 405     pVal[i] |= RHS.pVal[i];
 406   return *this;
 407 }
 408
 409 APInt& APInt::operator^=(const APInt& RHS) {
 410   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 411   if (isSingleWord()) {
 412     VAL ^= RHS.VAL;
 413     this->clearUnusedBits();
 414     return *this;
 415   }
 416   unsigned numWords = getNumWords();
 417   for (unsigned i = 0; i < numWords; ++i)
 418     pVal[i] ^= RHS.pVal[i];
 419   return clearUnusedBits();
 420 }
 421
 422 APInt APInt::AndSlowCase(const APInt& RHS) const {
 423   unsigned numWords = getNumWords();
 424   uint64_t* val = getMemory(numWords);
 425   for (unsigned i = 0; i < numWords; ++i)
 426     val[i] = pVal[i] & RHS.pVal[i];
 427   return APInt(val, getBitWidth());
 428 }
 429
 430 APInt APInt::OrSlowCase(const APInt& RHS) const {
 431   unsigned numWords = getNumWords();
 432   uint64_t *val = getMemory(numWords);
 433   for (unsigned i = 0; i < numWords; ++i)
 434     val[i] = pVal[i] | RHS.pVal[i];
 435   return APInt(val, getBitWidth());
 436 }
 437
 438 APInt APInt::XorSlowCase(const APInt& RHS) const {
 439   unsigned numWords = getNumWords();
 440   uint64_t *val = getMemory(numWords);
 441   for (unsigned i = 0; i < numWords; ++i)
 442     val[i] = pVal[i] ^ RHS.pVal[i];
 443
 444   // 0^0==1 so clear the high bits in case they got set.
 445   return APInt(val, getBitWidth()).clearUnusedBits();
 446 }
 447
 448 bool APInt::operator !() const {
 449   if (isSingleWord())
 450     return !VAL;
 451
 452   for (unsigned i = 0; i < getNumWords(); ++i)
 453     if (pVal[i])
 454       return false;
 455   return true;
 456 }
 457
 458 APInt APInt::operator*(const APInt& RHS) const {
 459   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 460   if (isSingleWord())
 461     return APInt(BitWidth, VAL * RHS.VAL);
 462   APInt Result(*this);
 463   Result *= RHS;
 464   return Result.clearUnusedBits();
 465 }
 466
 467 APInt APInt::operator+(const APInt& RHS) const {
 468   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 469   if (isSingleWord())
 470     return APInt(BitWidth, VAL + RHS.VAL);
 471   APInt Result(BitWidth, 0);
 472   add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 473   return Result.clearUnusedBits();
 474 }
 475
 476 APInt APInt::operator-(const APInt& RHS) const {
 477   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 478   if (isSingleWord())
 479     return APInt(BitWidth, VAL - RHS.VAL);
 480   APInt Result(BitWidth, 0);
 481   sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 482   return Result.clearUnusedBits();
 483 }
 484
 485 bool APInt::operator[](unsigned bitPosition) const {
 486   return (maskBit(bitPosition) &
 487           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
 488 }
 489
 490 bool APInt::EqualSlowCase(const APInt& RHS) const {
 491   // Get some facts about the number of bits used in the two operands.
 492   unsigned n1 = getActiveBits();
 493   unsigned n2 = RHS.getActiveBits();
 494
 495   // If the number of bits isn't the same, they aren't equal
 496   if (n1 != n2)
 497     return false;
 498
 499   // If the number of bits fits in a word, we only need to compare the low word.
 500   if (n1 <= APINT_BITS_PER_WORD)
 501     return pVal[0] == RHS.pVal[0];
 502
 503   // Otherwise, compare everything
 504   for (int i = whichWord(n1 - 1); i >= 0; --i)
 505     if (pVal[i] != RHS.pVal[i])
 506       return false;
 507   return true;
 508 }
 509
 510 bool APInt::EqualSlowCase(uint64_t Val) const {
 511   unsigned n = getActiveBits();
 512   if (n <= APINT_BITS_PER_WORD)
 513     return pVal[0] == Val;
 514   else
 515     return false;
 516 }
 517
 518 bool APInt::ult(const APInt& RHS) const {
 519   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 520   if (isSingleWord())
 521     return VAL < RHS.VAL;
 522
 523   // Get active bit length of both operands
 524   unsigned n1 = getActiveBits();
 525   unsigned n2 = RHS.getActiveBits();
 526
 527   // If magnitude of LHS is less than RHS, return true.
 528   if (n1 < n2)
 529     return true;
 530
 531   // If magnitude of RHS is greather than LHS, return false.
 532   if (n2 < n1)
 533     return false;
 534
 535   // If they bot fit in a word, just compare the low order word
 536   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
 537     return pVal[0] < RHS.pVal[0];
 538
 539   // Otherwise, compare all words
 540   unsigned topWord = whichWord(std::max(n1,n2)-1);
 541   for (int i = topWord; i >= 0; --i) {
 542     if (pVal[i] > RHS.pVal[i])
 543       return false;
 544     if (pVal[i] < RHS.pVal[i])
 545       return true;
 546   }
 547   return false;
 548 }
 549
 550 bool APInt::slt(const APInt& RHS) const {
 551   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 552   if (isSingleWord()) {
 553     int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 554     int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
 555     return lhsSext < rhsSext;
 556   }
 557
 558   APInt lhs(*this);
 559   APInt rhs(RHS);
 560   bool lhsNeg = isNegative();
 561   bool rhsNeg = rhs.isNegative();
 562   if (lhsNeg) {
 563     // Sign bit is set so perform two's complement to make it positive
 564     lhs.flip();
 565     lhs++;
 566   }
 567   if (rhsNeg) {
 568     // Sign bit is set so perform two's complement to make it positive
 569     rhs.flip();
 570     rhs++;
 571   }
 572
 573   // Now we have unsigned values to compare so do the comparison if necessary
 574   // based on the negativeness of the values.
 575   if (lhsNeg)
 576     if (rhsNeg)
 577       return lhs.ugt(rhs);
 578     else
 579       return true;
 580   else if (rhsNeg)
 581     return false;
 582   else
 583     return lhs.ult(rhs);
 584 }
 585
 586 APInt& APInt::set(unsigned bitPosition) {
 587   if (isSingleWord())
 588     VAL |= maskBit(bitPosition);
 589   else
 590     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 591   return *this;
 592 }
 593
 594 /// Set the given bit to 0 whose position is given as "bitPosition".
 595 /// @brief Set a given bit to 0.
 596 APInt& APInt::clear(unsigned bitPosition) {
 597   if (isSingleWord())
 598     VAL &= ~maskBit(bitPosition);
 599   else
 600     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 601   return *this;
 602 }
 603
 604 /// @brief Toggle every bit to its opposite value.
 605
 606 /// Toggle a given bit to its opposite value whose position is given
 607 /// as "bitPosition".
 608 /// @brief Toggles a given bit to its opposite value.
 609 APInt& APInt::flip(unsigned bitPosition) {
 610   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 611   if ((*this)[bitPosition]) clear(bitPosition);
 612   else set(bitPosition);
 613   return *this;
 614 }
 615
 616 unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
 617   assert(!str.empty() && "Invalid string length");
 618   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
 619          "Radix should be 2, 8, 10, or 16!");
 620
 621   size_t slen = str.size();
 622
 623   // Each computation below needs to know if it's negative.
 624   StringRef::iterator p = str.begin();
 625   unsigned isNegative = *p == '-';
 626   if (*p == '-' || *p == '+') {
 627     p++;
 628     slen--;
 629     assert(slen && "String is only a sign, needs a value.");
 630   }
 631
 632   // For radixes of power-of-two values, the bits required is accurately and
 633   // easily computed
 634   if (radix == 2)
 635     return slen + isNegative;
 636   if (radix == 8)
 637     return slen * 3 + isNegative;
 638   if (radix == 16)
 639     return slen * 4 + isNegative;
 640
 641   // This is grossly inefficient but accurate. We could probably do something
 642   // with a computation of roughly slen*64/20 and then adjust by the value of
 643   // the first few digits. But, I'm not sure how accurate that could be.
 644
 645   // Compute a sufficient number of bits that is always large enough but might
 646   // be too large. This avoids the assertion in the constructor. This
 647   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 648   // bits in that case.
 649   unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
 650
 651   // Convert to the actual binary value.
 652   APInt tmp(sufficient, StringRef(p, slen), radix);
 653
 654   // Compute how many bits are required. If the log is infinite, assume we need
 655   // just bit.
 656   unsigned log = tmp.logBase2();
 657   if (log == (unsigned)-1) {
 658     return isNegative + 1;
 659   } else {
 660     return isNegative + log + 1;
 661   }
 662 }
 663
 664 // From http://www.burtleburtle.net, byBob Jenkins.
 665 // When targeting x86, both GCC and LLVM seem to recognize this as a
 666 // rotate instruction.
 667 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
 668
 669 // From http://www.burtleburtle.net, by Bob Jenkins.
 670 #define mix(a,b,c) \
 671   { \
 672     a -= c;  a ^= rot(c, 4);  c += b; \
 673     b -= a;  b ^= rot(a, 6);  a += c; \
 674     c -= b;  c ^= rot(b, 8);  b += a; \
 675     a -= c;  a ^= rot(c,16);  c += b; \
 676     b -= a;  b ^= rot(a,19);  a += c; \
 677     c -= b;  c ^= rot(b, 4);  b += a; \
 678   }
 679
 680 // From http://www.burtleburtle.net, by Bob Jenkins.
 681 #define final(a,b,c) \
 682   { \
 683     c ^= b; c -= rot(b,14); \
 684     a ^= c; a -= rot(c,11); \
 685     b ^= a; b -= rot(a,25); \
 686     c ^= b; c -= rot(b,16); \
 687     a ^= c; a -= rot(c,4);  \
 688     b ^= a; b -= rot(a,14); \
 689     c ^= b; c -= rot(b,24); \
 690   }
 691
 692 // hashword() was adapted from http://www.burtleburtle.net, by Bob
 693 // Jenkins.  k is a pointer to an array of uint32_t values; length is
 694 // the length of the key, in 32-bit chunks.  This version only handles
 695 // keys that are a multiple of 32 bits in size.
 696 static inline uint32_t hashword(const uint64_t *k64, size_t length)
 697 {
 698   const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
 699   uint32_t a,b,c;
 700
 701   /* Set up the internal state */
 702   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
 703
 704   /*------------------------------------------------- handle most of the key */
 705   while (length > 3)
 706     {
 707       a += k[0];
 708       b += k[1];
 709       c += k[2];
 710       mix(a,b,c);
 711       length -= 3;
 712       k += 3;
 713     }
 714
 715   /*------------------------------------------- handle the last 3 uint32_t's */
 716   switch (length) {                  /* all the case statements fall through */
 717   case 3 : c+=k[2];
 718   case 2 : b+=k[1];
 719   case 1 : a+=k[0];
 720     final(a,b,c);
 721     case 0:     /* case 0: nothing left to add */
 722       break;
 723     }
 724   /*------------------------------------------------------ report the result */
 725   return c;
 726 }
 727
 728 // hashword8() was adapted from http://www.burtleburtle.net, by Bob
 729 // Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
 730 // targeting x86 (32 or 64 bit), both LLVM and GCC compile this
 731 // function into about 35 instructions when inlined.
 732 static inline uint32_t hashword8(const uint64_t k64)
 733 {
 734   uint32_t a,b,c;
 735   a = b = c = 0xdeadbeef + 4;
 736   b += k64 >> 32;
 737   a += k64 & 0xffffffff;
 738   final(a,b,c);
 739   return c;
 740 }
 741 #undef final
 742 #undef mix
 743 #undef rot
 744
 745 uint64_t APInt::getHashValue() const {
 746   uint64_t hash;
 747   if (isSingleWord())
 748     hash = hashword8(VAL);
 749   else
 750     hash = hashword(pVal, getNumWords()*2);
 751   return hash;
 752 }
 753
 754 /// HiBits - This function returns the high "numBits" bits of this APInt.
 755 APInt APInt::getHiBits(unsigned numBits) const {
 756   return APIntOps::lshr(*this, BitWidth - numBits);
 757 }
 758
 759 /// LoBits - This function returns the low "numBits" bits of this APInt.
 760 APInt APInt::getLoBits(unsigned numBits) const {
 761   return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
 762                         BitWidth - numBits);
 763 }
 764
 765 bool APInt::isPowerOf2() const {
 766   return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
 767 }
 768
 769 unsigned APInt::countLeadingZerosSlowCase() const {
 770   unsigned Count = 0;
 771   for (unsigned i = getNumWords(); i > 0u; --i) {
 772     if (pVal[i-1] == 0)
 773       Count += APINT_BITS_PER_WORD;
 774     else {
 775       Count += CountLeadingZeros_64(pVal[i-1]);
 776       break;
 777     }
 778   }
 779   unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
 780   if (remainder)
 781     Count -= APINT_BITS_PER_WORD - remainder;
 782   return std::min(Count, BitWidth);
 783 }
 784
 785 static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
 786   unsigned Count = 0;
 787   if (skip)
 788     V <<= skip;
 789   while (V && (V & (1ULL << 63))) {
 790     Count++;
 791     V <<= 1;
 792   }
 793   return Count;
 794 }
 795
 796 unsigned APInt::countLeadingOnes() const {
 797   if (isSingleWord())
 798     return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
 799
 800   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 801   unsigned shift;
 802   if (!highWordBits) {
 803     highWordBits = APINT_BITS_PER_WORD;
 804     shift = 0;
 805   } else {
 806     shift = APINT_BITS_PER_WORD - highWordBits;
 807   }
 808   int i = getNumWords() - 1;
 809   unsigned Count = countLeadingOnes_64(pVal[i], shift);
 810   if (Count == highWordBits) {
 811     for (i--; i >= 0; --i) {
 812       if (pVal[i] == -1ULL)
 813         Count += APINT_BITS_PER_WORD;
 814       else {
 815         Count += countLeadingOnes_64(pVal[i], 0);
 816         break;
 817       }
 818     }
 819   }
 820   return Count;
 821 }
 822
 823 unsigned APInt::countTrailingZeros() const {
 824   if (isSingleWord())
 825     return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
 826   unsigned Count = 0;
 827   unsigned i = 0;
 828   for (; i < getNumWords() && pVal[i] == 0; ++i)
 829     Count += APINT_BITS_PER_WORD;
 830   if (i < getNumWords())
 831     Count += CountTrailingZeros_64(pVal[i]);
 832   return std::min(Count, BitWidth);
 833 }
 834
 835 unsigned APInt::countTrailingOnesSlowCase() const {
 836   unsigned Count = 0;
 837   unsigned i = 0;
 838   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
 839     Count += APINT_BITS_PER_WORD;
 840   if (i < getNumWords())
 841     Count += CountTrailingOnes_64(pVal[i]);
 842   return std::min(Count, BitWidth);
 843 }
 844
 845 unsigned APInt::countPopulationSlowCase() const {
 846   unsigned Count = 0;
 847   for (unsigned i = 0; i < getNumWords(); ++i)
 848     Count += CountPopulation_64(pVal[i]);
 849   return Count;
 850 }
 851
 852 APInt APInt::byteSwap() const {
 853   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 854   if (BitWidth == 16)
 855     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
 856   else if (BitWidth == 32)
 857     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
 858   else if (BitWidth == 48) {
 859     unsigned Tmp1 = unsigned(VAL >> 16);
 860     Tmp1 = ByteSwap_32(Tmp1);
 861     uint16_t Tmp2 = uint16_t(VAL);
 862     Tmp2 = ByteSwap_16(Tmp2);
 863     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 864   } else if (BitWidth == 64)
 865     return APInt(BitWidth, ByteSwap_64(VAL));
 866   else {
 867     APInt Result(BitWidth, 0);
 868     char *pByte = (char*)Result.pVal;
 869     for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
 870       char Tmp = pByte[i];
 871       pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
 872       pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
 873     }
 874     return Result;
 875   }
 876 }
 877
 878 APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
 879                                             const APInt& API2) {
 880   APInt A = API1, B = API2;
 881   while (!!B) {
 882     APInt T = B;
 883     B = APIntOps::urem(A, B);
 884     A = T;
 885   }
 886   return A;
 887 }
 888
 889 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 890   union {
 891     double D;
 892     uint64_t I;
 893   } T;
 894   T.D = Double;
 895
 896   // Get the sign bit from the highest order bit
 897   bool isNeg = T.I >> 63;
 898
 899   // Get the 11-bit exponent and adjust for the 1023 bit bias
 900   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 901
 902   // If the exponent is negative, the value is < 0 so just return 0.
 903   if (exp < 0)
 904     return APInt(width, 0u);
 905
 906   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 907   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 908
 909   // If the exponent doesn't shift all bits out of the mantissa
 910   if (exp < 52)
 911     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 912                     APInt(width, mantissa >> (52 - exp));
 913
 914   // If the client didn't provide enough bits for us to shift the mantissa into
 915   // then the result is undefined, just return 0
 916   if (width <= exp - 52)
 917     return APInt(width, 0);
 918
 919   // Otherwise, we have to shift the mantissa bits up to the right location
 920   APInt Tmp(width, mantissa);
 921   Tmp = Tmp.shl((unsigned)exp - 52);
 922   return isNeg ? -Tmp : Tmp;
 923 }
 924
 925 /// RoundToDouble - This function converts this APInt to a double.
 926 /// The layout for double is as following (IEEE Standard 754):
 927 ///  --------------------------------------
 928 /// |  Sign    Exponent    Fraction    Bias |
 929 /// |-------------------------------------- |
 930 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 931 ///  --------------------------------------
 932 double APInt::roundToDouble(bool isSigned) const {
 933
 934   // Handle the simple case where the value is contained in one uint64_t.
 935   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 936   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 937     if (isSigned) {
 938       int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
 939       return double(sext);
 940     } else
 941       return double(getWord(0));
 942   }
 943
 944   // Determine if the value is negative.
 945   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 946
 947   // Construct the absolute value if we're negative.
 948   APInt Tmp(isNeg ? -(*this) : (*this));
 949
 950   // Figure out how many bits we're using.
 951   unsigned n = Tmp.getActiveBits();
 952
 953   // The exponent (without bias normalization) is just the number of bits
 954   // we are using. Note that the sign bit is gone since we constructed the
 955   // absolute value.
 956   uint64_t exp = n;
 957
 958   // Return infinity for exponent overflow
 959   if (exp > 1023) {
 960     if (!isSigned || !isNeg)
 961       return std::numeric_limits<double>::infinity();
 962     else
 963       return -std::numeric_limits<double>::infinity();
 964   }
 965   exp += 1023; // Increment for 1023 bias
 966
 967   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 968   // extract the high 52 bits from the correct words in pVal.
 969   uint64_t mantissa;
 970   unsigned hiWord = whichWord(n-1);
 971   if (hiWord == 0) {
 972     mantissa = Tmp.pVal[0];
 973     if (n > 52)
 974       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 975   } else {
 976     assert(hiWord > 0 && "huh?");
 977     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 978     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 979     mantissa = hibits | lobits;
 980   }
 981
 982   // The leading bit of mantissa is implicit, so get rid of it.
 983   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 984   union {
 985     double D;
 986     uint64_t I;
 987   } T;
 988   T.I = sign | (exp << 52) | mantissa;
 989   return T.D;
 990 }
 991
 992 // Truncate to new width.
 993 APInt &APInt::trunc(unsigned width) {
 994   assert(width < BitWidth && "Invalid APInt Truncate request");
 995   assert(width && "Can't truncate to 0 bits");
 996   unsigned wordsBefore = getNumWords();
 997   BitWidth = width;
 998   unsigned wordsAfter = getNumWords();
 999   if (wordsBefore != wordsAfter) {
1000     if (wordsAfter == 1) {
1001       uint64_t *tmp = pVal;
1002       VAL = pVal[0];
1003       delete [] tmp;
1004     } else {
1005       uint64_t *newVal = getClearedMemory(wordsAfter);
1006       for (unsigned i = 0; i < wordsAfter; ++i)
1007         newVal[i] = pVal[i];
1008       delete [] pVal;
1009       pVal = newVal;
1010     }
1011   }
1012   return clearUnusedBits();
1013 }
1014
1015 // Sign extend to a new width.
1016 APInt &APInt::sext(unsigned width) {
1017   assert(width > BitWidth && "Invalid APInt SignExtend request");
1018   // If the sign bit isn't set, this is the same as zext.
1019   if (!isNegative()) {
1020     zext(width);
1021     return *this;
1022   }
1023
1024   // The sign bit is set. First, get some facts
1025   unsigned wordsBefore = getNumWords();
1026   unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
1027   BitWidth = width;
1028   unsigned wordsAfter = getNumWords();
1029
1030   // Mask the high order word appropriately
1031   if (wordsBefore == wordsAfter) {
1032     unsigned newWordBits = width % APINT_BITS_PER_WORD;
1033     // The extension is contained to the wordsBefore-1th word.
1034     uint64_t mask = ~0ULL;
1035     if (newWordBits)
1036       mask >>= APINT_BITS_PER_WORD - newWordBits;
1037     mask <<= wordBits;
1038     if (wordsBefore == 1)
1039       VAL |= mask;
1040     else
1041       pVal[wordsBefore-1] |= mask;
1042     return clearUnusedBits();
1043   }
1044
1045   uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1046   uint64_t *newVal = getMemory(wordsAfter);
1047   if (wordsBefore == 1)
1048     newVal[0] = VAL | mask;
1049   else {
1050     for (unsigned i = 0; i < wordsBefore; ++i)
1051       newVal[i] = pVal[i];
1052     newVal[wordsBefore-1] |= mask;
1053   }
1054   for (unsigned i = wordsBefore; i < wordsAfter; i++)
1055     newVal[i] = -1ULL;
1056   if (wordsBefore != 1)
1057     delete [] pVal;
1058   pVal = newVal;
1059   return clearUnusedBits();
1060 }
1061
1062 //  Zero extend to a new width.
1063 APInt &APInt::zext(unsigned width) {
1064   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1065   unsigned wordsBefore = getNumWords();
1066   BitWidth = width;
1067   unsigned wordsAfter = getNumWords();
1068   if (wordsBefore != wordsAfter) {
1069     uint64_t *newVal = getClearedMemory(wordsAfter);
1070     if (wordsBefore == 1)
1071       newVal[0] = VAL;
1072     else
1073       for (unsigned i = 0; i < wordsBefore; ++i)
1074         newVal[i] = pVal[i];
1075     if (wordsBefore != 1)
1076       delete [] pVal;
1077     pVal = newVal;
1078   }
1079   return *this;
1080 }
1081
1082 APInt &APInt::zextOrTrunc(unsigned width) {
1083   if (BitWidth < width)
1084     return zext(width);
1085   if (BitWidth > width)
1086     return trunc(width);
1087   return *this;
1088 }
1089
1090 APInt &APInt::sextOrTrunc(unsigned width) {
1091   if (BitWidth < width)
1092     return sext(width);
1093   if (BitWidth > width)
1094     return trunc(width);
1095   return *this;
1096 }
1097
1098 /// Arithmetic right-shift this APInt by shiftAmt.
1099 /// @brief Arithmetic right-shift function.
1100 APInt APInt::ashr(const APInt &shiftAmt) const {
1101   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1102 }
1103
1104 /// Arithmetic right-shift this APInt by shiftAmt.
1105 /// @brief Arithmetic right-shift function.
1106 APInt APInt::ashr(unsigned shiftAmt) const {
1107   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1108   // Handle a degenerate case
1109   if (shiftAmt == 0)
1110     return *this;
1111
1112   // Handle single word shifts with built-in ashr
1113   if (isSingleWord()) {
1114     if (shiftAmt == BitWidth)
1115       return APInt(BitWidth, 0); // undefined
1116     else {
1117       unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1118       return APInt(BitWidth,
1119         (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1120     }
1121   }
1122
1123   // If all the bits were shifted out, the result is, technically, undefined.
1124   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1125   // issues in the algorithm below.
1126   if (shiftAmt == BitWidth) {
1127     if (isNegative())
1128       return APInt(BitWidth, -1ULL, true);
1129     else
1130       return APInt(BitWidth, 0);
1131   }
1132
1133   // Create some space for the result.
1134   uint64_t * val = new uint64_t[getNumWords()];
1135
1136   // Compute some values needed by the following shift algorithms
1137   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1138   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1139   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1140   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1141   if (bitsInWord == 0)
1142     bitsInWord = APINT_BITS_PER_WORD;
1143
1144   // If we are shifting whole words, just move whole words
1145   if (wordShift == 0) {
1146     // Move the words containing significant bits
1147     for (unsigned i = 0; i <= breakWord; ++i)
1148       val[i] = pVal[i+offset]; // move whole word
1149
1150     // Adjust the top significant word for sign bit fill, if negative
1151     if (isNegative())
1152       if (bitsInWord < APINT_BITS_PER_WORD)
1153         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1154   } else {
1155     // Shift the low order words
1156     for (unsigned i = 0; i < breakWord; ++i) {
1157       // This combines the shifted corresponding word with the low bits from
1158       // the next word (shifted into this word's high bits).
1159       val[i] = (pVal[i+offset] >> wordShift) |
1160                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1161     }
1162
1163     // Shift the break word. In this case there are no bits from the next word
1164     // to include in this word.
1165     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1166
1167     // Deal with sign extenstion in the break word, and possibly the word before
1168     // it.
1169     if (isNegative()) {
1170       if (wordShift > bitsInWord) {
1171         if (breakWord > 0)
1172           val[breakWord-1] |=
1173             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1174         val[breakWord] |= ~0ULL;
1175       } else
1176         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1177     }
1178   }
1179
1180   // Remaining words are 0 or -1, just assign them.
1181   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1182   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1183     val[i] = fillValue;
1184   return APInt(val, BitWidth).clearUnusedBits();
1185 }
1186
1187 /// Logical right-shift this APInt by shiftAmt.
1188 /// @brief Logical right-shift function.
1189 APInt APInt::lshr(const APInt &shiftAmt) const {
1190   return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1191 }
1192
1193 /// Logical right-shift this APInt by shiftAmt.
1194 /// @brief Logical right-shift function.
1195 APInt APInt::lshr(unsigned shiftAmt) const {
1196   if (isSingleWord()) {
1197     if (shiftAmt == BitWidth)
1198       return APInt(BitWidth, 0);
1199     else
1200       return APInt(BitWidth, this->VAL >> shiftAmt);
1201   }
1202
1203   // If all the bits were shifted out, the result is 0. This avoids issues
1204   // with shifting by the size of the integer type, which produces undefined
1205   // results. We define these "undefined results" to always be 0.
1206   if (shiftAmt == BitWidth)
1207     return APInt(BitWidth, 0);
1208
1209   // If none of the bits are shifted out, the result is *this. This avoids
1210   // issues with shifting by the size of the integer type, which produces
1211   // undefined results in the code below. This is also an optimization.
1212   if (shiftAmt == 0)
1213     return *this;
1214
1215   // Create some space for the result.
1216   uint64_t * val = new uint64_t[getNumWords()];
1217
1218   // If we are shifting less than a word, compute the shift with a simple carry
1219   if (shiftAmt < APINT_BITS_PER_WORD) {
1220     uint64_t carry = 0;
1221     for (int i = getNumWords()-1; i >= 0; --i) {
1222       val[i] = (pVal[i] >> shiftAmt) | carry;
1223       carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1224     }
1225     return APInt(val, BitWidth).clearUnusedBits();
1226   }
1227
1228   // Compute some values needed by the remaining shift algorithms
1229   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1230   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1231
1232   // If we are shifting whole words, just move whole words
1233   if (wordShift == 0) {
1234     for (unsigned i = 0; i < getNumWords() - offset; ++i)
1235       val[i] = pVal[i+offset];
1236     for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1237       val[i] = 0;
1238     return APInt(val,BitWidth).clearUnusedBits();
1239   }
1240
1241   // Shift the low order words
1242   unsigned breakWord = getNumWords() - offset -1;
1243   for (unsigned i = 0; i < breakWord; ++i)
1244     val[i] = (pVal[i+offset] >> wordShift) |
1245              (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1246   // Shift the break word.
1247   val[breakWord] = pVal[breakWord+offset] >> wordShift;
1248
1249   // Remaining words are 0
1250   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1251     val[i] = 0;
1252   return APInt(val, BitWidth).clearUnusedBits();
1253 }
1254
1255 /// Left-shift this APInt by shiftAmt.
1256 /// @brief Left-shift function.
1257 APInt APInt::shl(const APInt &shiftAmt) const {
1258   // It's undefined behavior in C to shift by BitWidth or greater.
1259   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1260 }
1261
1262 APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1263   // If all the bits were shifted out, the result is 0. This avoids issues
1264   // with shifting by the size of the integer type, which produces undefined
1265   // results. We define these "undefined results" to always be 0.
1266   if (shiftAmt == BitWidth)
1267     return APInt(BitWidth, 0);
1268
1269   // If none of the bits are shifted out, the result is *this. This avoids a
1270   // lshr by the words size in the loop below which can produce incorrect
1271   // results. It also avoids the expensive computation below for a common case.
1272   if (shiftAmt == 0)
1273     return *this;
1274
1275   // Create some space for the result.
1276   uint64_t * val = new uint64_t[getNumWords()];
1277
1278   // If we are shifting less than a word, do it the easy way
1279   if (shiftAmt < APINT_BITS_PER_WORD) {
1280     uint64_t carry = 0;
1281     for (unsigned i = 0; i < getNumWords(); i++) {
1282       val[i] = pVal[i] << shiftAmt | carry;
1283       carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1284     }
1285     return APInt(val, BitWidth).clearUnusedBits();
1286   }
1287
1288   // Compute some values needed by the remaining shift algorithms
1289   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1290   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1291
1292   // If we are shifting whole words, just move whole words
1293   if (wordShift == 0) {
1294     for (unsigned i = 0; i < offset; i++)
1295       val[i] = 0;
1296     for (unsigned i = offset; i < getNumWords(); i++)
1297       val[i] = pVal[i-offset];
1298     return APInt(val,BitWidth).clearUnusedBits();
1299   }
1300
1301   // Copy whole words from this to Result.
1302   unsigned i = getNumWords() - 1;
1303   for (; i > offset; --i)
1304     val[i] = pVal[i-offset] << wordShift |
1305              pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1306   val[offset] = pVal[0] << wordShift;
1307   for (i = 0; i < offset; ++i)
1308     val[i] = 0;
1309   return APInt(val, BitWidth).clearUnusedBits();
1310 }
1311
1312 APInt APInt::rotl(const APInt &rotateAmt) const {
1313   return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1314 }
1315
1316 APInt APInt::rotl(unsigned rotateAmt) const {
1317   if (rotateAmt == 0)
1318     return *this;
1319   // Don't get too fancy, just use existing shift/or facilities
1320   APInt hi(*this);
1321   APInt lo(*this);
1322   hi.shl(rotateAmt);
1323   lo.lshr(BitWidth - rotateAmt);
1324   return hi | lo;
1325 }
1326
1327 APInt APInt::rotr(const APInt &rotateAmt) const {
1328   return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1329 }
1330
1331 APInt APInt::rotr(unsigned rotateAmt) const {
1332   if (rotateAmt == 0)
1333     return *this;
1334   // Don't get too fancy, just use existing shift/or facilities
1335   APInt hi(*this);
1336   APInt lo(*this);
1337   lo.lshr(rotateAmt);
1338   hi.shl(BitWidth - rotateAmt);
1339   return hi | lo;
1340 }
1341
1342 // Square Root - this method computes and returns the square root of "this".
1343 // Three mechanisms are used for computation. For small values (<= 5 bits),
1344 // a table lookup is done. This gets some performance for common cases. For
1345 // values using less than 52 bits, the value is converted to double and then
1346 // the libc sqrt function is called. The result is rounded and then converted
1347 // back to a uint64_t which is then used to construct the result. Finally,
1348 // the Babylonian method for computing square roots is used.
1349 APInt APInt::sqrt() const {
1350
1351   // Determine the magnitude of the value.
1352   unsigned magnitude = getActiveBits();
1353
1354   // Use a fast table for some small values. This also gets rid of some
1355   // rounding errors in libc sqrt for small values.
1356   if (magnitude <= 5) {
1357     static const uint8_t results[32] = {
1358       /*     0 */ 0,
1359       /*  1- 2 */ 1, 1,
1360       /*  3- 6 */ 2, 2, 2, 2,
1361       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1362       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1363       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1364       /*    31 */ 6
1365     };
1366     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1367   }
1368
1369   // If the magnitude of the value fits in less than 52 bits (the precision of
1370   // an IEEE double precision floating point value), then we can use the
1371   // libc sqrt function which will probably use a hardware sqrt computation.
1372   // This should be faster than the algorithm below.
1373   if (magnitude < 52) {
1374 #ifdef _MSC_VER
1375     // Amazingly, VC++ doesn't have round().
1376     return APInt(BitWidth,
1377                  uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1378 #else
1379     return APInt(BitWidth,
1380                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1381 #endif
1382   }
1383
1384   // Okay, all the short cuts are exhausted. We must compute it. The following
1385   // is a classical Babylonian method for computing the square root. This code
1386   // was adapted to APINt from a wikipedia article on such computations.
1387   // See http://www.wikipedia.org/ and go to the page named
1388   // Calculate_an_integer_square_root.
1389   unsigned nbits = BitWidth, i = 4;
1390   APInt testy(BitWidth, 16);
1391   APInt x_old(BitWidth, 1);
1392   APInt x_new(BitWidth, 0);
1393   APInt two(BitWidth, 2);
1394
1395   // Select a good starting value using binary logarithms.
1396   for (;; i += 2, testy = testy.shl(2))
1397     if (i >= nbits || this->ule(testy)) {
1398       x_old = x_old.shl(i / 2);
1399       break;
1400     }
1401
1402   // Use the Babylonian method to arrive at the integer square root:
1403   for (;;) {
1404     x_new = (this->udiv(x_old) + x_old).udiv(two);
1405     if (x_old.ule(x_new))
1406       break;
1407     x_old = x_new;
1408   }
1409
1410   // Make sure we return the closest approximation
1411   // NOTE: The rounding calculation below is correct. It will produce an
1412   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1413   // determined to be a rounding issue with pari/gp as it begins to use a
1414   // floating point representation after 192 bits. There are no discrepancies
1415   // between this algorithm and pari/gp for bit widths < 192 bits.
1416   APInt square(x_old * x_old);
1417   APInt nextSquare((x_old + 1) * (x_old +1));
1418   if (this->ult(square))
1419     return x_old;
1420   else if (this->ule(nextSquare)) {
1421     APInt midpoint((nextSquare - square).udiv(two));
1422     APInt offset(*this - square);
1423     if (offset.ult(midpoint))
1424       return x_old;
1425     else
1426       return x_old + 1;
1427   } else
1428     llvm_unreachable("Error in APInt::sqrt computation");
1429   return x_old + 1;
1430 }
1431
1432 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1433 /// iterative extended Euclidean algorithm is used to solve for this value,
1434 /// however we simplify it to speed up calculating only the inverse, and take
1435 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1436 /// (potentially large) APInts around.
1437 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1438   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1439
1440   // Using the properties listed at the following web page (accessed 06/21/08):
1441   //   http://www.numbertheory.org/php/euclid.html
1442   // (especially the properties numbered 3, 4 and 9) it can be proved that
1443   // BitWidth bits suffice for all the computations in the algorithm implemented
1444   // below. More precisely, this number of bits suffice if the multiplicative
1445   // inverse exists, but may not suffice for the general extended Euclidean
1446   // algorithm.
1447
1448   APInt r[2] = { modulo, *this };
1449   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1450   APInt q(BitWidth, 0);
1451
1452   unsigned i;
1453   for (i = 0; r[i^1] != 0; i ^= 1) {
1454     // An overview of the math without the confusing bit-flipping:
1455     // q = r[i-2] / r[i-1]
1456     // r[i] = r[i-2] % r[i-1]
1457     // t[i] = t[i-2] - t[i-1] * q
1458     udivrem(r[i], r[i^1], q, r[i]);
1459     t[i] -= t[i^1] * q;
1460   }
1461
1462   // If this APInt and the modulo are not coprime, there is no multiplicative
1463   // inverse, so return 0. We check this by looking at the next-to-last
1464   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1465   // algorithm.
1466   if (r[i] != 1)
1467     return APInt(BitWidth, 0);
1468
1469   // The next-to-last t is the multiplicative inverse.  However, we are
1470   // interested in a positive inverse. Calcuate a positive one from a negative
1471   // one if necessary. A simple addition of the modulo suffices because
1472   // abs(t[i]) is known to be less than *this/2 (see the link above).
1473   return t[i].isNegative() ? t[i] + modulo : t[i];
1474 }
1475
1476 /// Calculate the magic numbers required to implement a signed integer division
1477 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1478 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1479 /// Warren, Jr., chapter 10.
1480 APInt::ms APInt::magic() const {
1481   const APInt& d = *this;
1482   unsigned p;
1483   APInt ad, anc, delta, q1, r1, q2, r2, t;
1484   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1485   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1486   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1487   struct ms mag;
1488
1489   ad = d.abs();
1490   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1491   anc = t - 1 - t.urem(ad);   // absolute value of nc
1492   p = d.getBitWidth() - 1;    // initialize p
1493   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1494   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1495   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1496   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1497   do {
1498     p = p + 1;
1499     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1500     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1501     if (r1.uge(anc)) {  // must be unsigned comparison
1502       q1 = q1 + 1;
1503       r1 = r1 - anc;
1504     }
1505     q2 = q2<<1;          // update q2 = 2p/abs(d)
1506     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1507     if (r2.uge(ad)) {   // must be unsigned comparison
1508       q2 = q2 + 1;
1509       r2 = r2 - ad;
1510     }
1511     delta = ad - r2;
1512   } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1513
1514   mag.m = q2 + 1;
1515   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1516   mag.s = p - d.getBitWidth();          // resulting shift
1517   return mag;
1518 }
1519
1520 /// Calculate the magic numbers required to implement an unsigned integer
1521 /// division by a constant as a sequence of multiplies, adds and shifts.
1522 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1523 /// S. Warren, Jr., chapter 10.
1524 APInt::mu APInt::magicu() const {
1525   const APInt& d = *this;
1526   unsigned p;
1527   APInt nc, delta, q1, r1, q2, r2;
1528   struct mu magu;
1529   magu.a = 0;               // initialize "add" indicator
1530   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1531   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1532   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1533
1534   nc = allOnes - (-d).urem(d);
1535   p = d.getBitWidth() - 1;  // initialize p
1536   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1537   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1538   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1539   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1540   do {
1541     p = p + 1;
1542     if (r1.uge(nc - r1)) {
1543       q1 = q1 + q1 + 1;  // update q1
1544       r1 = r1 + r1 - nc; // update r1
1545     }
1546     else {
1547       q1 = q1+q1; // update q1
1548       r1 = r1+r1; // update r1
1549     }
1550     if ((r2 + 1).uge(d - r2)) {
1551       if (q2.uge(signedMax)) magu.a = 1;
1552       q2 = q2+q2 + 1;     // update q2
1553       r2 = r2+r2 + 1 - d; // update r2
1554     }
1555     else {
1556       if (q2.uge(signedMin)) magu.a = 1;
1557       q2 = q2+q2;     // update q2
1558       r2 = r2+r2 + 1; // update r2
1559     }
1560     delta = d - 1 - r2;
1561   } while (p < d.getBitWidth()*2 &&
1562            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1563   magu.m = q2 + 1; // resulting magic number
1564   magu.s = p - d.getBitWidth();  // resulting shift
1565   return magu;
1566 }
1567
1568 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1569 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1570 /// variables here have the same names as in the algorithm. Comments explain
1571 /// the algorithm and any deviation from it.
1572 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1573                      unsigned m, unsigned n) {
1574   assert(u && "Must provide dividend");
1575   assert(v && "Must provide divisor");
1576   assert(q && "Must provide quotient");
1577   assert(u != v && u != q && v != q && "Must us different memory");
1578   assert(n>1 && "n must be > 1");
1579
1580   // Knuth uses the value b as the base of the number system. In our case b
1581   // is 2^31 so we just set it to -1u.
1582   uint64_t b = uint64_t(1) << 32;
1583
1584 #if 0
1585   DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1586   DEBUG(errs() << "KnuthDiv: original:");
1587   DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1588   DEBUG(errs() << " by");
1589   DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
1590   DEBUG(errs() << '\n');
1591 #endif
1592   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1593   // u and v by d. Note that we have taken Knuth's advice here to use a power
1594   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1595   // 2 allows us to shift instead of multiply and it is easy to determine the
1596   // shift amount from the leading zeros.  We are basically normalizing the u
1597   // and v so that its high bits are shifted to the top of v's range without
1598   // overflow. Note that this can require an extra word in u so that u must
1599   // be of length m+n+1.
1600   unsigned shift = CountLeadingZeros_32(v[n-1]);
1601   unsigned v_carry = 0;
1602   unsigned u_carry = 0;
1603   if (shift) {
1604     for (unsigned i = 0; i < m+n; ++i) {
1605       unsigned u_tmp = u[i] >> (32 - shift);
1606       u[i] = (u[i] << shift) | u_carry;
1607       u_carry = u_tmp;
1608     }
1609     for (unsigned i = 0; i < n; ++i) {
1610       unsigned v_tmp = v[i] >> (32 - shift);
1611       v[i] = (v[i] << shift) | v_carry;
1612       v_carry = v_tmp;
1613     }
1614   }
1615   u[m+n] = u_carry;
1616 #if 0
1617   DEBUG(errs() << "KnuthDiv:   normal:");
1618   DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1619   DEBUG(errs() << " by");
1620   DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
1621   DEBUG(errs() << '\n');
1622 #endif
1623
1624   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1625   int j = m;
1626   do {
1627     DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n');
1628     // D3. [Calculate q'.].
1629     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1630     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1631     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1632     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1633     // on v[n-2] determines at high speed most of the cases in which the trial
1634     // value qp is one too large, and it eliminates all cases where qp is two
1635     // too large.
1636     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1637     DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n');
1638     uint64_t qp = dividend / v[n-1];
1639     uint64_t rp = dividend % v[n-1];
1640     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1641       qp--;
1642       rp += v[n-1];
1643       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1644         qp--;
1645     }
1646     DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1647
1648     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1649     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1650     // consists of a simple multiplication by a one-place number, combined with
1651     // a subtraction.
1652     bool isNeg = false;
1653     for (unsigned i = 0; i < n; ++i) {
1654       uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1655       uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1656       bool borrow = subtrahend > u_tmp;
1657       DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp
1658                    << ", subtrahend == " << subtrahend
1659                    << ", borrow = " << borrow << '\n');
1660
1661       uint64_t result = u_tmp - subtrahend;
1662       unsigned k = j + i;
1663       u[k++] = (unsigned)(result & (b-1)); // subtract low word
1664       u[k++] = (unsigned)(result >> 32);   // subtract high word
1665       while (borrow && k <= m+n) { // deal with borrow to the left
1666         borrow = u[k] == 0;
1667         u[k]--;
1668         k++;
1669       }
1670       isNeg |= borrow;
1671       DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1672                     u[j+i+1] << '\n');
1673     }
1674     DEBUG(errs() << "KnuthDiv: after subtraction:");
1675     DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1676     DEBUG(errs() << '\n');
1677     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1678     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1679     // true value plus b**(n+1), namely as the b's complement of
1680     // the true value, and a "borrow" to the left should be remembered.
1681     //
1682     if (isNeg) {
1683       bool carry = true;  // true because b's complement is "complement + 1"
1684       for (unsigned i = 0; i <= m+n; ++i) {
1685         u[i] = ~u[i] + carry; // b's complement
1686         carry = carry && u[i] == 0;
1687       }
1688     }
1689     DEBUG(errs() << "KnuthDiv: after complement:");
1690     DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1691     DEBUG(errs() << '\n');
1692
1693     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1694     // negative, go to step D6; otherwise go on to step D7.
1695     q[j] = (unsigned)qp;
1696     if (isNeg) {
1697       // D6. [Add back]. The probability that this step is necessary is very
1698       // small, on the order of only 2/b. Make sure that test data accounts for
1699       // this possibility. Decrease q[j] by 1
1700       q[j]--;
1701       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1702       // A carry will occur to the left of u[j+n], and it should be ignored
1703       // since it cancels with the borrow that occurred in D4.
1704       bool carry = false;
1705       for (unsigned i = 0; i < n; i++) {
1706         unsigned limit = std::min(u[j+i],v[i]);
1707         u[j+i] += v[i] + carry;
1708         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1709       }
1710       u[j+n] += carry;
1711     }
1712     DEBUG(errs() << "KnuthDiv: after correction:");
1713     DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]);
1714     DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1715
1716   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1717   } while (--j >= 0);
1718
1719   DEBUG(errs() << "KnuthDiv: quotient:");
1720   DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]);
1721   DEBUG(errs() << '\n');
1722
1723   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1724   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1725   // compute the remainder (urem uses this).
1726   if (r) {
1727     // The value d is expressed by the "shift" value above since we avoided
1728     // multiplication by d by using a shift left. So, all we have to do is
1729     // shift right here. In order to mak
1730     if (shift) {
1731       unsigned carry = 0;
1732       DEBUG(errs() << "KnuthDiv: remainder:");
1733       for (int i = n-1; i >= 0; i--) {
1734         r[i] = (u[i] >> shift) | carry;
1735         carry = u[i] << (32 - shift);
1736         DEBUG(errs() << " " << r[i]);
1737       }
1738     } else {
1739       for (int i = n-1; i >= 0; i--) {
1740         r[i] = u[i];
1741         DEBUG(errs() << " " << r[i]);
1742       }
1743     }
1744     DEBUG(errs() << '\n');
1745   }
1746 #if 0
1747   DEBUG(errs() << '\n');
1748 #endif
1749 }
1750
1751 void APInt::divide(const APInt LHS, unsigned lhsWords,
1752                    const APInt &RHS, unsigned rhsWords,
1753                    APInt *Quotient, APInt *Remainder)
1754 {
1755   assert(lhsWords >= rhsWords && "Fractional result");
1756
1757   // First, compose the values into an array of 32-bit words instead of
1758   // 64-bit words. This is a necessity of both the "short division" algorithm
1759   // and the the Knuth "classical algorithm" which requires there to be native
1760   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1761   // can't use 64-bit operands here because we don't have native results of
1762   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1763   // work on large-endian machines.
1764   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1765   unsigned n = rhsWords * 2;
1766   unsigned m = (lhsWords * 2) - n;
1767
1768   // Allocate space for the temporary values we need either on the stack, if
1769   // it will fit, or on the heap if it won't.
1770   unsigned SPACE[128];
1771   unsigned *U = 0;
1772   unsigned *V = 0;
1773   unsigned *Q = 0;
1774   unsigned *R = 0;
1775   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1776     U = &SPACE[0];
1777     V = &SPACE[m+n+1];
1778     Q = &SPACE[(m+n+1) + n];
1779     if (Remainder)
1780       R = &SPACE[(m+n+1) + n + (m+n)];
1781   } else {
1782     U = new unsigned[m + n + 1];
1783     V = new unsigned[n];
1784     Q = new unsigned[m+n];
1785     if (Remainder)
1786       R = new unsigned[n];
1787   }
1788
1789   // Initialize the dividend
1790   memset(U, 0, (m+n+1)*sizeof(unsigned));
1791   for (unsigned i = 0; i < lhsWords; ++i) {
1792     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1793     U[i * 2] = (unsigned)(tmp & mask);
1794     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1795   }
1796   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1797
1798   // Initialize the divisor
1799   memset(V, 0, (n)*sizeof(unsigned));
1800   for (unsigned i = 0; i < rhsWords; ++i) {
1801     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1802     V[i * 2] = (unsigned)(tmp & mask);
1803     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1804   }
1805
1806   // initialize the quotient and remainder
1807   memset(Q, 0, (m+n) * sizeof(unsigned));
1808   if (Remainder)
1809     memset(R, 0, n * sizeof(unsigned));
1810
1811   // Now, adjust m and n for the Knuth division. n is the number of words in
1812   // the divisor. m is the number of words by which the dividend exceeds the
1813   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1814   // contain any zero words or the Knuth algorithm fails.
1815   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1816     n--;
1817     m++;
1818   }
1819   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1820     m--;
1821
1822   // If we're left with only a single word for the divisor, Knuth doesn't work
1823   // so we implement the short division algorithm here. This is much simpler
1824   // and faster because we are certain that we can divide a 64-bit quantity
1825   // by a 32-bit quantity at hardware speed and short division is simply a
1826   // series of such operations. This is just like doing short division but we
1827   // are using base 2^32 instead of base 10.
1828   assert(n != 0 && "Divide by zero?");
1829   if (n == 1) {
1830     unsigned divisor = V[0];
1831     unsigned remainder = 0;
1832     for (int i = m+n-1; i >= 0; i--) {
1833       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1834       if (partial_dividend == 0) {
1835         Q[i] = 0;
1836         remainder = 0;
1837       } else if (partial_dividend < divisor) {
1838         Q[i] = 0;
1839         remainder = (unsigned)partial_dividend;
1840       } else if (partial_dividend == divisor) {
1841         Q[i] = 1;
1842         remainder = 0;
1843       } else {
1844         Q[i] = (unsigned)(partial_dividend / divisor);
1845         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1846       }
1847     }
1848     if (R)
1849       R[0] = remainder;
1850   } else {
1851     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1852     // case n > 1.
1853     KnuthDiv(U, V, Q, R, m, n);
1854   }
1855
1856   // If the caller wants the quotient
1857   if (Quotient) {
1858     // Set up the Quotient value's memory.
1859     if (Quotient->BitWidth != LHS.BitWidth) {
1860       if (Quotient->isSingleWord())
1861         Quotient->VAL = 0;
1862       else
1863         delete [] Quotient->pVal;
1864       Quotient->BitWidth = LHS.BitWidth;
1865       if (!Quotient->isSingleWord())
1866         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1867     } else
1868       Quotient->clear();
1869
1870     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1871     // order words.
1872     if (lhsWords == 1) {
1873       uint64_t tmp =
1874         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1875       if (Quotient->isSingleWord())
1876         Quotient->VAL = tmp;
1877       else
1878         Quotient->pVal[0] = tmp;
1879     } else {
1880       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1881       for (unsigned i = 0; i < lhsWords; ++i)
1882         Quotient->pVal[i] =
1883           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1884     }
1885   }
1886
1887   // If the caller wants the remainder
1888   if (Remainder) {
1889     // Set up the Remainder value's memory.
1890     if (Remainder->BitWidth != RHS.BitWidth) {
1891       if (Remainder->isSingleWord())
1892         Remainder->VAL = 0;
1893       else
1894         delete [] Remainder->pVal;
1895       Remainder->BitWidth = RHS.BitWidth;
1896       if (!Remainder->isSingleWord())
1897         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1898     } else
1899       Remainder->clear();
1900
1901     // The remainder is in R. Reconstitute the remainder into Remainder's low
1902     // order words.
1903     if (rhsWords == 1) {
1904       uint64_t tmp =
1905         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1906       if (Remainder->isSingleWord())
1907         Remainder->VAL = tmp;
1908       else
1909         Remainder->pVal[0] = tmp;
1910     } else {
1911       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1912       for (unsigned i = 0; i < rhsWords; ++i)
1913         Remainder->pVal[i] =
1914           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1915     }
1916   }
1917
1918   // Clean up the memory we allocated.
1919   if (U != &SPACE[0]) {
1920     delete [] U;
1921     delete [] V;
1922     delete [] Q;
1923     delete [] R;
1924   }
1925 }
1926
1927 APInt APInt::udiv(const APInt& RHS) const {
1928   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1929
1930   // First, deal with the easy case
1931   if (isSingleWord()) {
1932     assert(RHS.VAL != 0 && "Divide by zero?");
1933     return APInt(BitWidth, VAL / RHS.VAL);
1934   }
1935
1936   // Get some facts about the LHS and RHS number of bits and words
1937   unsigned rhsBits = RHS.getActiveBits();
1938   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1939   assert(rhsWords && "Divided by zero???");
1940   unsigned lhsBits = this->getActiveBits();
1941   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1942
1943   // Deal with some degenerate cases
1944   if (!lhsWords)
1945     // 0 / X ===> 0
1946     return APInt(BitWidth, 0);
1947   else if (lhsWords < rhsWords || this->ult(RHS)) {
1948     // X / Y ===> 0, iff X < Y
1949     return APInt(BitWidth, 0);
1950   } else if (*this == RHS) {
1951     // X / X ===> 1
1952     return APInt(BitWidth, 1);
1953   } else if (lhsWords == 1 && rhsWords == 1) {
1954     // All high words are zero, just use native divide
1955     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1956   }
1957
1958   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1959   APInt Quotient(1,0); // to hold result.
1960   divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1961   return Quotient;
1962 }
1963
1964 APInt APInt::urem(const APInt& RHS) const {
1965   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1966   if (isSingleWord()) {
1967     assert(RHS.VAL != 0 && "Remainder by zero?");
1968     return APInt(BitWidth, VAL % RHS.VAL);
1969   }
1970
1971   // Get some facts about the LHS
1972   unsigned lhsBits = getActiveBits();
1973   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1974
1975   // Get some facts about the RHS
1976   unsigned rhsBits = RHS.getActiveBits();
1977   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1978   assert(rhsWords && "Performing remainder operation by zero ???");
1979
1980   // Check the degenerate cases
1981   if (lhsWords == 0) {
1982     // 0 % Y ===> 0
1983     return APInt(BitWidth, 0);
1984   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1985     // X % Y ===> X, iff X < Y
1986     return *this;
1987   } else if (*this == RHS) {
1988     // X % X == 0;
1989     return APInt(BitWidth, 0);
1990   } else if (lhsWords == 1) {
1991     // All high words are zero, just use native remainder
1992     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1993   }
1994
1995   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1996   APInt Remainder(1,0);
1997   divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1998   return Remainder;
1999 }
2000
2001 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2002                     APInt &Quotient, APInt &Remainder) {
2003   // Get some size facts about the dividend and divisor
2004   unsigned lhsBits  = LHS.getActiveBits();
2005   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2006   unsigned rhsBits  = RHS.getActiveBits();
2007   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2008
2009   // Check the degenerate cases
2010   if (lhsWords == 0) {
2011     Quotient = 0;                // 0 / Y ===> 0
2012     Remainder = 0;               // 0 % Y ===> 0
2013     return;
2014   }
2015
2016   if (lhsWords < rhsWords || LHS.ult(RHS)) {
2017     Quotient = 0;               // X / Y ===> 0, iff X < Y
2018     Remainder = LHS;            // X % Y ===> X, iff X < Y
2019     return;
2020   }
2021
2022   if (LHS == RHS) {
2023     Quotient  = 1;              // X / X ===> 1
2024     Remainder = 0;              // X % X ===> 0;
2025     return;
2026   }
2027
2028   if (lhsWords == 1 && rhsWords == 1) {
2029     // There is only one word to consider so use the native versions.
2030     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2031     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2032     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2033     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2034     return;
2035   }
2036
2037   // Okay, lets do it the long way
2038   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2039 }
2040
2041 void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
2042   // Check our assumptions here
2043   assert(!str.empty() && "Invalid string length");
2044   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2045          "Radix should be 2, 8, 10, or 16!");
2046
2047   StringRef::iterator p = str.begin();
2048   size_t slen = str.size();
2049   bool isNeg = *p == '-';
2050   if (*p == '-' || *p == '+') {
2051     p++;
2052     slen--;
2053     assert(slen && "String is only a sign, needs a value.");
2054   }
2055   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2056   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2057   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2058   assert((((slen-1)*64)/22 <= numbits || radix != 10)
2059          && "Insufficient bit width");
2060
2061   // Allocate memory
2062   if (!isSingleWord())
2063     pVal = getClearedMemory(getNumWords());
2064
2065   // Figure out if we can shift instead of multiply
2066   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2067
2068   // Set up an APInt for the digit to add outside the loop so we don't
2069   // constantly construct/destruct it.
2070   APInt apdigit(getBitWidth(), 0);
2071   APInt apradix(getBitWidth(), radix);
2072
2073   // Enter digit traversal loop
2074   for (StringRef::iterator e = str.end(); p != e; ++p) {
2075     unsigned digit = getDigit(*p, radix);
2076     assert(digit < radix && "Invalid character in digit string");
2077
2078     // Shift or multiply the value by the radix
2079     if (slen > 1) {
2080       if (shift)
2081         *this <<= shift;
2082       else
2083         *this *= apradix;
2084     }
2085
2086     // Add in the digit we just interpreted
2087     if (apdigit.isSingleWord())
2088       apdigit.VAL = digit;
2089     else
2090       apdigit.pVal[0] = digit;
2091     *this += apdigit;
2092   }
2093   // If its negative, put it in two's complement form
2094   if (isNeg) {
2095     (*this)--;
2096     this->flip();
2097   }
2098 }
2099
2100 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2101                      bool Signed) const {
2102   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2103          "Radix should be 2, 8, 10, or 16!");
2104
2105   // First, check for a zero value and just short circuit the logic below.
2106   if (*this == 0) {
2107     Str.push_back('0');
2108     return;
2109   }
2110
2111   static const char Digits[] = "0123456789ABCDEF";
2112
2113   if (isSingleWord()) {
2114     char Buffer[65];
2115     char *BufPtr = Buffer+65;
2116
2117     uint64_t N;
2118     if (Signed) {
2119       int64_t I = getSExtValue();
2120       if (I < 0) {
2121         Str.push_back('-');
2122         I = -I;
2123       }
2124       N = I;
2125     } else {
2126       N = getZExtValue();
2127     }
2128
2129     while (N) {
2130       *--BufPtr = Digits[N % Radix];
2131       N /= Radix;
2132     }
2133     Str.append(BufPtr, Buffer+65);
2134     return;
2135   }
2136
2137   APInt Tmp(*this);
2138
2139   if (Signed && isNegative()) {
2140     // They want to print the signed version and it is a negative value
2141     // Flip the bits and add one to turn it into the equivalent positive
2142     // value and put a '-' in the result.
2143     Tmp.flip();
2144     Tmp++;
2145     Str.push_back('-');
2146   }
2147
2148   // We insert the digits backward, then reverse them to get the right order.
2149   unsigned StartDig = Str.size();
2150
2151   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2152   // because the number of bits per digit (1, 3 and 4 respectively) divides
2153   // equaly.  We just shift until the value is zero.
2154   if (Radix != 10) {
2155     // Just shift tmp right for each digit width until it becomes zero
2156     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2157     unsigned MaskAmt = Radix - 1;
2158
2159     while (Tmp != 0) {
2160       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2161       Str.push_back(Digits[Digit]);
2162       Tmp = Tmp.lshr(ShiftAmt);
2163     }
2164   } else {
2165     APInt divisor(4, 10);
2166     while (Tmp != 0) {
2167       APInt APdigit(1, 0);
2168       APInt tmp2(Tmp.getBitWidth(), 0);
2169       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2170              &APdigit);
2171       unsigned Digit = (unsigned)APdigit.getZExtValue();
2172       assert(Digit < Radix && "divide failed");
2173       Str.push_back(Digits[Digit]);
2174       Tmp = tmp2;
2175     }
2176   }
2177
2178   // Reverse the digits before returning.
2179   std::reverse(Str.begin()+StartDig, Str.end());
2180 }
2181
2182 /// toString - This returns the APInt as a std::string.  Note that this is an
2183 /// inefficient method.  It is better to pass in a SmallVector/SmallString
2184 /// to the methods above.
2185 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2186   SmallString<40> S;
2187   toString(S, Radix, Signed);
2188   return S.str();
2189 }
2190
2191
2192 void APInt::dump() const {
2193   SmallString<40> S, U;
2194   this->toStringUnsigned(U);
2195   this->toStringSigned(S);
2196   errs() << "APInt(" << BitWidth << "b, "
2197          << U.str() << "u " << S.str() << "s)";
2198 }
2199
2200 void APInt::print(raw_ostream &OS, bool isSigned) const {
2201   SmallString<40> S;
2202   this->toString(S, 10, isSigned);
2203   OS << S.str();
2204 }
2205
2206 // This implements a variety of operations on a representation of
2207 // arbitrary precision, two's-complement, bignum integer values.
2208
2209 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2210 // and unrestricting assumption.
2211 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2212 COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2213
2214 /* Some handy functions local to this file.  */
2215 namespace {
2216
2217   /* Returns the integer part with the least significant BITS set.
2218      BITS cannot be zero.  */
2219   static inline integerPart
2220   lowBitMask(unsigned int bits)
2221   {
2222     assert (bits != 0 && bits <= integerPartWidth);
2223
2224     return ~(integerPart) 0 >> (integerPartWidth - bits);
2225   }
2226
2227   /* Returns the value of the lower half of PART.  */
2228   static inline integerPart
2229   lowHalf(integerPart part)
2230   {
2231     return part & lowBitMask(integerPartWidth / 2);
2232   }
2233
2234   /* Returns the value of the upper half of PART.  */
2235   static inline integerPart
2236   highHalf(integerPart part)
2237   {
2238     return part >> (integerPartWidth / 2);
2239   }
2240
2241   /* Returns the bit number of the most significant set bit of a part.
2242      If the input number has no bits set -1U is returned.  */
2243   static unsigned int
2244   partMSB(integerPart value)
2245   {
2246     unsigned int n, msb;
2247
2248     if (value == 0)
2249       return -1U;
2250
2251     n = integerPartWidth / 2;
2252
2253     msb = 0;
2254     do {
2255       if (value >> n) {
2256         value >>= n;
2257         msb += n;
2258       }
2259
2260       n >>= 1;
2261     } while (n);
2262
2263     return msb;
2264   }
2265
2266   /* Returns the bit number of the least significant set bit of a
2267      part.  If the input number has no bits set -1U is returned.  */
2268   static unsigned int
2269   partLSB(integerPart value)
2270   {
2271     unsigned int n, lsb;
2272
2273     if (value == 0)
2274       return -1U;
2275
2276     lsb = integerPartWidth - 1;
2277     n = integerPartWidth / 2;
2278
2279     do {
2280       if (value << n) {
2281         value <<= n;
2282         lsb -= n;
2283       }
2284
2285       n >>= 1;
2286     } while (n);
2287
2288     return lsb;
2289   }
2290 }
2291
2292 /* Sets the least significant part of a bignum to the input value, and
2293    zeroes out higher parts.  */
2294 void
2295 APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2296 {
2297   unsigned int i;
2298
2299   assert (parts > 0);
2300
2301   dst[0] = part;
2302   for(i = 1; i < parts; i++)
2303     dst[i] = 0;
2304 }
2305
2306 /* Assign one bignum to another.  */
2307 void
2308 APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2309 {
2310   unsigned int i;
2311
2312   for(i = 0; i < parts; i++)
2313     dst[i] = src[i];
2314 }
2315
2316 /* Returns true if a bignum is zero, false otherwise.  */
2317 bool
2318 APInt::tcIsZero(const integerPart *src, unsigned int parts)
2319 {
2320   unsigned int i;
2321
2322   for(i = 0; i < parts; i++)
2323     if (src[i])
2324       return false;
2325
2326   return true;
2327 }
2328
2329 /* Extract the given bit of a bignum; returns 0 or 1.  */
2330 int
2331 APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2332 {
2333   return(parts[bit / integerPartWidth]
2334          & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2335 }
2336
2337 /* Set the given bit of a bignum.  */
2338 void
2339 APInt::tcSetBit(integerPart *parts, unsigned int bit)
2340 {
2341   parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2342 }
2343
2344 /* Returns the bit number of the least significant set bit of a
2345    number.  If the input number has no bits set -1U is returned.  */
2346 unsigned int
2347 APInt::tcLSB(const integerPart *parts, unsigned int n)
2348 {
2349   unsigned int i, lsb;
2350
2351   for(i = 0; i < n; i++) {
2352       if (parts[i] != 0) {
2353           lsb = partLSB(parts[i]);
2354
2355           return lsb + i * integerPartWidth;
2356       }
2357   }
2358
2359   return -1U;
2360 }
2361
2362 /* Returns the bit number of the most significant set bit of a number.
2363    If the input number has no bits set -1U is returned.  */
2364 unsigned int
2365 APInt::tcMSB(const integerPart *parts, unsigned int n)
2366 {
2367   unsigned int msb;
2368
2369   do {
2370       --n;
2371
2372       if (parts[n] != 0) {
2373           msb = partMSB(parts[n]);
2374
2375           return msb + n * integerPartWidth;
2376       }
2377   } while (n);
2378
2379   return -1U;
2380 }
2381
2382 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2383    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2384    the least significant bit of DST.  All high bits above srcBITS in
2385    DST are zero-filled.  */
2386 void
2387 APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2388                  unsigned int srcBits, unsigned int srcLSB)
2389 {
2390   unsigned int firstSrcPart, dstParts, shift, n;
2391
2392   dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2393   assert (dstParts <= dstCount);
2394
2395   firstSrcPart = srcLSB / integerPartWidth;
2396   tcAssign (dst, src + firstSrcPart, dstParts);
2397
2398   shift = srcLSB % integerPartWidth;
2399   tcShiftRight (dst, dstParts, shift);
2400
2401   /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2402      in DST.  If this is less that srcBits, append the rest, else
2403      clear the high bits.  */
2404   n = dstParts * integerPartWidth - shift;
2405   if (n < srcBits) {
2406     integerPart mask = lowBitMask (srcBits - n);
2407     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2408                           << n % integerPartWidth);
2409   } else if (n > srcBits) {
2410     if (srcBits % integerPartWidth)
2411       dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2412   }
2413
2414   /* Clear high parts.  */
2415   while (dstParts < dstCount)
2416     dst[dstParts++] = 0;
2417 }
2418
2419 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2420 integerPart
2421 APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2422              integerPart c, unsigned int parts)
2423 {
2424   unsigned int i;
2425
2426   assert(c <= 1);
2427
2428   for(i = 0; i < parts; i++) {
2429     integerPart l;
2430
2431     l = dst[i];
2432     if (c) {
2433       dst[i] += rhs[i] + 1;
2434       c = (dst[i] <= l);
2435     } else {
2436       dst[i] += rhs[i];
2437       c = (dst[i] < l);
2438     }
2439   }
2440
2441   return c;
2442 }
2443
2444 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2445 integerPart
2446 APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2447                   integerPart c, unsigned int parts)
2448 {
2449   unsigned int i;
2450
2451   assert(c <= 1);
2452
2453   for(i = 0; i < parts; i++) {
2454     integerPart l;
2455
2456     l = dst[i];
2457     if (c) {
2458       dst[i] -= rhs[i] + 1;
2459       c = (dst[i] >= l);
2460     } else {
2461       dst[i] -= rhs[i];
2462       c = (dst[i] > l);
2463     }
2464   }
2465
2466   return c;
2467 }
2468
2469 /* Negate a bignum in-place.  */
2470 void
2471 APInt::tcNegate(integerPart *dst, unsigned int parts)
2472 {
2473   tcComplement(dst, parts);
2474   tcIncrement(dst, parts);
2475 }
2476
2477 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2478     DST  = SRC * MULTIPLIER + CARRY   if add is false
2479
2480     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2481     they must start at the same point, i.e. DST == SRC.
2482
2483     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2484     returned.  Otherwise DST is filled with the least significant
2485     DSTPARTS parts of the result, and if all of the omitted higher
2486     parts were zero return zero, otherwise overflow occurred and
2487     return one.  */
2488 int
2489 APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2490                       integerPart multiplier, integerPart carry,
2491                       unsigned int srcParts, unsigned int dstParts,
2492                       bool add)
2493 {
2494   unsigned int i, n;
2495
2496   /* Otherwise our writes of DST kill our later reads of SRC.  */
2497   assert(dst <= src || dst >= src + srcParts);
2498   assert(dstParts <= srcParts + 1);
2499
2500   /* N loops; minimum of dstParts and srcParts.  */
2501   n = dstParts < srcParts ? dstParts: srcParts;
2502
2503   for(i = 0; i < n; i++) {
2504     integerPart low, mid, high, srcPart;
2505
2506       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2507
2508          This cannot overflow, because
2509
2510          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2511
2512          which is less than n^2.  */
2513
2514     srcPart = src[i];
2515
2516     if (multiplier == 0 || srcPart == 0)        {
2517       low = carry;
2518       high = 0;
2519     } else {
2520       low = lowHalf(srcPart) * lowHalf(multiplier);
2521       high = highHalf(srcPart) * highHalf(multiplier);
2522
2523       mid = lowHalf(srcPart) * highHalf(multiplier);
2524       high += highHalf(mid);
2525       mid <<= integerPartWidth / 2;
2526       if (low + mid < low)
2527         high++;
2528       low += mid;
2529
2530       mid = highHalf(srcPart) * lowHalf(multiplier);
2531       high += highHalf(mid);
2532       mid <<= integerPartWidth / 2;
2533       if (low + mid < low)
2534         high++;
2535       low += mid;
2536
2537       /* Now add carry.  */
2538       if (low + carry < low)
2539         high++;
2540       low += carry;
2541     }
2542
2543     if (add) {
2544       /* And now DST[i], and store the new low part there.  */
2545       if (low + dst[i] < low)
2546         high++;
2547       dst[i] += low;
2548     } else
2549       dst[i] = low;
2550
2551     carry = high;
2552   }
2553
2554   if (i < dstParts) {
2555     /* Full multiplication, there is no overflow.  */
2556     assert(i + 1 == dstParts);
2557     dst[i] = carry;
2558     return 0;
2559   } else {
2560     /* We overflowed if there is carry.  */
2561     if (carry)
2562       return 1;
2563
2564     /* We would overflow if any significant unwritten parts would be
2565        non-zero.  This is true if any remaining src parts are non-zero
2566        and the multiplier is non-zero.  */
2567     if (multiplier)
2568       for(; i < srcParts; i++)
2569         if (src[i])
2570           return 1;
2571
2572     /* We fitted in the narrow destination.  */
2573     return 0;
2574   }
2575 }
2576
2577 /* DST = LHS * RHS, where DST has the same width as the operands and
2578    is filled with the least significant parts of the result.  Returns
2579    one if overflow occurred, otherwise zero.  DST must be disjoint
2580    from both operands.  */
2581 int
2582 APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2583                   const integerPart *rhs, unsigned int parts)
2584 {
2585   unsigned int i;
2586   int overflow;
2587
2588   assert(dst != lhs && dst != rhs);
2589
2590   overflow = 0;
2591   tcSet(dst, 0, parts);
2592
2593   for(i = 0; i < parts; i++)
2594     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2595                                parts - i, true);
2596
2597   return overflow;
2598 }
2599
2600 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2601    operands.  No overflow occurs.  DST must be disjoint from both
2602    operands.  Returns the number of parts required to hold the
2603    result.  */
2604 unsigned int
2605 APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2606                       const integerPart *rhs, unsigned int lhsParts,
2607                       unsigned int rhsParts)
2608 {
2609   /* Put the narrower number on the LHS for less loops below.  */
2610   if (lhsParts > rhsParts) {
2611     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2612   } else {
2613     unsigned int n;
2614
2615     assert(dst != lhs && dst != rhs);
2616
2617     tcSet(dst, 0, rhsParts);
2618
2619     for(n = 0; n < lhsParts; n++)
2620       tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2621
2622     n = lhsParts + rhsParts;
2623
2624     return n - (dst[n - 1] == 0);
2625   }
2626 }
2627
2628 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2629    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2630    set REMAINDER to the remainder, return zero.  i.e.
2631
2632    OLD_LHS = RHS * LHS + REMAINDER
2633
2634    SCRATCH is a bignum of the same size as the operands and result for
2635    use by the routine; its contents need not be initialized and are
2636    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2637 */
2638 int
2639 APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2640                 integerPart *remainder, integerPart *srhs,
2641                 unsigned int parts)
2642 {
2643   unsigned int n, shiftCount;
2644   integerPart mask;
2645
2646   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2647
2648   shiftCount = tcMSB(rhs, parts) + 1;
2649   if (shiftCount == 0)
2650     return true;
2651
2652   shiftCount = parts * integerPartWidth - shiftCount;
2653   n = shiftCount / integerPartWidth;
2654   mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2655
2656   tcAssign(srhs, rhs, parts);
2657   tcShiftLeft(srhs, parts, shiftCount);
2658   tcAssign(remainder, lhs, parts);
2659   tcSet(lhs, 0, parts);
2660
2661   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2662      the total.  */
2663   for(;;) {
2664       int compare;
2665
2666       compare = tcCompare(remainder, srhs, parts);
2667       if (compare >= 0) {
2668         tcSubtract(remainder, srhs, 0, parts);
2669         lhs[n] |= mask;
2670       }
2671
2672       if (shiftCount == 0)
2673         break;
2674       shiftCount--;
2675       tcShiftRight(srhs, parts, 1);
2676       if ((mask >>= 1) == 0)
2677         mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2678   }
2679
2680   return false;
2681 }
2682
2683 /* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2684    There are no restrictions on COUNT.  */
2685 void
2686 APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2687 {
2688   if (count) {
2689     unsigned int jump, shift;
2690
2691     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2692     jump = count / integerPartWidth;
2693     shift = count % integerPartWidth;
2694
2695     while (parts > jump) {
2696       integerPart part;
2697
2698       parts--;
2699
2700       /* dst[i] comes from the two parts src[i - jump] and, if we have
2701          an intra-part shift, src[i - jump - 1].  */
2702       part = dst[parts - jump];
2703       if (shift) {
2704         part <<= shift;
2705         if (parts >= jump + 1)
2706           part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2707       }
2708
2709       dst[parts] = part;
2710     }
2711
2712     while (parts > 0)
2713       dst[--parts] = 0;
2714   }
2715 }
2716
2717 /* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2718    zero.  There are no restrictions on COUNT.  */
2719 void
2720 APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2721 {
2722   if (count) {
2723     unsigned int i, jump, shift;
2724
2725     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2726     jump = count / integerPartWidth;
2727     shift = count % integerPartWidth;
2728
2729     /* Perform the shift.  This leaves the most significant COUNT bits
2730        of the result at zero.  */
2731     for(i = 0; i < parts; i++) {
2732       integerPart part;
2733
2734       if (i + jump >= parts) {
2735         part = 0;
2736       } else {
2737         part = dst[i + jump];
2738         if (shift) {
2739           part >>= shift;
2740           if (i + jump + 1 < parts)
2741             part |= dst[i + jump + 1] << (integerPartWidth - shift);
2742         }
2743       }
2744
2745       dst[i] = part;
2746     }
2747   }
2748 }
2749
2750 /* Bitwise and of two bignums.  */
2751 void
2752 APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2753 {
2754   unsigned int i;
2755
2756   for(i = 0; i < parts; i++)
2757     dst[i] &= rhs[i];
2758 }
2759
2760 /* Bitwise inclusive or of two bignums.  */
2761 void
2762 APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2763 {
2764   unsigned int i;
2765
2766   for(i = 0; i < parts; i++)
2767     dst[i] |= rhs[i];
2768 }
2769
2770 /* Bitwise exclusive or of two bignums.  */
2771 void
2772 APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2773 {
2774   unsigned int i;
2775
2776   for(i = 0; i < parts; i++)
2777     dst[i] ^= rhs[i];
2778 }
2779
2780 /* Complement a bignum in-place.  */
2781 void
2782 APInt::tcComplement(integerPart *dst, unsigned int parts)
2783 {
2784   unsigned int i;
2785
2786   for(i = 0; i < parts; i++)
2787     dst[i] = ~dst[i];
2788 }
2789
2790 /* Comparison (unsigned) of two bignums.  */
2791 int
2792 APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2793                  unsigned int parts)
2794 {
2795   while (parts) {
2796       parts--;
2797       if (lhs[parts] == rhs[parts])
2798         continue;
2799
2800       if (lhs[parts] > rhs[parts])
2801         return 1;
2802       else
2803         return -1;
2804     }
2805
2806   return 0;
2807 }
2808
2809 /* Increment a bignum in-place, return the carry flag.  */
2810 integerPart
2811 APInt::tcIncrement(integerPart *dst, unsigned int parts)
2812 {
2813   unsigned int i;
2814
2815   for(i = 0; i < parts; i++)
2816     if (++dst[i] != 0)
2817       break;
2818
2819   return i == parts;
2820 }
2821
2822 /* Set the least significant BITS bits of a bignum, clear the
2823    rest.  */
2824 void
2825 APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2826                                  unsigned int bits)
2827 {
2828   unsigned int i;
2829
2830   i = 0;
2831   while (bits > integerPartWidth) {
2832     dst[i++] = ~(integerPart) 0;
2833     bits -= integerPartWidth;
2834   }
2835
2836   if (bits)
2837     dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2838
2839   while (i < parts)
2840     dst[i++] = 0;
2841 }