lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "apint"
  16 #include "llvm/ADT/APInt.h"
  17 #include "llvm/ADT/StringRef.h"
  18 #include "llvm/ADT/FoldingSet.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/Support/Debug.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include "llvm/Support/MathExtras.h"
  23 #include "llvm/Support/raw_ostream.h"
  24 #include <cmath>
  25 #include <limits>
  26 #include <cstring>
  27 #include <cstdlib>
  28 using namespace llvm;
  29
  30 /// A utility function for allocating memory, checking for allocation failures,
  31 /// and ensuring the contents are zeroed.
  32 inline static uint64_t* getClearedMemory(unsigned numWords) {
  33   uint64_t * result = new uint64_t[numWords];
  34   assert(result && "APInt memory allocation fails!");
  35   memset(result, 0, numWords * sizeof(uint64_t));
  36   return result;
  37 }
  38
  39 /// A utility function for allocating memory and checking for allocation
  40 /// failure.  The content is not zeroed.
  41 inline static uint64_t* getMemory(unsigned numWords) {
  42   uint64_t * result = new uint64_t[numWords];
  43   assert(result && "APInt memory allocation fails!");
  44   return result;
  45 }
  46
  47 /// A utility function that converts a character to a digit.
  48 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  49   unsigned r;
  50
  51   if (radix == 16) {
  52     r = cdigit - '0';
  53     if (r <= 9)
  54       return r;
  55
  56     r = cdigit - 'A';
  57     if (r <= 5)
  58       return r + 10;
  59
  60     r = cdigit - 'a';
  61     if (r <= 5)
  62       return r + 10;
  63   }
  64
  65   r = cdigit - '0';
  66   if (r < radix)
  67     return r;
  68
  69   return -1U;
  70 }
  71
  72
  73 void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
  74   pVal = getClearedMemory(getNumWords());
  75   pVal[0] = val;
  76   if (isSigned && int64_t(val) < 0)
  77     for (unsigned i = 1; i < getNumWords(); ++i)
  78       pVal[i] = -1ULL;
  79 }
  80
  81 void APInt::initSlowCase(const APInt& that) {
  82   pVal = getMemory(getNumWords());
  83   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
  84 }
  85
  86
  87 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
  88   : BitWidth(numBits), VAL(0) {
  89   assert(BitWidth && "Bitwidth too small");
  90   assert(bigVal && "Null pointer detected!");
  91   if (isSingleWord())
  92     VAL = bigVal[0];
  93   else {
  94     // Get memory, cleared to 0
  95     pVal = getClearedMemory(getNumWords());
  96     // Calculate the number of words to copy
  97     unsigned words = std::min<unsigned>(numWords, getNumWords());
  98     // Copy the words from bigVal to pVal
  99     memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
 100   }
 101   // Make sure unused high bits are cleared
 102   clearUnusedBits();
 103 }
 104
 105 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 106   : BitWidth(numbits), VAL(0) {
 107   assert(BitWidth && "Bitwidth too small");
 108   fromString(numbits, Str, radix);
 109 }
 110
 111 APInt& APInt::AssignSlowCase(const APInt& RHS) {
 112   // Don't do anything for X = X
 113   if (this == &RHS)
 114     return *this;
 115
 116   if (BitWidth == RHS.getBitWidth()) {
 117     // assume same bit-width single-word case is already handled
 118     assert(!isSingleWord());
 119     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
 120     return *this;
 121   }
 122
 123   if (isSingleWord()) {
 124     // assume case where both are single words is already handled
 125     assert(!RHS.isSingleWord());
 126     VAL = 0;
 127     pVal = getMemory(RHS.getNumWords());
 128     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 129   } else if (getNumWords() == RHS.getNumWords())
 130     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 131   else if (RHS.isSingleWord()) {
 132     delete [] pVal;
 133     VAL = RHS.VAL;
 134   } else {
 135     delete [] pVal;
 136     pVal = getMemory(RHS.getNumWords());
 137     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 138   }
 139   BitWidth = RHS.BitWidth;
 140   return clearUnusedBits();
 141 }
 142
 143 APInt& APInt::operator=(uint64_t RHS) {
 144   if (isSingleWord())
 145     VAL = RHS;
 146   else {
 147     pVal[0] = RHS;
 148     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
 149   }
 150   return clearUnusedBits();
 151 }
 152
 153 /// Profile - This method 'profiles' an APInt for use with FoldingSet.
 154 void APInt::Profile(FoldingSetNodeID& ID) const {
 155   ID.AddInteger(BitWidth);
 156
 157   if (isSingleWord()) {
 158     ID.AddInteger(VAL);
 159     return;
 160   }
 161
 162   unsigned NumWords = getNumWords();
 163   for (unsigned i = 0; i < NumWords; ++i)
 164     ID.AddInteger(pVal[i]);
 165 }
 166
 167 /// add_1 - This function adds a single "digit" integer, y, to the multiple
 168 /// "digit" integer array,  x[]. x[] is modified to reflect the addition and
 169 /// 1 is returned if there is a carry out, otherwise 0 is returned.
 170 /// @returns the carry of the addition.
 171 static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 172   for (unsigned i = 0; i < len; ++i) {
 173     dest[i] = y + x[i];
 174     if (dest[i] < y)
 175       y = 1; // Carry one to next digit.
 176     else {
 177       y = 0; // No need to carry so exit early
 178       break;
 179     }
 180   }
 181   return y;
 182 }
 183
 184 /// @brief Prefix increment operator. Increments the APInt by one.
 185 APInt& APInt::operator++() {
 186   if (isSingleWord())
 187     ++VAL;
 188   else
 189     add_1(pVal, pVal, getNumWords(), 1);
 190   return clearUnusedBits();
 191 }
 192
 193 /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
 194 /// the multi-digit integer array, x[], propagating the borrowed 1 value until
 195 /// no further borrowing is neeeded or it runs out of "digits" in x.  The result
 196 /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
 197 /// In other words, if y > x then this function returns 1, otherwise 0.
 198 /// @returns the borrow out of the subtraction
 199 static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
 200   for (unsigned i = 0; i < len; ++i) {
 201     uint64_t X = x[i];
 202     x[i] -= y;
 203     if (y > X)
 204       y = 1;  // We have to "borrow 1" from next "digit"
 205     else {
 206       y = 0;  // No need to borrow
 207       break;  // Remaining digits are unchanged so exit early
 208     }
 209   }
 210   return bool(y);
 211 }
 212
 213 /// @brief Prefix decrement operator. Decrements the APInt by one.
 214 APInt& APInt::operator--() {
 215   if (isSingleWord())
 216     --VAL;
 217   else
 218     sub_1(pVal, getNumWords(), 1);
 219   return clearUnusedBits();
 220 }
 221
 222 /// add - This function adds the integer array x to the integer array Y and
 223 /// places the result in dest.
 224 /// @returns the carry out from the addition
 225 /// @brief General addition of 64-bit integer arrays
 226 static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 227                 unsigned len) {
 228   bool carry = false;
 229   for (unsigned i = 0; i< len; ++i) {
 230     uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
 231     dest[i] = x[i] + y[i] + carry;
 232     carry = dest[i] < limit || (carry && dest[i] == limit);
 233   }
 234   return carry;
 235 }
 236
 237 /// Adds the RHS APint to this APInt.
 238 /// @returns this, after addition of RHS.
 239 /// @brief Addition assignment operator.
 240 APInt& APInt::operator+=(const APInt& RHS) {
 241   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 242   if (isSingleWord())
 243     VAL += RHS.VAL;
 244   else {
 245     add(pVal, pVal, RHS.pVal, getNumWords());
 246   }
 247   return clearUnusedBits();
 248 }
 249
 250 /// Subtracts the integer array y from the integer array x
 251 /// @returns returns the borrow out.
 252 /// @brief Generalized subtraction of 64-bit integer arrays.
 253 static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
 254                 unsigned len) {
 255   bool borrow = false;
 256   for (unsigned i = 0; i < len; ++i) {
 257     uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
 258     borrow = y[i] > x_tmp || (borrow && x[i] == 0);
 259     dest[i] = x_tmp - y[i];
 260   }
 261   return borrow;
 262 }
 263
 264 /// Subtracts the RHS APInt from this APInt
 265 /// @returns this, after subtraction
 266 /// @brief Subtraction assignment operator.
 267 APInt& APInt::operator-=(const APInt& RHS) {
 268   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 269   if (isSingleWord())
 270     VAL -= RHS.VAL;
 271   else
 272     sub(pVal, pVal, RHS.pVal, getNumWords());
 273   return clearUnusedBits();
 274 }
 275
 276 /// Multiplies an integer array, x, by a uint64_t integer and places the result
 277 /// into dest.
 278 /// @returns the carry out of the multiplication.
 279 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 280 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 281   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 282   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 283   uint64_t carry = 0;
 284
 285   // For each digit of x.
 286   for (unsigned i = 0; i < len; ++i) {
 287     // Split x into high and low words
 288     uint64_t lx = x[i] & 0xffffffffULL;
 289     uint64_t hx = x[i] >> 32;
 290     // hasCarry - A flag to indicate if there is a carry to the next digit.
 291     // hasCarry == 0, no carry
 292     // hasCarry == 1, has carry
 293     // hasCarry == 2, no carry and the calculation result == 0.
 294     uint8_t hasCarry = 0;
 295     dest[i] = carry + lx * ly;
 296     // Determine if the add above introduces carry.
 297     hasCarry = (dest[i] < carry) ? 1 : 0;
 298     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 299     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 300     // (2^32 - 1) + 2^32 = 2^64.
 301     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 302
 303     carry += (lx * hy) & 0xffffffffULL;
 304     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 305     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 306             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 307   }
 308   return carry;
 309 }
 310
 311 /// Multiplies integer array x by integer array y and stores the result into
 312 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 313 /// @brief Generalized multiplicate of integer arrays.
 314 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
 315                 unsigned ylen) {
 316   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 317   for (unsigned i = 1; i < ylen; ++i) {
 318     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 319     uint64_t carry = 0, lx = 0, hx = 0;
 320     for (unsigned j = 0; j < xlen; ++j) {
 321       lx = x[j] & 0xffffffffULL;
 322       hx = x[j] >> 32;
 323       // hasCarry - A flag to indicate if has carry.
 324       // hasCarry == 0, no carry
 325       // hasCarry == 1, has carry
 326       // hasCarry == 2, no carry and the calculation result == 0.
 327       uint8_t hasCarry = 0;
 328       uint64_t resul = carry + lx * ly;
 329       hasCarry = (resul < carry) ? 1 : 0;
 330       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 331       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 332
 333       carry += (lx * hy) & 0xffffffffULL;
 334       resul = (carry << 32) | (resul & 0xffffffffULL);
 335       dest[i+j] += resul;
 336       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 337               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 338               ((lx * hy) >> 32) + hx * hy;
 339     }
 340     dest[i+xlen] = carry;
 341   }
 342 }
 343
 344 APInt& APInt::operator*=(const APInt& RHS) {
 345   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 346   if (isSingleWord()) {
 347     VAL *= RHS.VAL;
 348     clearUnusedBits();
 349     return *this;
 350   }
 351
 352   // Get some bit facts about LHS and check for zero
 353   unsigned lhsBits = getActiveBits();
 354   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 355   if (!lhsWords)
 356     // 0 * X ===> 0
 357     return *this;
 358
 359   // Get some bit facts about RHS and check for zero
 360   unsigned rhsBits = RHS.getActiveBits();
 361   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 362   if (!rhsWords) {
 363     // X * 0 ===> 0
 364     clearAllBits();
 365     return *this;
 366   }
 367
 368   // Allocate space for the result
 369   unsigned destWords = rhsWords + lhsWords;
 370   uint64_t *dest = getMemory(destWords);
 371
 372   // Perform the long multiply
 373   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 374
 375   // Copy result back into *this
 376   clearAllBits();
 377   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 378   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 379
 380   // delete dest array and return
 381   delete[] dest;
 382   return *this;
 383 }
 384
 385 APInt& APInt::operator&=(const APInt& RHS) {
 386   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 387   if (isSingleWord()) {
 388     VAL &= RHS.VAL;
 389     return *this;
 390   }
 391   unsigned numWords = getNumWords();
 392   for (unsigned i = 0; i < numWords; ++i)
 393     pVal[i] &= RHS.pVal[i];
 394   return *this;
 395 }
 396
 397 APInt& APInt::operator|=(const APInt& RHS) {
 398   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 399   if (isSingleWord()) {
 400     VAL |= RHS.VAL;
 401     return *this;
 402   }
 403   unsigned numWords = getNumWords();
 404   for (unsigned i = 0; i < numWords; ++i)
 405     pVal[i] |= RHS.pVal[i];
 406   return *this;
 407 }
 408
 409 APInt& APInt::operator^=(const APInt& RHS) {
 410   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 411   if (isSingleWord()) {
 412     VAL ^= RHS.VAL;
 413     this->clearUnusedBits();
 414     return *this;
 415   }
 416   unsigned numWords = getNumWords();
 417   for (unsigned i = 0; i < numWords; ++i)
 418     pVal[i] ^= RHS.pVal[i];
 419   return clearUnusedBits();
 420 }
 421
 422 APInt APInt::AndSlowCase(const APInt& RHS) const {
 423   unsigned numWords = getNumWords();
 424   uint64_t* val = getMemory(numWords);
 425   for (unsigned i = 0; i < numWords; ++i)
 426     val[i] = pVal[i] & RHS.pVal[i];
 427   return APInt(val, getBitWidth());
 428 }
 429
 430 APInt APInt::OrSlowCase(const APInt& RHS) const {
 431   unsigned numWords = getNumWords();
 432   uint64_t *val = getMemory(numWords);
 433   for (unsigned i = 0; i < numWords; ++i)
 434     val[i] = pVal[i] | RHS.pVal[i];
 435   return APInt(val, getBitWidth());
 436 }
 437
 438 APInt APInt::XorSlowCase(const APInt& RHS) const {
 439   unsigned numWords = getNumWords();
 440   uint64_t *val = getMemory(numWords);
 441   for (unsigned i = 0; i < numWords; ++i)
 442     val[i] = pVal[i] ^ RHS.pVal[i];
 443
 444   // 0^0==1 so clear the high bits in case they got set.
 445   return APInt(val, getBitWidth()).clearUnusedBits();
 446 }
 447
 448 bool APInt::operator !() const {
 449   if (isSingleWord())
 450     return !VAL;
 451
 452   for (unsigned i = 0; i < getNumWords(); ++i)
 453     if (pVal[i])
 454       return false;
 455   return true;
 456 }
 457
 458 APInt APInt::operator*(const APInt& RHS) const {
 459   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 460   if (isSingleWord())
 461     return APInt(BitWidth, VAL * RHS.VAL);
 462   APInt Result(*this);
 463   Result *= RHS;
 464   return Result.clearUnusedBits();
 465 }
 466
 467 APInt APInt::operator+(const APInt& RHS) const {
 468   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 469   if (isSingleWord())
 470     return APInt(BitWidth, VAL + RHS.VAL);
 471   APInt Result(BitWidth, 0);
 472   add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 473   return Result.clearUnusedBits();
 474 }
 475
 476 APInt APInt::operator-(const APInt& RHS) const {
 477   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 478   if (isSingleWord())
 479     return APInt(BitWidth, VAL - RHS.VAL);
 480   APInt Result(BitWidth, 0);
 481   sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
 482   return Result.clearUnusedBits();
 483 }
 484
 485 bool APInt::operator[](unsigned bitPosition) const {
 486   assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
 487   return (maskBit(bitPosition) &
 488           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
 489 }
 490
 491 bool APInt::EqualSlowCase(const APInt& RHS) const {
 492   // Get some facts about the number of bits used in the two operands.
 493   unsigned n1 = getActiveBits();
 494   unsigned n2 = RHS.getActiveBits();
 495
 496   // If the number of bits isn't the same, they aren't equal
 497   if (n1 != n2)
 498     return false;
 499
 500   // If the number of bits fits in a word, we only need to compare the low word.
 501   if (n1 <= APINT_BITS_PER_WORD)
 502     return pVal[0] == RHS.pVal[0];
 503
 504   // Otherwise, compare everything
 505   for (int i = whichWord(n1 - 1); i >= 0; --i)
 506     if (pVal[i] != RHS.pVal[i])
 507       return false;
 508   return true;
 509 }
 510
 511 bool APInt::EqualSlowCase(uint64_t Val) const {
 512   unsigned n = getActiveBits();
 513   if (n <= APINT_BITS_PER_WORD)
 514     return pVal[0] == Val;
 515   else
 516     return false;
 517 }
 518
 519 bool APInt::ult(const APInt& RHS) const {
 520   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 521   if (isSingleWord())
 522     return VAL < RHS.VAL;
 523
 524   // Get active bit length of both operands
 525   unsigned n1 = getActiveBits();
 526   unsigned n2 = RHS.getActiveBits();
 527
 528   // If magnitude of LHS is less than RHS, return true.
 529   if (n1 < n2)
 530     return true;
 531
 532   // If magnitude of RHS is greather than LHS, return false.
 533   if (n2 < n1)
 534     return false;
 535
 536   // If they bot fit in a word, just compare the low order word
 537   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
 538     return pVal[0] < RHS.pVal[0];
 539
 540   // Otherwise, compare all words
 541   unsigned topWord = whichWord(std::max(n1,n2)-1);
 542   for (int i = topWord; i >= 0; --i) {
 543     if (pVal[i] > RHS.pVal[i])
 544       return false;
 545     if (pVal[i] < RHS.pVal[i])
 546       return true;
 547   }
 548   return false;
 549 }
 550
 551 bool APInt::slt(const APInt& RHS) const {
 552   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 553   if (isSingleWord()) {
 554     int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
 555     int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
 556     return lhsSext < rhsSext;
 557   }
 558
 559   APInt lhs(*this);
 560   APInt rhs(RHS);
 561   bool lhsNeg = isNegative();
 562   bool rhsNeg = rhs.isNegative();
 563   if (lhsNeg) {
 564     // Sign bit is set so perform two's complement to make it positive
 565     lhs.flipAllBits();
 566     lhs++;
 567   }
 568   if (rhsNeg) {
 569     // Sign bit is set so perform two's complement to make it positive
 570     rhs.flipAllBits();
 571     rhs++;
 572   }
 573
 574   // Now we have unsigned values to compare so do the comparison if necessary
 575   // based on the negativeness of the values.
 576   if (lhsNeg)
 577     if (rhsNeg)
 578       return lhs.ugt(rhs);
 579     else
 580       return true;
 581   else if (rhsNeg)
 582     return false;
 583   else
 584     return lhs.ult(rhs);
 585 }
 586
 587 void APInt::setBit(unsigned bitPosition) {
 588   if (isSingleWord())
 589     VAL |= maskBit(bitPosition);
 590   else
 591     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 592 }
 593
 594 /// Set the given bit to 0 whose position is given as "bitPosition".
 595 /// @brief Set a given bit to 0.
 596 void APInt::clearBit(unsigned bitPosition) {
 597   if (isSingleWord())
 598     VAL &= ~maskBit(bitPosition);
 599   else
 600     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 601 }
 602
 603 /// @brief Toggle every bit to its opposite value.
 604
 605 /// Toggle a given bit to its opposite value whose position is given
 606 /// as "bitPosition".
 607 /// @brief Toggles a given bit to its opposite value.
 608 void APInt::flipBit(unsigned bitPosition) {
 609   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 610   if ((*this)[bitPosition]) clearBit(bitPosition);
 611   else setBit(bitPosition);
 612 }
 613
 614 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 615   assert(!str.empty() && "Invalid string length");
 616   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
 617          "Radix should be 2, 8, 10, or 16!");
 618
 619   size_t slen = str.size();
 620
 621   // Each computation below needs to know if it's negative.
 622   StringRef::iterator p = str.begin();
 623   unsigned isNegative = *p == '-';
 624   if (*p == '-' || *p == '+') {
 625     p++;
 626     slen--;
 627     assert(slen && "String is only a sign, needs a value.");
 628   }
 629
 630   // For radixes of power-of-two values, the bits required is accurately and
 631   // easily computed
 632   if (radix == 2)
 633     return slen + isNegative;
 634   if (radix == 8)
 635     return slen * 3 + isNegative;
 636   if (radix == 16)
 637     return slen * 4 + isNegative;
 638
 639   // This is grossly inefficient but accurate. We could probably do something
 640   // with a computation of roughly slen*64/20 and then adjust by the value of
 641   // the first few digits. But, I'm not sure how accurate that could be.
 642
 643   // Compute a sufficient number of bits that is always large enough but might
 644   // be too large. This avoids the assertion in the constructor. This
 645   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 646   // bits in that case.
 647   unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
 648
 649   // Convert to the actual binary value.
 650   APInt tmp(sufficient, StringRef(p, slen), radix);
 651
 652   // Compute how many bits are required. If the log is infinite, assume we need
 653   // just bit.
 654   unsigned log = tmp.logBase2();
 655   if (log == (unsigned)-1) {
 656     return isNegative + 1;
 657   } else {
 658     return isNegative + log + 1;
 659   }
 660 }
 661
 662 // From http://www.burtleburtle.net, byBob Jenkins.
 663 // When targeting x86, both GCC and LLVM seem to recognize this as a
 664 // rotate instruction.
 665 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
 666
 667 // From http://www.burtleburtle.net, by Bob Jenkins.
 668 #define mix(a,b,c) \
 669   { \
 670     a -= c;  a ^= rot(c, 4);  c += b; \
 671     b -= a;  b ^= rot(a, 6);  a += c; \
 672     c -= b;  c ^= rot(b, 8);  b += a; \
 673     a -= c;  a ^= rot(c,16);  c += b; \
 674     b -= a;  b ^= rot(a,19);  a += c; \
 675     c -= b;  c ^= rot(b, 4);  b += a; \
 676   }
 677
 678 // From http://www.burtleburtle.net, by Bob Jenkins.
 679 #define final(a,b,c) \
 680   { \
 681     c ^= b; c -= rot(b,14); \
 682     a ^= c; a -= rot(c,11); \
 683     b ^= a; b -= rot(a,25); \
 684     c ^= b; c -= rot(b,16); \
 685     a ^= c; a -= rot(c,4);  \
 686     b ^= a; b -= rot(a,14); \
 687     c ^= b; c -= rot(b,24); \
 688   }
 689
 690 // hashword() was adapted from http://www.burtleburtle.net, by Bob
 691 // Jenkins.  k is a pointer to an array of uint32_t values; length is
 692 // the length of the key, in 32-bit chunks.  This version only handles
 693 // keys that are a multiple of 32 bits in size.
 694 static inline uint32_t hashword(const uint64_t *k64, size_t length)
 695 {
 696   const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
 697   uint32_t a,b,c;
 698
 699   /* Set up the internal state */
 700   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
 701
 702   /*------------------------------------------------- handle most of the key */
 703   while (length > 3) {
 704     a += k[0];
 705     b += k[1];
 706     c += k[2];
 707     mix(a,b,c);
 708     length -= 3;
 709     k += 3;
 710   }
 711
 712   /*------------------------------------------- handle the last 3 uint32_t's */
 713   switch (length) {                  /* all the case statements fall through */
 714   case 3 : c+=k[2];
 715   case 2 : b+=k[1];
 716   case 1 : a+=k[0];
 717     final(a,b,c);
 718     case 0:     /* case 0: nothing left to add */
 719       break;
 720     }
 721   /*------------------------------------------------------ report the result */
 722   return c;
 723 }
 724
 725 // hashword8() was adapted from http://www.burtleburtle.net, by Bob
 726 // Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
 727 // targeting x86 (32 or 64 bit), both LLVM and GCC compile this
 728 // function into about 35 instructions when inlined.
 729 static inline uint32_t hashword8(const uint64_t k64)
 730 {
 731   uint32_t a,b,c;
 732   a = b = c = 0xdeadbeef + 4;
 733   b += k64 >> 32;
 734   a += k64 & 0xffffffff;
 735   final(a,b,c);
 736   return c;
 737 }
 738 #undef final
 739 #undef mix
 740 #undef rot
 741
 742 uint64_t APInt::getHashValue() const {
 743   uint64_t hash;
 744   if (isSingleWord())
 745     hash = hashword8(VAL);
 746   else
 747     hash = hashword(pVal, getNumWords()*2);
 748   return hash;
 749 }
 750
 751 /// HiBits - This function returns the high "numBits" bits of this APInt.
 752 APInt APInt::getHiBits(unsigned numBits) const {
 753   return APIntOps::lshr(*this, BitWidth - numBits);
 754 }
 755
 756 /// LoBits - This function returns the low "numBits" bits of this APInt.
 757 APInt APInt::getLoBits(unsigned numBits) const {
 758   return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
 759                         BitWidth - numBits);
 760 }
 761
 762 unsigned APInt::countLeadingZerosSlowCase() const {
 763   // Treat the most significand word differently because it might have
 764   // meaningless bits set beyond the precision.
 765   unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
 766   integerPart MSWMask;
 767   if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
 768   else {
 769     MSWMask = ~integerPart(0);
 770     BitsInMSW = APINT_BITS_PER_WORD;
 771   }
 772
 773   unsigned i = getNumWords();
 774   integerPart MSW = pVal[i-1] & MSWMask;
 775   if (MSW)
 776     return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
 777
 778   unsigned Count = BitsInMSW;
 779   for (--i; i > 0u; --i) {
 780     if (pVal[i-1] == 0)
 781       Count += APINT_BITS_PER_WORD;
 782     else {
 783       Count += CountLeadingZeros_64(pVal[i-1]);
 784       break;
 785     }
 786   }
 787   return Count;
 788 }
 789
 790 static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
 791   unsigned Count = 0;
 792   if (skip)
 793     V <<= skip;
 794   while (V && (V & (1ULL << 63))) {
 795     Count++;
 796     V <<= 1;
 797   }
 798   return Count;
 799 }
 800
 801 unsigned APInt::countLeadingOnes() const {
 802   if (isSingleWord())
 803     return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
 804
 805   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 806   unsigned shift;
 807   if (!highWordBits) {
 808     highWordBits = APINT_BITS_PER_WORD;
 809     shift = 0;
 810   } else {
 811     shift = APINT_BITS_PER_WORD - highWordBits;
 812   }
 813   int i = getNumWords() - 1;
 814   unsigned Count = countLeadingOnes_64(pVal[i], shift);
 815   if (Count == highWordBits) {
 816     for (i--; i >= 0; --i) {
 817       if (pVal[i] == -1ULL)
 818         Count += APINT_BITS_PER_WORD;
 819       else {
 820         Count += countLeadingOnes_64(pVal[i], 0);
 821         break;
 822       }
 823     }
 824   }
 825   return Count;
 826 }
 827
 828 unsigned APInt::countTrailingZeros() const {
 829   if (isSingleWord())
 830     return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
 831   unsigned Count = 0;
 832   unsigned i = 0;
 833   for (; i < getNumWords() && pVal[i] == 0; ++i)
 834     Count += APINT_BITS_PER_WORD;
 835   if (i < getNumWords())
 836     Count += CountTrailingZeros_64(pVal[i]);
 837   return std::min(Count, BitWidth);
 838 }
 839
 840 unsigned APInt::countTrailingOnesSlowCase() const {
 841   unsigned Count = 0;
 842   unsigned i = 0;
 843   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
 844     Count += APINT_BITS_PER_WORD;
 845   if (i < getNumWords())
 846     Count += CountTrailingOnes_64(pVal[i]);
 847   return std::min(Count, BitWidth);
 848 }
 849
 850 unsigned APInt::countPopulationSlowCase() const {
 851   unsigned Count = 0;
 852   for (unsigned i = 0; i < getNumWords(); ++i)
 853     Count += CountPopulation_64(pVal[i]);
 854   return Count;
 855 }
 856
 857 APInt APInt::byteSwap() const {
 858   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 859   if (BitWidth == 16)
 860     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
 861   else if (BitWidth == 32)
 862     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
 863   else if (BitWidth == 48) {
 864     unsigned Tmp1 = unsigned(VAL >> 16);
 865     Tmp1 = ByteSwap_32(Tmp1);
 866     uint16_t Tmp2 = uint16_t(VAL);
 867     Tmp2 = ByteSwap_16(Tmp2);
 868     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 869   } else if (BitWidth == 64)
 870     return APInt(BitWidth, ByteSwap_64(VAL));
 871   else {
 872     APInt Result(BitWidth, 0);
 873     char *pByte = (char*)Result.pVal;
 874     for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
 875       char Tmp = pByte[i];
 876       pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
 877       pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
 878     }
 879     return Result;
 880   }
 881 }
 882
 883 APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
 884                                             const APInt& API2) {
 885   APInt A = API1, B = API2;
 886   while (!!B) {
 887     APInt T = B;
 888     B = APIntOps::urem(A, B);
 889     A = T;
 890   }
 891   return A;
 892 }
 893
 894 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 895   union {
 896     double D;
 897     uint64_t I;
 898   } T;
 899   T.D = Double;
 900
 901   // Get the sign bit from the highest order bit
 902   bool isNeg = T.I >> 63;
 903
 904   // Get the 11-bit exponent and adjust for the 1023 bit bias
 905   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 906
 907   // If the exponent is negative, the value is < 0 so just return 0.
 908   if (exp < 0)
 909     return APInt(width, 0u);
 910
 911   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 912   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 913
 914   // If the exponent doesn't shift all bits out of the mantissa
 915   if (exp < 52)
 916     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 917                     APInt(width, mantissa >> (52 - exp));
 918
 919   // If the client didn't provide enough bits for us to shift the mantissa into
 920   // then the result is undefined, just return 0
 921   if (width <= exp - 52)
 922     return APInt(width, 0);
 923
 924   // Otherwise, we have to shift the mantissa bits up to the right location
 925   APInt Tmp(width, mantissa);
 926   Tmp = Tmp.shl((unsigned)exp - 52);
 927   return isNeg ? -Tmp : Tmp;
 928 }
 929
 930 /// RoundToDouble - This function converts this APInt to a double.
 931 /// The layout for double is as following (IEEE Standard 754):
 932 ///  --------------------------------------
 933 /// |  Sign    Exponent    Fraction    Bias |
 934 /// |-------------------------------------- |
 935 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 936 ///  --------------------------------------
 937 double APInt::roundToDouble(bool isSigned) const {
 938
 939   // Handle the simple case where the value is contained in one uint64_t.
 940   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 941   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 942     if (isSigned) {
 943       int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
 944       return double(sext);
 945     } else
 946       return double(getWord(0));
 947   }
 948
 949   // Determine if the value is negative.
 950   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 951
 952   // Construct the absolute value if we're negative.
 953   APInt Tmp(isNeg ? -(*this) : (*this));
 954
 955   // Figure out how many bits we're using.
 956   unsigned n = Tmp.getActiveBits();
 957
 958   // The exponent (without bias normalization) is just the number of bits
 959   // we are using. Note that the sign bit is gone since we constructed the
 960   // absolute value.
 961   uint64_t exp = n;
 962
 963   // Return infinity for exponent overflow
 964   if (exp > 1023) {
 965     if (!isSigned || !isNeg)
 966       return std::numeric_limits<double>::infinity();
 967     else
 968       return -std::numeric_limits<double>::infinity();
 969   }
 970   exp += 1023; // Increment for 1023 bias
 971
 972   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 973   // extract the high 52 bits from the correct words in pVal.
 974   uint64_t mantissa;
 975   unsigned hiWord = whichWord(n-1);
 976   if (hiWord == 0) {
 977     mantissa = Tmp.pVal[0];
 978     if (n > 52)
 979       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 980   } else {
 981     assert(hiWord > 0 && "huh?");
 982     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 983     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 984     mantissa = hibits | lobits;
 985   }
 986
 987   // The leading bit of mantissa is implicit, so get rid of it.
 988   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 989   union {
 990     double D;
 991     uint64_t I;
 992   } T;
 993   T.I = sign | (exp << 52) | mantissa;
 994   return T.D;
 995 }
 996
 997 // Truncate to new width.
 998 APInt APInt::trunc(unsigned width) const {
 999   assert(width < BitWidth && "Invalid APInt Truncate request");
1000   assert(width && "Can't truncate to 0 bits");
1001
1002   if (width <= APINT_BITS_PER_WORD)
1003     return APInt(width, getRawData()[0]);
1004
1005   APInt Result(getMemory(getNumWords(width)), width);
1006
1007   // Copy full words.
1008   unsigned i;
1009   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
1010     Result.pVal[i] = pVal[i];
1011
1012   // Truncate and copy any partial word.
1013   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
1014   if (bits != 0)
1015     Result.pVal[i] = pVal[i] << bits >> bits;
1016
1017   return Result;
1018 }
1019
1020 // Sign extend to a new width.
1021 APInt APInt::sext(unsigned width) const {
1022   assert(width > BitWidth && "Invalid APInt SignExtend request");
1023
1024   if (width <= APINT_BITS_PER_WORD) {
1025     uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
1026     val = (int64_t)val >> (width - BitWidth);
1027     return APInt(width, val >> (APINT_BITS_PER_WORD - width));
1028   }
1029
1030   APInt Result(getMemory(getNumWords(width)), width);
1031
1032   // Copy full words.
1033   unsigned i;
1034   uint64_t word = 0;
1035   for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
1036     word = getRawData()[i];
1037     Result.pVal[i] = word;
1038   }
1039
1040   // Read and sign-extend any partial word.
1041   unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
1042   if (bits != 0)
1043     word = (int64_t)getRawData()[i] << bits >> bits;
1044   else
1045     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1046
1047   // Write remaining full words.
1048   for (; i != width / APINT_BITS_PER_WORD; i++) {
1049     Result.pVal[i] = word;
1050     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1051   }
1052
1053   // Write any partial word.
1054   bits = (0 - width) % APINT_BITS_PER_WORD;
1055   if (bits != 0)
1056     Result.pVal[i] = word << bits >> bits;
1057
1058   return Result;
1059 }
1060
1061 //  Zero extend to a new width.
1062 APInt APInt::zext(unsigned width) const {
1063   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1064
1065   if (width <= APINT_BITS_PER_WORD)
1066     return APInt(width, VAL);
1067
1068   APInt Result(getMemory(getNumWords(width)), width);
1069
1070   // Copy words.
1071   unsigned i;
1072   for (i = 0; i != getNumWords(); i++)
1073     Result.pVal[i] = getRawData()[i];
1074
1075   // Zero remaining words.
1076   memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
1077
1078   return Result;
1079 }
1080
1081 APInt APInt::zextOrTrunc(unsigned width) const {
1082   if (BitWidth < width)
1083     return zext(width);
1084   if (BitWidth > width)
1085     return trunc(width);
1086   return *this;
1087 }
1088
1089 APInt APInt::sextOrTrunc(unsigned width) const {
1090   if (BitWidth < width)
1091     return sext(width);
1092   if (BitWidth > width)
1093     return trunc(width);
1094   return *this;
1095 }
1096
1097 /// Arithmetic right-shift this APInt by shiftAmt.
1098 /// @brief Arithmetic right-shift function.
1099 APInt APInt::ashr(const APInt &shiftAmt) const {
1100   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1101 }
1102
1103 /// Arithmetic right-shift this APInt by shiftAmt.
1104 /// @brief Arithmetic right-shift function.
1105 APInt APInt::ashr(unsigned shiftAmt) const {
1106   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1107   // Handle a degenerate case
1108   if (shiftAmt == 0)
1109     return *this;
1110
1111   // Handle single word shifts with built-in ashr
1112   if (isSingleWord()) {
1113     if (shiftAmt == BitWidth)
1114       return APInt(BitWidth, 0); // undefined
1115     else {
1116       unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1117       return APInt(BitWidth,
1118         (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1119     }
1120   }
1121
1122   // If all the bits were shifted out, the result is, technically, undefined.
1123   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1124   // issues in the algorithm below.
1125   if (shiftAmt == BitWidth) {
1126     if (isNegative())
1127       return APInt(BitWidth, -1ULL, true);
1128     else
1129       return APInt(BitWidth, 0);
1130   }
1131
1132   // Create some space for the result.
1133   uint64_t * val = new uint64_t[getNumWords()];
1134
1135   // Compute some values needed by the following shift algorithms
1136   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1137   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1138   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1139   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1140   if (bitsInWord == 0)
1141     bitsInWord = APINT_BITS_PER_WORD;
1142
1143   // If we are shifting whole words, just move whole words
1144   if (wordShift == 0) {
1145     // Move the words containing significant bits
1146     for (unsigned i = 0; i <= breakWord; ++i)
1147       val[i] = pVal[i+offset]; // move whole word
1148
1149     // Adjust the top significant word for sign bit fill, if negative
1150     if (isNegative())
1151       if (bitsInWord < APINT_BITS_PER_WORD)
1152         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1153   } else {
1154     // Shift the low order words
1155     for (unsigned i = 0; i < breakWord; ++i) {
1156       // This combines the shifted corresponding word with the low bits from
1157       // the next word (shifted into this word's high bits).
1158       val[i] = (pVal[i+offset] >> wordShift) |
1159                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1160     }
1161
1162     // Shift the break word. In this case there are no bits from the next word
1163     // to include in this word.
1164     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1165
1166     // Deal with sign extenstion in the break word, and possibly the word before
1167     // it.
1168     if (isNegative()) {
1169       if (wordShift > bitsInWord) {
1170         if (breakWord > 0)
1171           val[breakWord-1] |=
1172             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1173         val[breakWord] |= ~0ULL;
1174       } else
1175         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1176     }
1177   }
1178
1179   // Remaining words are 0 or -1, just assign them.
1180   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1181   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1182     val[i] = fillValue;
1183   return APInt(val, BitWidth).clearUnusedBits();
1184 }
1185
1186 /// Logical right-shift this APInt by shiftAmt.
1187 /// @brief Logical right-shift function.
1188 APInt APInt::lshr(const APInt &shiftAmt) const {
1189   return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1190 }
1191
1192 /// Logical right-shift this APInt by shiftAmt.
1193 /// @brief Logical right-shift function.
1194 APInt APInt::lshr(unsigned shiftAmt) const {
1195   if (isSingleWord()) {
1196     if (shiftAmt == BitWidth)
1197       return APInt(BitWidth, 0);
1198     else
1199       return APInt(BitWidth, this->VAL >> shiftAmt);
1200   }
1201
1202   // If all the bits were shifted out, the result is 0. This avoids issues
1203   // with shifting by the size of the integer type, which produces undefined
1204   // results. We define these "undefined results" to always be 0.
1205   if (shiftAmt == BitWidth)
1206     return APInt(BitWidth, 0);
1207
1208   // If none of the bits are shifted out, the result is *this. This avoids
1209   // issues with shifting by the size of the integer type, which produces
1210   // undefined results in the code below. This is also an optimization.
1211   if (shiftAmt == 0)
1212     return *this;
1213
1214   // Create some space for the result.
1215   uint64_t * val = new uint64_t[getNumWords()];
1216
1217   // If we are shifting less than a word, compute the shift with a simple carry
1218   if (shiftAmt < APINT_BITS_PER_WORD) {
1219     uint64_t carry = 0;
1220     for (int i = getNumWords()-1; i >= 0; --i) {
1221       val[i] = (pVal[i] >> shiftAmt) | carry;
1222       carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1223     }
1224     return APInt(val, BitWidth).clearUnusedBits();
1225   }
1226
1227   // Compute some values needed by the remaining shift algorithms
1228   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1229   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1230
1231   // If we are shifting whole words, just move whole words
1232   if (wordShift == 0) {
1233     for (unsigned i = 0; i < getNumWords() - offset; ++i)
1234       val[i] = pVal[i+offset];
1235     for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1236       val[i] = 0;
1237     return APInt(val,BitWidth).clearUnusedBits();
1238   }
1239
1240   // Shift the low order words
1241   unsigned breakWord = getNumWords() - offset -1;
1242   for (unsigned i = 0; i < breakWord; ++i)
1243     val[i] = (pVal[i+offset] >> wordShift) |
1244              (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1245   // Shift the break word.
1246   val[breakWord] = pVal[breakWord+offset] >> wordShift;
1247
1248   // Remaining words are 0
1249   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1250     val[i] = 0;
1251   return APInt(val, BitWidth).clearUnusedBits();
1252 }
1253
1254 /// Left-shift this APInt by shiftAmt.
1255 /// @brief Left-shift function.
1256 APInt APInt::shl(const APInt &shiftAmt) const {
1257   // It's undefined behavior in C to shift by BitWidth or greater.
1258   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1259 }
1260
1261 APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1262   // If all the bits were shifted out, the result is 0. This avoids issues
1263   // with shifting by the size of the integer type, which produces undefined
1264   // results. We define these "undefined results" to always be 0.
1265   if (shiftAmt == BitWidth)
1266     return APInt(BitWidth, 0);
1267
1268   // If none of the bits are shifted out, the result is *this. This avoids a
1269   // lshr by the words size in the loop below which can produce incorrect
1270   // results. It also avoids the expensive computation below for a common case.
1271   if (shiftAmt == 0)
1272     return *this;
1273
1274   // Create some space for the result.
1275   uint64_t * val = new uint64_t[getNumWords()];
1276
1277   // If we are shifting less than a word, do it the easy way
1278   if (shiftAmt < APINT_BITS_PER_WORD) {
1279     uint64_t carry = 0;
1280     for (unsigned i = 0; i < getNumWords(); i++) {
1281       val[i] = pVal[i] << shiftAmt | carry;
1282       carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1283     }
1284     return APInt(val, BitWidth).clearUnusedBits();
1285   }
1286
1287   // Compute some values needed by the remaining shift algorithms
1288   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1289   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1290
1291   // If we are shifting whole words, just move whole words
1292   if (wordShift == 0) {
1293     for (unsigned i = 0; i < offset; i++)
1294       val[i] = 0;
1295     for (unsigned i = offset; i < getNumWords(); i++)
1296       val[i] = pVal[i-offset];
1297     return APInt(val,BitWidth).clearUnusedBits();
1298   }
1299
1300   // Copy whole words from this to Result.
1301   unsigned i = getNumWords() - 1;
1302   for (; i > offset; --i)
1303     val[i] = pVal[i-offset] << wordShift |
1304              pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1305   val[offset] = pVal[0] << wordShift;
1306   for (i = 0; i < offset; ++i)
1307     val[i] = 0;
1308   return APInt(val, BitWidth).clearUnusedBits();
1309 }
1310
1311 APInt APInt::rotl(const APInt &rotateAmt) const {
1312   return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1313 }
1314
1315 APInt APInt::rotl(unsigned rotateAmt) const {
1316   if (rotateAmt == 0)
1317     return *this;
1318   // Don't get too fancy, just use existing shift/or facilities
1319   APInt hi(*this);
1320   APInt lo(*this);
1321   hi.shl(rotateAmt);
1322   lo.lshr(BitWidth - rotateAmt);
1323   return hi | lo;
1324 }
1325
1326 APInt APInt::rotr(const APInt &rotateAmt) const {
1327   return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1328 }
1329
1330 APInt APInt::rotr(unsigned rotateAmt) const {
1331   if (rotateAmt == 0)
1332     return *this;
1333   // Don't get too fancy, just use existing shift/or facilities
1334   APInt hi(*this);
1335   APInt lo(*this);
1336   lo.lshr(rotateAmt);
1337   hi.shl(BitWidth - rotateAmt);
1338   return hi | lo;
1339 }
1340
1341 // Square Root - this method computes and returns the square root of "this".
1342 // Three mechanisms are used for computation. For small values (<= 5 bits),
1343 // a table lookup is done. This gets some performance for common cases. For
1344 // values using less than 52 bits, the value is converted to double and then
1345 // the libc sqrt function is called. The result is rounded and then converted
1346 // back to a uint64_t which is then used to construct the result. Finally,
1347 // the Babylonian method for computing square roots is used.
1348 APInt APInt::sqrt() const {
1349
1350   // Determine the magnitude of the value.
1351   unsigned magnitude = getActiveBits();
1352
1353   // Use a fast table for some small values. This also gets rid of some
1354   // rounding errors in libc sqrt for small values.
1355   if (magnitude <= 5) {
1356     static const uint8_t results[32] = {
1357       /*     0 */ 0,
1358       /*  1- 2 */ 1, 1,
1359       /*  3- 6 */ 2, 2, 2, 2,
1360       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1361       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1362       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1363       /*    31 */ 6
1364     };
1365     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1366   }
1367
1368   // If the magnitude of the value fits in less than 52 bits (the precision of
1369   // an IEEE double precision floating point value), then we can use the
1370   // libc sqrt function which will probably use a hardware sqrt computation.
1371   // This should be faster than the algorithm below.
1372   if (magnitude < 52) {
1373 #if HAVE_ROUND
1374     return APInt(BitWidth,
1375                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1376 #else
1377     return APInt(BitWidth,
1378                  uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
1379 #endif
1380   }
1381
1382   // Okay, all the short cuts are exhausted. We must compute it. The following
1383   // is a classical Babylonian method for computing the square root. This code
1384   // was adapted to APINt from a wikipedia article on such computations.
1385   // See http://www.wikipedia.org/ and go to the page named
1386   // Calculate_an_integer_square_root.
1387   unsigned nbits = BitWidth, i = 4;
1388   APInt testy(BitWidth, 16);
1389   APInt x_old(BitWidth, 1);
1390   APInt x_new(BitWidth, 0);
1391   APInt two(BitWidth, 2);
1392
1393   // Select a good starting value using binary logarithms.
1394   for (;; i += 2, testy = testy.shl(2))
1395     if (i >= nbits || this->ule(testy)) {
1396       x_old = x_old.shl(i / 2);
1397       break;
1398     }
1399
1400   // Use the Babylonian method to arrive at the integer square root:
1401   for (;;) {
1402     x_new = (this->udiv(x_old) + x_old).udiv(two);
1403     if (x_old.ule(x_new))
1404       break;
1405     x_old = x_new;
1406   }
1407
1408   // Make sure we return the closest approximation
1409   // NOTE: The rounding calculation below is correct. It will produce an
1410   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1411   // determined to be a rounding issue with pari/gp as it begins to use a
1412   // floating point representation after 192 bits. There are no discrepancies
1413   // between this algorithm and pari/gp for bit widths < 192 bits.
1414   APInt square(x_old * x_old);
1415   APInt nextSquare((x_old + 1) * (x_old +1));
1416   if (this->ult(square))
1417     return x_old;
1418   else if (this->ule(nextSquare)) {
1419     APInt midpoint((nextSquare - square).udiv(two));
1420     APInt offset(*this - square);
1421     if (offset.ult(midpoint))
1422       return x_old;
1423     else
1424       return x_old + 1;
1425   } else
1426     llvm_unreachable("Error in APInt::sqrt computation");
1427   return x_old + 1;
1428 }
1429
1430 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1431 /// iterative extended Euclidean algorithm is used to solve for this value,
1432 /// however we simplify it to speed up calculating only the inverse, and take
1433 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1434 /// (potentially large) APInts around.
1435 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1436   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1437
1438   // Using the properties listed at the following web page (accessed 06/21/08):
1439   //   http://www.numbertheory.org/php/euclid.html
1440   // (especially the properties numbered 3, 4 and 9) it can be proved that
1441   // BitWidth bits suffice for all the computations in the algorithm implemented
1442   // below. More precisely, this number of bits suffice if the multiplicative
1443   // inverse exists, but may not suffice for the general extended Euclidean
1444   // algorithm.
1445
1446   APInt r[2] = { modulo, *this };
1447   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1448   APInt q(BitWidth, 0);
1449
1450   unsigned i;
1451   for (i = 0; r[i^1] != 0; i ^= 1) {
1452     // An overview of the math without the confusing bit-flipping:
1453     // q = r[i-2] / r[i-1]
1454     // r[i] = r[i-2] % r[i-1]
1455     // t[i] = t[i-2] - t[i-1] * q
1456     udivrem(r[i], r[i^1], q, r[i]);
1457     t[i] -= t[i^1] * q;
1458   }
1459
1460   // If this APInt and the modulo are not coprime, there is no multiplicative
1461   // inverse, so return 0. We check this by looking at the next-to-last
1462   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1463   // algorithm.
1464   if (r[i] != 1)
1465     return APInt(BitWidth, 0);
1466
1467   // The next-to-last t is the multiplicative inverse.  However, we are
1468   // interested in a positive inverse. Calcuate a positive one from a negative
1469   // one if necessary. A simple addition of the modulo suffices because
1470   // abs(t[i]) is known to be less than *this/2 (see the link above).
1471   return t[i].isNegative() ? t[i] + modulo : t[i];
1472 }
1473
1474 /// Calculate the magic numbers required to implement a signed integer division
1475 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1476 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1477 /// Warren, Jr., chapter 10.
1478 APInt::ms APInt::magic() const {
1479   const APInt& d = *this;
1480   unsigned p;
1481   APInt ad, anc, delta, q1, r1, q2, r2, t;
1482   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1483   struct ms mag;
1484
1485   ad = d.abs();
1486   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1487   anc = t - 1 - t.urem(ad);   // absolute value of nc
1488   p = d.getBitWidth() - 1;    // initialize p
1489   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1490   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1491   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1492   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1493   do {
1494     p = p + 1;
1495     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1496     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1497     if (r1.uge(anc)) {  // must be unsigned comparison
1498       q1 = q1 + 1;
1499       r1 = r1 - anc;
1500     }
1501     q2 = q2<<1;          // update q2 = 2p/abs(d)
1502     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1503     if (r2.uge(ad)) {   // must be unsigned comparison
1504       q2 = q2 + 1;
1505       r2 = r2 - ad;
1506     }
1507     delta = ad - r2;
1508   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1509
1510   mag.m = q2 + 1;
1511   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1512   mag.s = p - d.getBitWidth();          // resulting shift
1513   return mag;
1514 }
1515
1516 /// Calculate the magic numbers required to implement an unsigned integer
1517 /// division by a constant as a sequence of multiplies, adds and shifts.
1518 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1519 /// S. Warren, Jr., chapter 10.
1520 /// LeadingZeros can be used to simplify the calculation if the upper bits
1521 /// of the divided value are known zero.
1522 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1523   const APInt& d = *this;
1524   unsigned p;
1525   APInt nc, delta, q1, r1, q2, r2;
1526   struct mu magu;
1527   magu.a = 0;               // initialize "add" indicator
1528   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1529   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1530   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1531
1532   nc = allOnes - (-d).urem(d);
1533   p = d.getBitWidth() - 1;  // initialize p
1534   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1535   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1536   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1537   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1538   do {
1539     p = p + 1;
1540     if (r1.uge(nc - r1)) {
1541       q1 = q1 + q1 + 1;  // update q1
1542       r1 = r1 + r1 - nc; // update r1
1543     }
1544     else {
1545       q1 = q1+q1; // update q1
1546       r1 = r1+r1; // update r1
1547     }
1548     if ((r2 + 1).uge(d - r2)) {
1549       if (q2.uge(signedMax)) magu.a = 1;
1550       q2 = q2+q2 + 1;     // update q2
1551       r2 = r2+r2 + 1 - d; // update r2
1552     }
1553     else {
1554       if (q2.uge(signedMin)) magu.a = 1;
1555       q2 = q2+q2;     // update q2
1556       r2 = r2+r2 + 1; // update r2
1557     }
1558     delta = d - 1 - r2;
1559   } while (p < d.getBitWidth()*2 &&
1560            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1561   magu.m = q2 + 1; // resulting magic number
1562   magu.s = p - d.getBitWidth();  // resulting shift
1563   return magu;
1564 }
1565
1566 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1567 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1568 /// variables here have the same names as in the algorithm. Comments explain
1569 /// the algorithm and any deviation from it.
1570 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1571                      unsigned m, unsigned n) {
1572   assert(u && "Must provide dividend");
1573   assert(v && "Must provide divisor");
1574   assert(q && "Must provide quotient");
1575   assert(u != v && u != q && v != q && "Must us different memory");
1576   assert(n>1 && "n must be > 1");
1577
1578   // Knuth uses the value b as the base of the number system. In our case b
1579   // is 2^31 so we just set it to -1u.
1580   uint64_t b = uint64_t(1) << 32;
1581
1582 #if 0
1583   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1584   DEBUG(dbgs() << "KnuthDiv: original:");
1585   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1586   DEBUG(dbgs() << " by");
1587   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1588   DEBUG(dbgs() << '\n');
1589 #endif
1590   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1591   // u and v by d. Note that we have taken Knuth's advice here to use a power
1592   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1593   // 2 allows us to shift instead of multiply and it is easy to determine the
1594   // shift amount from the leading zeros.  We are basically normalizing the u
1595   // and v so that its high bits are shifted to the top of v's range without
1596   // overflow. Note that this can require an extra word in u so that u must
1597   // be of length m+n+1.
1598   unsigned shift = CountLeadingZeros_32(v[n-1]);
1599   unsigned v_carry = 0;
1600   unsigned u_carry = 0;
1601   if (shift) {
1602     for (unsigned i = 0; i < m+n; ++i) {
1603       unsigned u_tmp = u[i] >> (32 - shift);
1604       u[i] = (u[i] << shift) | u_carry;
1605       u_carry = u_tmp;
1606     }
1607     for (unsigned i = 0; i < n; ++i) {
1608       unsigned v_tmp = v[i] >> (32 - shift);
1609       v[i] = (v[i] << shift) | v_carry;
1610       v_carry = v_tmp;
1611     }
1612   }
1613   u[m+n] = u_carry;
1614 #if 0
1615   DEBUG(dbgs() << "KnuthDiv:   normal:");
1616   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1617   DEBUG(dbgs() << " by");
1618   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1619   DEBUG(dbgs() << '\n');
1620 #endif
1621
1622   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1623   int j = m;
1624   do {
1625     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1626     // D3. [Calculate q'.].
1627     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1628     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1629     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1630     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1631     // on v[n-2] determines at high speed most of the cases in which the trial
1632     // value qp is one too large, and it eliminates all cases where qp is two
1633     // too large.
1634     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1635     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1636     uint64_t qp = dividend / v[n-1];
1637     uint64_t rp = dividend % v[n-1];
1638     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1639       qp--;
1640       rp += v[n-1];
1641       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1642         qp--;
1643     }
1644     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1645
1646     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1647     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1648     // consists of a simple multiplication by a one-place number, combined with
1649     // a subtraction.
1650     bool isNeg = false;
1651     for (unsigned i = 0; i < n; ++i) {
1652       uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1653       uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1654       bool borrow = subtrahend > u_tmp;
1655       DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1656                    << ", subtrahend == " << subtrahend
1657                    << ", borrow = " << borrow << '\n');
1658
1659       uint64_t result = u_tmp - subtrahend;
1660       unsigned k = j + i;
1661       u[k++] = (unsigned)(result & (b-1)); // subtract low word
1662       u[k++] = (unsigned)(result >> 32);   // subtract high word
1663       while (borrow && k <= m+n) { // deal with borrow to the left
1664         borrow = u[k] == 0;
1665         u[k]--;
1666         k++;
1667       }
1668       isNeg |= borrow;
1669       DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1670                     u[j+i+1] << '\n');
1671     }
1672     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1673     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1674     DEBUG(dbgs() << '\n');
1675     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1676     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1677     // true value plus b**(n+1), namely as the b's complement of
1678     // the true value, and a "borrow" to the left should be remembered.
1679     //
1680     if (isNeg) {
1681       bool carry = true;  // true because b's complement is "complement + 1"
1682       for (unsigned i = 0; i <= m+n; ++i) {
1683         u[i] = ~u[i] + carry; // b's complement
1684         carry = carry && u[i] == 0;
1685       }
1686     }
1687     DEBUG(dbgs() << "KnuthDiv: after complement:");
1688     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1689     DEBUG(dbgs() << '\n');
1690
1691     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1692     // negative, go to step D6; otherwise go on to step D7.
1693     q[j] = (unsigned)qp;
1694     if (isNeg) {
1695       // D6. [Add back]. The probability that this step is necessary is very
1696       // small, on the order of only 2/b. Make sure that test data accounts for
1697       // this possibility. Decrease q[j] by 1
1698       q[j]--;
1699       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1700       // A carry will occur to the left of u[j+n], and it should be ignored
1701       // since it cancels with the borrow that occurred in D4.
1702       bool carry = false;
1703       for (unsigned i = 0; i < n; i++) {
1704         unsigned limit = std::min(u[j+i],v[i]);
1705         u[j+i] += v[i] + carry;
1706         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1707       }
1708       u[j+n] += carry;
1709     }
1710     DEBUG(dbgs() << "KnuthDiv: after correction:");
1711     DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1712     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1713
1714   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1715   } while (--j >= 0);
1716
1717   DEBUG(dbgs() << "KnuthDiv: quotient:");
1718   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1719   DEBUG(dbgs() << '\n');
1720
1721   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1722   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1723   // compute the remainder (urem uses this).
1724   if (r) {
1725     // The value d is expressed by the "shift" value above since we avoided
1726     // multiplication by d by using a shift left. So, all we have to do is
1727     // shift right here. In order to mak
1728     if (shift) {
1729       unsigned carry = 0;
1730       DEBUG(dbgs() << "KnuthDiv: remainder:");
1731       for (int i = n-1; i >= 0; i--) {
1732         r[i] = (u[i] >> shift) | carry;
1733         carry = u[i] << (32 - shift);
1734         DEBUG(dbgs() << " " << r[i]);
1735       }
1736     } else {
1737       for (int i = n-1; i >= 0; i--) {
1738         r[i] = u[i];
1739         DEBUG(dbgs() << " " << r[i]);
1740       }
1741     }
1742     DEBUG(dbgs() << '\n');
1743   }
1744 #if 0
1745   DEBUG(dbgs() << '\n');
1746 #endif
1747 }
1748
1749 void APInt::divide(const APInt LHS, unsigned lhsWords,
1750                    const APInt &RHS, unsigned rhsWords,
1751                    APInt *Quotient, APInt *Remainder)
1752 {
1753   assert(lhsWords >= rhsWords && "Fractional result");
1754
1755   // First, compose the values into an array of 32-bit words instead of
1756   // 64-bit words. This is a necessity of both the "short division" algorithm
1757   // and the Knuth "classical algorithm" which requires there to be native
1758   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1759   // can't use 64-bit operands here because we don't have native results of
1760   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1761   // work on large-endian machines.
1762   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1763   unsigned n = rhsWords * 2;
1764   unsigned m = (lhsWords * 2) - n;
1765
1766   // Allocate space for the temporary values we need either on the stack, if
1767   // it will fit, or on the heap if it won't.
1768   unsigned SPACE[128];
1769   unsigned *U = 0;
1770   unsigned *V = 0;
1771   unsigned *Q = 0;
1772   unsigned *R = 0;
1773   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1774     U = &SPACE[0];
1775     V = &SPACE[m+n+1];
1776     Q = &SPACE[(m+n+1) + n];
1777     if (Remainder)
1778       R = &SPACE[(m+n+1) + n + (m+n)];
1779   } else {
1780     U = new unsigned[m + n + 1];
1781     V = new unsigned[n];
1782     Q = new unsigned[m+n];
1783     if (Remainder)
1784       R = new unsigned[n];
1785   }
1786
1787   // Initialize the dividend
1788   memset(U, 0, (m+n+1)*sizeof(unsigned));
1789   for (unsigned i = 0; i < lhsWords; ++i) {
1790     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1791     U[i * 2] = (unsigned)(tmp & mask);
1792     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1793   }
1794   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1795
1796   // Initialize the divisor
1797   memset(V, 0, (n)*sizeof(unsigned));
1798   for (unsigned i = 0; i < rhsWords; ++i) {
1799     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1800     V[i * 2] = (unsigned)(tmp & mask);
1801     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1802   }
1803
1804   // initialize the quotient and remainder
1805   memset(Q, 0, (m+n) * sizeof(unsigned));
1806   if (Remainder)
1807     memset(R, 0, n * sizeof(unsigned));
1808
1809   // Now, adjust m and n for the Knuth division. n is the number of words in
1810   // the divisor. m is the number of words by which the dividend exceeds the
1811   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1812   // contain any zero words or the Knuth algorithm fails.
1813   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1814     n--;
1815     m++;
1816   }
1817   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1818     m--;
1819
1820   // If we're left with only a single word for the divisor, Knuth doesn't work
1821   // so we implement the short division algorithm here. This is much simpler
1822   // and faster because we are certain that we can divide a 64-bit quantity
1823   // by a 32-bit quantity at hardware speed and short division is simply a
1824   // series of such operations. This is just like doing short division but we
1825   // are using base 2^32 instead of base 10.
1826   assert(n != 0 && "Divide by zero?");
1827   if (n == 1) {
1828     unsigned divisor = V[0];
1829     unsigned remainder = 0;
1830     for (int i = m+n-1; i >= 0; i--) {
1831       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1832       if (partial_dividend == 0) {
1833         Q[i] = 0;
1834         remainder = 0;
1835       } else if (partial_dividend < divisor) {
1836         Q[i] = 0;
1837         remainder = (unsigned)partial_dividend;
1838       } else if (partial_dividend == divisor) {
1839         Q[i] = 1;
1840         remainder = 0;
1841       } else {
1842         Q[i] = (unsigned)(partial_dividend / divisor);
1843         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1844       }
1845     }
1846     if (R)
1847       R[0] = remainder;
1848   } else {
1849     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1850     // case n > 1.
1851     KnuthDiv(U, V, Q, R, m, n);
1852   }
1853
1854   // If the caller wants the quotient
1855   if (Quotient) {
1856     // Set up the Quotient value's memory.
1857     if (Quotient->BitWidth != LHS.BitWidth) {
1858       if (Quotient->isSingleWord())
1859         Quotient->VAL = 0;
1860       else
1861         delete [] Quotient->pVal;
1862       Quotient->BitWidth = LHS.BitWidth;
1863       if (!Quotient->isSingleWord())
1864         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1865     } else
1866       Quotient->clearAllBits();
1867
1868     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1869     // order words.
1870     if (lhsWords == 1) {
1871       uint64_t tmp =
1872         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1873       if (Quotient->isSingleWord())
1874         Quotient->VAL = tmp;
1875       else
1876         Quotient->pVal[0] = tmp;
1877     } else {
1878       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1879       for (unsigned i = 0; i < lhsWords; ++i)
1880         Quotient->pVal[i] =
1881           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1882     }
1883   }
1884
1885   // If the caller wants the remainder
1886   if (Remainder) {
1887     // Set up the Remainder value's memory.
1888     if (Remainder->BitWidth != RHS.BitWidth) {
1889       if (Remainder->isSingleWord())
1890         Remainder->VAL = 0;
1891       else
1892         delete [] Remainder->pVal;
1893       Remainder->BitWidth = RHS.BitWidth;
1894       if (!Remainder->isSingleWord())
1895         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1896     } else
1897       Remainder->clearAllBits();
1898
1899     // The remainder is in R. Reconstitute the remainder into Remainder's low
1900     // order words.
1901     if (rhsWords == 1) {
1902       uint64_t tmp =
1903         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1904       if (Remainder->isSingleWord())
1905         Remainder->VAL = tmp;
1906       else
1907         Remainder->pVal[0] = tmp;
1908     } else {
1909       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1910       for (unsigned i = 0; i < rhsWords; ++i)
1911         Remainder->pVal[i] =
1912           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1913     }
1914   }
1915
1916   // Clean up the memory we allocated.
1917   if (U != &SPACE[0]) {
1918     delete [] U;
1919     delete [] V;
1920     delete [] Q;
1921     delete [] R;
1922   }
1923 }
1924
1925 APInt APInt::udiv(const APInt& RHS) const {
1926   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1927
1928   // First, deal with the easy case
1929   if (isSingleWord()) {
1930     assert(RHS.VAL != 0 && "Divide by zero?");
1931     return APInt(BitWidth, VAL / RHS.VAL);
1932   }
1933
1934   // Get some facts about the LHS and RHS number of bits and words
1935   unsigned rhsBits = RHS.getActiveBits();
1936   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1937   assert(rhsWords && "Divided by zero???");
1938   unsigned lhsBits = this->getActiveBits();
1939   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1940
1941   // Deal with some degenerate cases
1942   if (!lhsWords)
1943     // 0 / X ===> 0
1944     return APInt(BitWidth, 0);
1945   else if (lhsWords < rhsWords || this->ult(RHS)) {
1946     // X / Y ===> 0, iff X < Y
1947     return APInt(BitWidth, 0);
1948   } else if (*this == RHS) {
1949     // X / X ===> 1
1950     return APInt(BitWidth, 1);
1951   } else if (lhsWords == 1 && rhsWords == 1) {
1952     // All high words are zero, just use native divide
1953     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1954   }
1955
1956   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1957   APInt Quotient(1,0); // to hold result.
1958   divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1959   return Quotient;
1960 }
1961
1962 APInt APInt::urem(const APInt& RHS) const {
1963   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1964   if (isSingleWord()) {
1965     assert(RHS.VAL != 0 && "Remainder by zero?");
1966     return APInt(BitWidth, VAL % RHS.VAL);
1967   }
1968
1969   // Get some facts about the LHS
1970   unsigned lhsBits = getActiveBits();
1971   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1972
1973   // Get some facts about the RHS
1974   unsigned rhsBits = RHS.getActiveBits();
1975   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1976   assert(rhsWords && "Performing remainder operation by zero ???");
1977
1978   // Check the degenerate cases
1979   if (lhsWords == 0) {
1980     // 0 % Y ===> 0
1981     return APInt(BitWidth, 0);
1982   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1983     // X % Y ===> X, iff X < Y
1984     return *this;
1985   } else if (*this == RHS) {
1986     // X % X == 0;
1987     return APInt(BitWidth, 0);
1988   } else if (lhsWords == 1) {
1989     // All high words are zero, just use native remainder
1990     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1991   }
1992
1993   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1994   APInt Remainder(1,0);
1995   divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1996   return Remainder;
1997 }
1998
1999 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2000                     APInt &Quotient, APInt &Remainder) {
2001   // Get some size facts about the dividend and divisor
2002   unsigned lhsBits  = LHS.getActiveBits();
2003   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2004   unsigned rhsBits  = RHS.getActiveBits();
2005   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2006
2007   // Check the degenerate cases
2008   if (lhsWords == 0) {
2009     Quotient = 0;                // 0 / Y ===> 0
2010     Remainder = 0;               // 0 % Y ===> 0
2011     return;
2012   }
2013
2014   if (lhsWords < rhsWords || LHS.ult(RHS)) {
2015     Remainder = LHS;            // X % Y ===> X, iff X < Y
2016     Quotient = 0;               // X / Y ===> 0, iff X < Y
2017     return;
2018   }
2019
2020   if (LHS == RHS) {
2021     Quotient  = 1;              // X / X ===> 1
2022     Remainder = 0;              // X % X ===> 0;
2023     return;
2024   }
2025
2026   if (lhsWords == 1 && rhsWords == 1) {
2027     // There is only one word to consider so use the native versions.
2028     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2029     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2030     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2031     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2032     return;
2033   }
2034
2035   // Okay, lets do it the long way
2036   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2037 }
2038
2039 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
2040   APInt Res = *this+RHS;
2041   Overflow = isNonNegative() == RHS.isNonNegative() &&
2042              Res.isNonNegative() != isNonNegative();
2043   return Res;
2044 }
2045
2046 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
2047   APInt Res = *this+RHS;
2048   Overflow = Res.ult(RHS);
2049   return Res;
2050 }
2051
2052 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
2053   APInt Res = *this - RHS;
2054   Overflow = isNonNegative() != RHS.isNonNegative() &&
2055              Res.isNonNegative() != isNonNegative();
2056   return Res;
2057 }
2058
2059 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
2060   APInt Res = *this-RHS;
2061   Overflow = Res.ugt(*this);
2062   return Res;
2063 }
2064
2065 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
2066   // MININT/-1  -->  overflow.
2067   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
2068   return sdiv(RHS);
2069 }
2070
2071 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
2072   APInt Res = *this * RHS;
2073
2074   if (*this != 0 && RHS != 0)
2075     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
2076   else
2077     Overflow = false;
2078   return Res;
2079 }
2080
2081 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
2082   APInt Res = *this * RHS;
2083
2084   if (*this != 0 && RHS != 0)
2085     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
2086   else
2087     Overflow = false;
2088   return Res;
2089 }
2090
2091 APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
2092   Overflow = ShAmt >= getBitWidth();
2093   if (Overflow)
2094     ShAmt = getBitWidth()-1;
2095
2096   if (isNonNegative()) // Don't allow sign change.
2097     Overflow = ShAmt >= countLeadingZeros();
2098   else
2099     Overflow = ShAmt >= countLeadingOnes();
2100
2101   return *this << ShAmt;
2102 }
2103
2104
2105
2106
2107 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2108   // Check our assumptions here
2109   assert(!str.empty() && "Invalid string length");
2110   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2111          "Radix should be 2, 8, 10, or 16!");
2112
2113   StringRef::iterator p = str.begin();
2114   size_t slen = str.size();
2115   bool isNeg = *p == '-';
2116   if (*p == '-' || *p == '+') {
2117     p++;
2118     slen--;
2119     assert(slen && "String is only a sign, needs a value.");
2120   }
2121   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2122   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2123   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2124   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2125          "Insufficient bit width");
2126
2127   // Allocate memory
2128   if (!isSingleWord())
2129     pVal = getClearedMemory(getNumWords());
2130
2131   // Figure out if we can shift instead of multiply
2132   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2133
2134   // Set up an APInt for the digit to add outside the loop so we don't
2135   // constantly construct/destruct it.
2136   APInt apdigit(getBitWidth(), 0);
2137   APInt apradix(getBitWidth(), radix);
2138
2139   // Enter digit traversal loop
2140   for (StringRef::iterator e = str.end(); p != e; ++p) {
2141     unsigned digit = getDigit(*p, radix);
2142     assert(digit < radix && "Invalid character in digit string");
2143
2144     // Shift or multiply the value by the radix
2145     if (slen > 1) {
2146       if (shift)
2147         *this <<= shift;
2148       else
2149         *this *= apradix;
2150     }
2151
2152     // Add in the digit we just interpreted
2153     if (apdigit.isSingleWord())
2154       apdigit.VAL = digit;
2155     else
2156       apdigit.pVal[0] = digit;
2157     *this += apdigit;
2158   }
2159   // If its negative, put it in two's complement form
2160   if (isNeg) {
2161     (*this)--;
2162     this->flipAllBits();
2163   }
2164 }
2165
2166 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2167                      bool Signed, bool formatAsCLiteral) const {
2168   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2169          "Radix should be 2, 8, 10, or 16!");
2170
2171   const char *Prefix = "";
2172   if (formatAsCLiteral) {
2173     switch (Radix) {
2174       case 2:
2175         // Binary literals are a non-standard extension added in gcc 4.3:
2176         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2177         Prefix = "0b";
2178         break;
2179       case 8:
2180         Prefix = "0";
2181         break;
2182       case 16:
2183         Prefix = "0x";
2184         break;
2185     }
2186   }
2187
2188   // First, check for a zero value and just short circuit the logic below.
2189   if (*this == 0) {
2190     while (*Prefix) {
2191       Str.push_back(*Prefix);
2192       ++Prefix;
2193     };
2194     Str.push_back('0');
2195     return;
2196   }
2197
2198   static const char Digits[] = "0123456789ABCDEF";
2199
2200   if (isSingleWord()) {
2201     char Buffer[65];
2202     char *BufPtr = Buffer+65;
2203
2204     uint64_t N;
2205     if (!Signed) {
2206       N = getZExtValue();
2207     } else {
2208       int64_t I = getSExtValue();
2209       if (I >= 0) {
2210         N = I;
2211       } else {
2212         Str.push_back('-');
2213         N = -(uint64_t)I;
2214       }
2215     }
2216
2217     while (*Prefix) {
2218       Str.push_back(*Prefix);
2219       ++Prefix;
2220     };
2221
2222     while (N) {
2223       *--BufPtr = Digits[N % Radix];
2224       N /= Radix;
2225     }
2226     Str.append(BufPtr, Buffer+65);
2227     return;
2228   }
2229
2230   APInt Tmp(*this);
2231
2232   if (Signed && isNegative()) {
2233     // They want to print the signed version and it is a negative value
2234     // Flip the bits and add one to turn it into the equivalent positive
2235     // value and put a '-' in the result.
2236     Tmp.flipAllBits();
2237     Tmp++;
2238     Str.push_back('-');
2239   }
2240
2241   while (*Prefix) {
2242     Str.push_back(*Prefix);
2243     ++Prefix;
2244   };
2245
2246   // We insert the digits backward, then reverse them to get the right order.
2247   unsigned StartDig = Str.size();
2248
2249   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2250   // because the number of bits per digit (1, 3 and 4 respectively) divides
2251   // equaly.  We just shift until the value is zero.
2252   if (Radix != 10) {
2253     // Just shift tmp right for each digit width until it becomes zero
2254     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2255     unsigned MaskAmt = Radix - 1;
2256
2257     while (Tmp != 0) {
2258       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2259       Str.push_back(Digits[Digit]);
2260       Tmp = Tmp.lshr(ShiftAmt);
2261     }
2262   } else {
2263     APInt divisor(4, 10);
2264     while (Tmp != 0) {
2265       APInt APdigit(1, 0);
2266       APInt tmp2(Tmp.getBitWidth(), 0);
2267       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2268              &APdigit);
2269       unsigned Digit = (unsigned)APdigit.getZExtValue();
2270       assert(Digit < Radix && "divide failed");
2271       Str.push_back(Digits[Digit]);
2272       Tmp = tmp2;
2273     }
2274   }
2275
2276   // Reverse the digits before returning.
2277   std::reverse(Str.begin()+StartDig, Str.end());
2278 }
2279
2280 /// toString - This returns the APInt as a std::string.  Note that this is an
2281 /// inefficient method.  It is better to pass in a SmallVector/SmallString
2282 /// to the methods above.
2283 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2284   SmallString<40> S;
2285   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2286   return S.str();
2287 }
2288
2289
2290 void APInt::dump() const {
2291   SmallString<40> S, U;
2292   this->toStringUnsigned(U);
2293   this->toStringSigned(S);
2294   dbgs() << "APInt(" << BitWidth << "b, "
2295          << U.str() << "u " << S.str() << "s)";
2296 }
2297
2298 void APInt::print(raw_ostream &OS, bool isSigned) const {
2299   SmallString<40> S;
2300   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2301   OS << S.str();
2302 }
2303
2304 // This implements a variety of operations on a representation of
2305 // arbitrary precision, two's-complement, bignum integer values.
2306
2307 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2308 // and unrestricting assumption.
2309 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2310 COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2311
2312 /* Some handy functions local to this file.  */
2313 namespace {
2314
2315   /* Returns the integer part with the least significant BITS set.
2316      BITS cannot be zero.  */
2317   static inline integerPart
2318   lowBitMask(unsigned int bits)
2319   {
2320     assert(bits != 0 && bits <= integerPartWidth);
2321
2322     return ~(integerPart) 0 >> (integerPartWidth - bits);
2323   }
2324
2325   /* Returns the value of the lower half of PART.  */
2326   static inline integerPart
2327   lowHalf(integerPart part)
2328   {
2329     return part & lowBitMask(integerPartWidth / 2);
2330   }
2331
2332   /* Returns the value of the upper half of PART.  */
2333   static inline integerPart
2334   highHalf(integerPart part)
2335   {
2336     return part >> (integerPartWidth / 2);
2337   }
2338
2339   /* Returns the bit number of the most significant set bit of a part.
2340      If the input number has no bits set -1U is returned.  */
2341   static unsigned int
2342   partMSB(integerPart value)
2343   {
2344     unsigned int n, msb;
2345
2346     if (value == 0)
2347       return -1U;
2348
2349     n = integerPartWidth / 2;
2350
2351     msb = 0;
2352     do {
2353       if (value >> n) {
2354         value >>= n;
2355         msb += n;
2356       }
2357
2358       n >>= 1;
2359     } while (n);
2360
2361     return msb;
2362   }
2363
2364   /* Returns the bit number of the least significant set bit of a
2365      part.  If the input number has no bits set -1U is returned.  */
2366   static unsigned int
2367   partLSB(integerPart value)
2368   {
2369     unsigned int n, lsb;
2370
2371     if (value == 0)
2372       return -1U;
2373
2374     lsb = integerPartWidth - 1;
2375     n = integerPartWidth / 2;
2376
2377     do {
2378       if (value << n) {
2379         value <<= n;
2380         lsb -= n;
2381       }
2382
2383       n >>= 1;
2384     } while (n);
2385
2386     return lsb;
2387   }
2388 }
2389
2390 /* Sets the least significant part of a bignum to the input value, and
2391    zeroes out higher parts.  */
2392 void
2393 APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2394 {
2395   unsigned int i;
2396
2397   assert(parts > 0);
2398
2399   dst[0] = part;
2400   for (i = 1; i < parts; i++)
2401     dst[i] = 0;
2402 }
2403
2404 /* Assign one bignum to another.  */
2405 void
2406 APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2407 {
2408   unsigned int i;
2409
2410   for (i = 0; i < parts; i++)
2411     dst[i] = src[i];
2412 }
2413
2414 /* Returns true if a bignum is zero, false otherwise.  */
2415 bool
2416 APInt::tcIsZero(const integerPart *src, unsigned int parts)
2417 {
2418   unsigned int i;
2419
2420   for (i = 0; i < parts; i++)
2421     if (src[i])
2422       return false;
2423
2424   return true;
2425 }
2426
2427 /* Extract the given bit of a bignum; returns 0 or 1.  */
2428 int
2429 APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2430 {
2431   return (parts[bit / integerPartWidth] &
2432           ((integerPart) 1 << bit % integerPartWidth)) != 0;
2433 }
2434
2435 /* Set the given bit of a bignum. */
2436 void
2437 APInt::tcSetBit(integerPart *parts, unsigned int bit)
2438 {
2439   parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2440 }
2441
2442 /* Clears the given bit of a bignum. */
2443 void
2444 APInt::tcClearBit(integerPart *parts, unsigned int bit)
2445 {
2446   parts[bit / integerPartWidth] &=
2447     ~((integerPart) 1 << (bit % integerPartWidth));
2448 }
2449
2450 /* Returns the bit number of the least significant set bit of a
2451    number.  If the input number has no bits set -1U is returned.  */
2452 unsigned int
2453 APInt::tcLSB(const integerPart *parts, unsigned int n)
2454 {
2455   unsigned int i, lsb;
2456
2457   for (i = 0; i < n; i++) {
2458       if (parts[i] != 0) {
2459           lsb = partLSB(parts[i]);
2460
2461           return lsb + i * integerPartWidth;
2462       }
2463   }
2464
2465   return -1U;
2466 }
2467
2468 /* Returns the bit number of the most significant set bit of a number.
2469    If the input number has no bits set -1U is returned.  */
2470 unsigned int
2471 APInt::tcMSB(const integerPart *parts, unsigned int n)
2472 {
2473   unsigned int msb;
2474
2475   do {
2476     --n;
2477
2478     if (parts[n] != 0) {
2479       msb = partMSB(parts[n]);
2480
2481       return msb + n * integerPartWidth;
2482     }
2483   } while (n);
2484
2485   return -1U;
2486 }
2487
2488 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2489    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2490    the least significant bit of DST.  All high bits above srcBITS in
2491    DST are zero-filled.  */
2492 void
2493 APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2494                  unsigned int srcBits, unsigned int srcLSB)
2495 {
2496   unsigned int firstSrcPart, dstParts, shift, n;
2497
2498   dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2499   assert(dstParts <= dstCount);
2500
2501   firstSrcPart = srcLSB / integerPartWidth;
2502   tcAssign (dst, src + firstSrcPart, dstParts);
2503
2504   shift = srcLSB % integerPartWidth;
2505   tcShiftRight (dst, dstParts, shift);
2506
2507   /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2508      in DST.  If this is less that srcBits, append the rest, else
2509      clear the high bits.  */
2510   n = dstParts * integerPartWidth - shift;
2511   if (n < srcBits) {
2512     integerPart mask = lowBitMask (srcBits - n);
2513     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2514                           << n % integerPartWidth);
2515   } else if (n > srcBits) {
2516     if (srcBits % integerPartWidth)
2517       dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2518   }
2519
2520   /* Clear high parts.  */
2521   while (dstParts < dstCount)
2522     dst[dstParts++] = 0;
2523 }
2524
2525 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2526 integerPart
2527 APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2528              integerPart c, unsigned int parts)
2529 {
2530   unsigned int i;
2531
2532   assert(c <= 1);
2533
2534   for (i = 0; i < parts; i++) {
2535     integerPart l;
2536
2537     l = dst[i];
2538     if (c) {
2539       dst[i] += rhs[i] + 1;
2540       c = (dst[i] <= l);
2541     } else {
2542       dst[i] += rhs[i];
2543       c = (dst[i] < l);
2544     }
2545   }
2546
2547   return c;
2548 }
2549
2550 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2551 integerPart
2552 APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2553                   integerPart c, unsigned int parts)
2554 {
2555   unsigned int i;
2556
2557   assert(c <= 1);
2558
2559   for (i = 0; i < parts; i++) {
2560     integerPart l;
2561
2562     l = dst[i];
2563     if (c) {
2564       dst[i] -= rhs[i] + 1;
2565       c = (dst[i] >= l);
2566     } else {
2567       dst[i] -= rhs[i];
2568       c = (dst[i] > l);
2569     }
2570   }
2571
2572   return c;
2573 }
2574
2575 /* Negate a bignum in-place.  */
2576 void
2577 APInt::tcNegate(integerPart *dst, unsigned int parts)
2578 {
2579   tcComplement(dst, parts);
2580   tcIncrement(dst, parts);
2581 }
2582
2583 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2584     DST  = SRC * MULTIPLIER + CARRY   if add is false
2585
2586     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2587     they must start at the same point, i.e. DST == SRC.
2588
2589     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2590     returned.  Otherwise DST is filled with the least significant
2591     DSTPARTS parts of the result, and if all of the omitted higher
2592     parts were zero return zero, otherwise overflow occurred and
2593     return one.  */
2594 int
2595 APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2596                       integerPart multiplier, integerPart carry,
2597                       unsigned int srcParts, unsigned int dstParts,
2598                       bool add)
2599 {
2600   unsigned int i, n;
2601
2602   /* Otherwise our writes of DST kill our later reads of SRC.  */
2603   assert(dst <= src || dst >= src + srcParts);
2604   assert(dstParts <= srcParts + 1);
2605
2606   /* N loops; minimum of dstParts and srcParts.  */
2607   n = dstParts < srcParts ? dstParts: srcParts;
2608
2609   for (i = 0; i < n; i++) {
2610     integerPart low, mid, high, srcPart;
2611
2612       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2613
2614          This cannot overflow, because
2615
2616          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2617
2618          which is less than n^2.  */
2619
2620     srcPart = src[i];
2621
2622     if (multiplier == 0 || srcPart == 0)        {
2623       low = carry;
2624       high = 0;
2625     } else {
2626       low = lowHalf(srcPart) * lowHalf(multiplier);
2627       high = highHalf(srcPart) * highHalf(multiplier);
2628
2629       mid = lowHalf(srcPart) * highHalf(multiplier);
2630       high += highHalf(mid);
2631       mid <<= integerPartWidth / 2;
2632       if (low + mid < low)
2633         high++;
2634       low += mid;
2635
2636       mid = highHalf(srcPart) * lowHalf(multiplier);
2637       high += highHalf(mid);
2638       mid <<= integerPartWidth / 2;
2639       if (low + mid < low)
2640         high++;
2641       low += mid;
2642
2643       /* Now add carry.  */
2644       if (low + carry < low)
2645         high++;
2646       low += carry;
2647     }
2648
2649     if (add) {
2650       /* And now DST[i], and store the new low part there.  */
2651       if (low + dst[i] < low)
2652         high++;
2653       dst[i] += low;
2654     } else
2655       dst[i] = low;
2656
2657     carry = high;
2658   }
2659
2660   if (i < dstParts) {
2661     /* Full multiplication, there is no overflow.  */
2662     assert(i + 1 == dstParts);
2663     dst[i] = carry;
2664     return 0;
2665   } else {
2666     /* We overflowed if there is carry.  */
2667     if (carry)
2668       return 1;
2669
2670     /* We would overflow if any significant unwritten parts would be
2671        non-zero.  This is true if any remaining src parts are non-zero
2672        and the multiplier is non-zero.  */
2673     if (multiplier)
2674       for (; i < srcParts; i++)
2675         if (src[i])
2676           return 1;
2677
2678     /* We fitted in the narrow destination.  */
2679     return 0;
2680   }
2681 }
2682
2683 /* DST = LHS * RHS, where DST has the same width as the operands and
2684    is filled with the least significant parts of the result.  Returns
2685    one if overflow occurred, otherwise zero.  DST must be disjoint
2686    from both operands.  */
2687 int
2688 APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2689                   const integerPart *rhs, unsigned int parts)
2690 {
2691   unsigned int i;
2692   int overflow;
2693
2694   assert(dst != lhs && dst != rhs);
2695
2696   overflow = 0;
2697   tcSet(dst, 0, parts);
2698
2699   for (i = 0; i < parts; i++)
2700     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2701                                parts - i, true);
2702
2703   return overflow;
2704 }
2705
2706 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2707    operands.  No overflow occurs.  DST must be disjoint from both
2708    operands.  Returns the number of parts required to hold the
2709    result.  */
2710 unsigned int
2711 APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2712                       const integerPart *rhs, unsigned int lhsParts,
2713                       unsigned int rhsParts)
2714 {
2715   /* Put the narrower number on the LHS for less loops below.  */
2716   if (lhsParts > rhsParts) {
2717     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2718   } else {
2719     unsigned int n;
2720
2721     assert(dst != lhs && dst != rhs);
2722
2723     tcSet(dst, 0, rhsParts);
2724
2725     for (n = 0; n < lhsParts; n++)
2726       tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2727
2728     n = lhsParts + rhsParts;
2729
2730     return n - (dst[n - 1] == 0);
2731   }
2732 }
2733
2734 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2735    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2736    set REMAINDER to the remainder, return zero.  i.e.
2737
2738    OLD_LHS = RHS * LHS + REMAINDER
2739
2740    SCRATCH is a bignum of the same size as the operands and result for
2741    use by the routine; its contents need not be initialized and are
2742    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2743 */
2744 int
2745 APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2746                 integerPart *remainder, integerPart *srhs,
2747                 unsigned int parts)
2748 {
2749   unsigned int n, shiftCount;
2750   integerPart mask;
2751
2752   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2753
2754   shiftCount = tcMSB(rhs, parts) + 1;
2755   if (shiftCount == 0)
2756     return true;
2757
2758   shiftCount = parts * integerPartWidth - shiftCount;
2759   n = shiftCount / integerPartWidth;
2760   mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2761
2762   tcAssign(srhs, rhs, parts);
2763   tcShiftLeft(srhs, parts, shiftCount);
2764   tcAssign(remainder, lhs, parts);
2765   tcSet(lhs, 0, parts);
2766
2767   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2768      the total.  */
2769   for (;;) {
2770       int compare;
2771
2772       compare = tcCompare(remainder, srhs, parts);
2773       if (compare >= 0) {
2774         tcSubtract(remainder, srhs, 0, parts);
2775         lhs[n] |= mask;
2776       }
2777
2778       if (shiftCount == 0)
2779         break;
2780       shiftCount--;
2781       tcShiftRight(srhs, parts, 1);
2782       if ((mask >>= 1) == 0)
2783         mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2784   }
2785
2786   return false;
2787 }
2788
2789 /* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2790    There are no restrictions on COUNT.  */
2791 void
2792 APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2793 {
2794   if (count) {
2795     unsigned int jump, shift;
2796
2797     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2798     jump = count / integerPartWidth;
2799     shift = count % integerPartWidth;
2800
2801     while (parts > jump) {
2802       integerPart part;
2803
2804       parts--;
2805
2806       /* dst[i] comes from the two parts src[i - jump] and, if we have
2807          an intra-part shift, src[i - jump - 1].  */
2808       part = dst[parts - jump];
2809       if (shift) {
2810         part <<= shift;
2811         if (parts >= jump + 1)
2812           part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2813       }
2814
2815       dst[parts] = part;
2816     }
2817
2818     while (parts > 0)
2819       dst[--parts] = 0;
2820   }
2821 }
2822
2823 /* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2824    zero.  There are no restrictions on COUNT.  */
2825 void
2826 APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2827 {
2828   if (count) {
2829     unsigned int i, jump, shift;
2830
2831     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2832     jump = count / integerPartWidth;
2833     shift = count % integerPartWidth;
2834
2835     /* Perform the shift.  This leaves the most significant COUNT bits
2836        of the result at zero.  */
2837     for (i = 0; i < parts; i++) {
2838       integerPart part;
2839
2840       if (i + jump >= parts) {
2841         part = 0;
2842       } else {
2843         part = dst[i + jump];
2844         if (shift) {
2845           part >>= shift;
2846           if (i + jump + 1 < parts)
2847             part |= dst[i + jump + 1] << (integerPartWidth - shift);
2848         }
2849       }
2850
2851       dst[i] = part;
2852     }
2853   }
2854 }
2855
2856 /* Bitwise and of two bignums.  */
2857 void
2858 APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2859 {
2860   unsigned int i;
2861
2862   for (i = 0; i < parts; i++)
2863     dst[i] &= rhs[i];
2864 }
2865
2866 /* Bitwise inclusive or of two bignums.  */
2867 void
2868 APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2869 {
2870   unsigned int i;
2871
2872   for (i = 0; i < parts; i++)
2873     dst[i] |= rhs[i];
2874 }
2875
2876 /* Bitwise exclusive or of two bignums.  */
2877 void
2878 APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2879 {
2880   unsigned int i;
2881
2882   for (i = 0; i < parts; i++)
2883     dst[i] ^= rhs[i];
2884 }
2885
2886 /* Complement a bignum in-place.  */
2887 void
2888 APInt::tcComplement(integerPart *dst, unsigned int parts)
2889 {
2890   unsigned int i;
2891
2892   for (i = 0; i < parts; i++)
2893     dst[i] = ~dst[i];
2894 }
2895
2896 /* Comparison (unsigned) of two bignums.  */
2897 int
2898 APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2899                  unsigned int parts)
2900 {
2901   while (parts) {
2902       parts--;
2903       if (lhs[parts] == rhs[parts])
2904         continue;
2905
2906       if (lhs[parts] > rhs[parts])
2907         return 1;
2908       else
2909         return -1;
2910     }
2911
2912   return 0;
2913 }
2914
2915 /* Increment a bignum in-place, return the carry flag.  */
2916 integerPart
2917 APInt::tcIncrement(integerPart *dst, unsigned int parts)
2918 {
2919   unsigned int i;
2920
2921   for (i = 0; i < parts; i++)
2922     if (++dst[i] != 0)
2923       break;
2924
2925   return i == parts;
2926 }
2927
2928 /* Set the least significant BITS bits of a bignum, clear the
2929    rest.  */
2930 void
2931 APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2932                                  unsigned int bits)
2933 {
2934   unsigned int i;
2935
2936   i = 0;
2937   while (bits > integerPartWidth) {
2938     dst[i++] = ~(integerPart) 0;
2939     bits -= integerPartWidth;
2940   }
2941
2942   if (bits)
2943     dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2944
2945   while (i < parts)
2946     dst[i++] = 0;
2947 }