compiler-rt/lib/builtins/udivmodti4.c

   1 //===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements __udivmodti4 for the compiler_rt library.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "int_lib.h"
  14
  15 #ifdef CRT_HAS_128BIT
  16
  17 // Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
  18 // Remainder stored in r.
  19 // Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
  20 // fallback. For a correctness proof see the reference for this algorithm
  21 // in Knuth, Volume 2, section 4.3.1, Algorithm D.
  22 UNUSED
  23 static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
  24                                             du_int *r) {
  25   const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
  26   const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
  27   du_int un1, un0;                                // Norm. dividend LSD's
  28   du_int vn1, vn0;                                // Norm. divisor digits
  29   du_int q1, q0;                                  // Quotient digits
  30   du_int un64, un21, un10;                        // Dividend digit pairs
  31   du_int rhat;                                    // A remainder
  32   si_int s;                                       // Shift amount for normalization
  33
  34   s = __builtin_clzll(v);
  35   if (s > 0) {
  36     // Normalize the divisor.
  37     v = v << s;
  38     un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
  39     un10 = u0 << s; // Shift dividend left
  40   } else {
  41     // Avoid undefined behavior of (u0 >> 64).
  42     un64 = u1;
  43     un10 = u0;
  44   }
  45
  46   // Break divisor up into two 32-bit digits.
  47   vn1 = v >> (n_udword_bits / 2);
  48   vn0 = v & 0xFFFFFFFF;
  49
  50   // Break right half of dividend into two digits.
  51   un1 = un10 >> (n_udword_bits / 2);
  52   un0 = un10 & 0xFFFFFFFF;
  53
  54   // Compute the first quotient digit, q1.
  55   q1 = un64 / vn1;
  56   rhat = un64 - q1 * vn1;
  57
  58   // q1 has at most error 2. No more than 2 iterations.
  59   while (q1 >= b || q1 * vn0 > b * rhat + un1) {
  60     q1 = q1 - 1;
  61     rhat = rhat + vn1;
  62     if (rhat >= b)
  63       break;
  64   }
  65
  66   un21 = un64 * b + un1 - q1 * v;
  67
  68   // Compute the second quotient digit.
  69   q0 = un21 / vn1;
  70   rhat = un21 - q0 * vn1;
  71
  72   // q0 has at most error 2. No more than 2 iterations.
  73   while (q0 >= b || q0 * vn0 > b * rhat + un0) {
  74     q0 = q0 - 1;
  75     rhat = rhat + vn1;
  76     if (rhat >= b)
  77       break;
  78   }
  79
  80   *r = (un21 * b + un0 - q0 * v) >> s;
  81   return q1 * b + q0;
  82 }
  83
  84 static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
  85                                      du_int *r) {
  86 #if defined(__x86_64__)
  87   du_int result;
  88   __asm__("divq %[v]"
  89           : "=a"(result), "=d"(*r)
  90           : [ v ] "r"(v), "a"(u0), "d"(u1));
  91   return result;
  92 #else
  93   return udiv128by64to64default(u1, u0, v, r);
  94 #endif
  95 }
  96
  97 // Effects: if rem != 0, *rem = a % b
  98 // Returns: a / b
  99
 100 COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
 101   const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
 102   utwords dividend;
 103   dividend.all = a;
 104   utwords divisor;
 105   divisor.all = b;
 106   utwords quotient;
 107   utwords remainder;
 108   if (divisor.all > dividend.all) {
 109     if (rem)
 110       *rem = dividend.all;
 111     return 0;
 112   }
 113   // When the divisor fits in 64 bits, we can use an optimized path.
 114   if (divisor.s.high == 0) {
 115     remainder.s.high = 0;
 116     if (dividend.s.high < divisor.s.low) {
 117       // The result fits in 64 bits.
 118       quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
 119                                        divisor.s.low, &remainder.s.low);
 120       quotient.s.high = 0;
 121     } else {
 122       // First, divide with the high part to get the remainder in dividend.s.high.
 123       // After that dividend.s.high < divisor.s.low.
 124       quotient.s.high = dividend.s.high / divisor.s.low;
 125       dividend.s.high = dividend.s.high % divisor.s.low;
 126       quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
 127                                        divisor.s.low, &remainder.s.low);
 128     }
 129     if (rem)
 130       *rem = remainder.all;
 131     return quotient.all;
 132   }
 133   // 0 <= shift <= 63.
 134   si_int shift =
 135       __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
 136   divisor.all <<= shift;
 137   quotient.s.high = 0;
 138   quotient.s.low = 0;
 139   for (; shift >= 0; --shift) {
 140     quotient.s.low <<= 1;
 141     // Branch free version of.
 142     // if (dividend.all >= divisor.all)
 143     // {
 144     //    dividend.all -= divisor.all;
 145     //    carry = 1;
 146     // }
 147     const ti_int s =
 148         (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
 149     quotient.s.low |= s & 1;
 150     dividend.all -= divisor.all & s;
 151     divisor.all >>= 1;
 152   }
 153   if (rem)
 154     *rem = dividend.all;
 155   return quotient.all;
 156 }
 157
 158 #endif // CRT_HAS_128BIT