// SPDX-License-Identifier: GPL-2.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.
 */

#include <linux/libgcc.h>
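
/*
 * Note: DWunion (declared in <linux/libgcc.h>) overlays a 64-bit value
 * with a struct of two 32-bit halves (s.high/s.low, ordered according to
 * endianness); that is how the 64-bit operands are split apart below.
 */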

#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz(X))

#define W_TYPE_SIZE 32

#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))
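
/*
 * The helpers above treat a 32-bit word as two 16-bit "digits":
 * __ll_B is the digit base (1 << 16), and __ll_lowpart()/__ll_highpart()
 * extract the low and high digit of a word.  The plain-C fallbacks below
 * are written in terms of these digits.
 */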

/* If we still don't have umul_ppmm, define it using plain C.  */
#if !defined(umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
	do { \
		unsigned long __x0, __x1, __x2, __x3; \
		unsigned short __ul, __vl, __uh, __vh; \
 \
		__ul = __ll_lowpart(u); \
		__uh = __ll_highpart(u); \
		__vl = __ll_lowpart(v); \
		__vh = __ll_highpart(v); \
 \
		__x0 = (unsigned long) __ul * __vl; \
		__x1 = (unsigned long) __ul * __vh; \
		__x2 = (unsigned long) __uh * __vl; \
		__x3 = (unsigned long) __uh * __vh; \
 \
		__x1 += __ll_highpart(__x0); \
		__x1 += __x2; \
		if (__x1 < __x2) \
			__x3 += __ll_B; \
 \
		(w1) = __x3 + __ll_highpart(__x1); \
		(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
	} while (0)
#endif
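
/*
 * umul_ppmm(w1, w0, u, v) forms the full 64-bit product of the 32-bit
 * values u and v as the word pair w1:w0.  The fallback above builds it
 * from four 16x16 partial products; the (__x1 < __x2) test catches the
 * carry out of the middle sum and propagates it into the high word.
 */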

#if !defined(sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	do { \
		unsigned long __x; \
		__x = (al) - (bl); \
		(sh) = (ah) - (bh) - (__x > (al)); \
		(sl) = __x; \
	} while (0)
#endif
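
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl) is a two-word subtraction:
 * sh:sl = ah:al - bh:bl.  A borrow out of the low word is detected by the
 * wrap-around test (__x > (al)) and subtracted from the high word.
 */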

/* Define this unconditionally, so it can be used for debugging.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
	do { \
		unsigned long __d1, __d0, __q1, __q0; \
		unsigned long __r1, __r0, __m; \
		__d1 = __ll_highpart(d); \
		__d0 = __ll_lowpart(d); \
 \
		__r1 = (n1) % __d1; \
		__q1 = (n1) / __d1; \
		__m = (unsigned long) __q1 * __d0; \
		__r1 = __r1 * __ll_B | __ll_highpart(n0); \
		if (__r1 < __m) { \
			__q1--, __r1 += (d); \
			if (__r1 >= (d)) \
				if (__r1 < __m) \
					__q1--, __r1 += (d); \
		} \
		__r1 -= __m; \
 \
		__r0 = __r1 % __d1; \
		__q0 = __r1 / __d1; \
		__m = (unsigned long) __q0 * __d0; \
		__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
		if (__r0 < __m) { \
			__q0--, __r0 += (d); \
			if (__r0 >= (d)) \
				if (__r0 < __m) \
					__q0--, __r0 += (d); \
		} \
		__r0 -= __m; \
 \
		(q) = (unsigned long) __q1 * __ll_B | __q0; \
		(r) = __r0; \
	} while (0)

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
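
/*
 * udiv_qrnnd(q, r, n1, n0, d) divides the two-word value n1:n0 by the
 * single word d, producing quotient q and remainder r.  The generic
 * __udiv_qrnnd_c fallback is schoolbook division by 16-bit digits and
 * expects a normalized divisor (most significant bit set), which is why
 * it is paired with UDIV_NEEDS_NORMALIZATION.
 */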

unsigned long long __udivmoddi4(unsigned long long u, unsigned long long v,
				unsigned long long *rp)
{
	const DWunion nn = {.ll = u };
	const DWunion dd = {.ll = v };
	DWunion rr, ww;
	unsigned long d0, d1, n0, n1, n2;
	unsigned long q0 = 0, q1 = 0;
	unsigned long b, bm;

	d0 = dd.s.low;
	d1 = dd.s.high;
	n0 = nn.s.low;
	n1 = nn.s.high;
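
	/*
	 * The case comments below ("0q = nn / 0D" and the like) describe
	 * the word shapes involved: n1:n0 is the numerator, d1:d0 the
	 * divisor, and the left-hand side shows which quotient words
	 * (q1, q0) can be non-zero.  The top-level split is on whether the
	 * divisor fits in a single 32-bit word (d1 == 0) or not.
	 */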

#if !UDIV_NEEDS_NORMALIZATION

	if (d1 == 0) {
		if (d0 > n1) {
			/* 0q = nn / 0D */

			udiv_qrnnd(q0, n0, n1, n0, d0);
			q1 = 0;

			/* Remainder in n0.  */
		} else {
			/* qq = NN / 0d */

			if (d0 == 0)
				/* Divide intentionally by zero.  */
				d0 = 1 / d0;

			udiv_qrnnd(q1, n1, 0, n1, d0);
			udiv_qrnnd(q0, n0, n1, n0, d0);

			/* Remainder in n0.  */
		}

		if (rp != 0) {
			rr.s.low = n0;
			rr.s.high = 0;
			*rp = rr.ll;
		}

#else /* UDIV_NEEDS_NORMALIZATION */

	if (d1 == 0) {
		if (d0 > n1) {
			/* 0q = nn / 0D */

			count_leading_zeros(bm, d0);

			if (bm != 0) {
				/*
				 * Normalize, i.e. make the most significant
				 * bit of the denominator set.
				 */

				d0 = d0 << bm;
				n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
				n0 = n0 << bm;
			}

			udiv_qrnnd(q0, n0, n1, n0, d0);
			q1 = 0;

			/* Remainder in n0 >> bm.  */
		} else {
			/* qq = NN / 0d */

			if (d0 == 0)
				/* Divide intentionally by zero.  */
				d0 = 1 / d0;

			count_leading_zeros(bm, d0);

			if (bm == 0) {
				/*
				 * From (n1 >= d0) /\ (the most significant bit
				 * of d0 is set), conclude (the most significant
				 * bit of n1 is set) /\ (the leading quotient
				 * digit q1 = 1).
				 *
				 * This special case is necessary, not an
				 * optimization.  (Shift counts of W_TYPE_SIZE
				 * are undefined.)
				 */

				n1 -= d0;
				q1 = 1;
			} else {
				/* Normalize.  */

				b = W_TYPE_SIZE - bm;

				d0 = d0 << bm;
				n2 = n1 >> b;
				n1 = (n1 << bm) | (n0 >> b);
				n0 = n0 << bm;

				udiv_qrnnd(q1, n1, n2, n1, d0);
			}

			/* n1 != d0...  */

			udiv_qrnnd(q0, n0, n1, n0, d0);

			/* Remainder in n0 >> bm.  */
		}

		if (rp != 0) {
			rr.s.low = n0 >> bm;
			rr.s.high = 0;
			*rp = rr.ll;
		}

#endif /* UDIV_NEEDS_NORMALIZATION */

	} else {
		if (d1 > n1) {
			/* 00 = nn / DD */

			q0 = 0;
			q1 = 0;

			/* Remainder in n1n0.  */
			if (rp != 0) {
				rr.s.low = n0;
				rr.s.high = n1;
				*rp = rr.ll;
			}
		} else {
			/* 0q = NN / dd */

			count_leading_zeros(bm, d1);
			if (bm == 0) {
				/*
				 * From (n1 >= d1) /\ (the most significant bit
				 * of d1 is set), conclude (the most significant
				 * bit of n1 is set) /\ (the quotient digit q0 =
				 * 0 or 1).
				 *
				 * This special case is necessary, not an
				 * optimization.
				 */

				/*
				 * The condition on the next line takes
				 * advantage of the fact that n1 >= d1 (true
				 * due to program flow).
				 */
				if (n1 > d1 || n0 >= d0) {
					q0 = 1;
					sub_ddmmss(n1, n0, n1, n0, d1, d0);
				} else {
					q0 = 0;
				}

				q1 = 0;

				if (rp != 0) {
					rr.s.low = n0;
					rr.s.high = n1;
					*rp = rr.ll;
				}
			} else {
				unsigned long m1, m0;
				/* Normalize.  */

				b = W_TYPE_SIZE - bm;

				d1 = (d1 << bm) | (d0 >> b);
				d0 = d0 << bm;
				n2 = n1 >> b;
				n1 = (n1 << bm) | (n0 >> b);
				n0 = n0 << bm;
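
				/*
				 * Estimate the quotient word q0 from the high
				 * parts (n2:n1 divided by d1), then check it
				 * against the low divisor word: if q0 * d0
				 * overshoots what is left of the numerator,
				 * q0 is lowered by one and m1:m0 is reduced by
				 * the divisor so the remainder derived from it
				 * below stays correct.
				 */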
				udiv_qrnnd(q0, n1, n2, n1, d1);
				umul_ppmm(m1, m0, q0, d0);

				if (m1 > n1 || (m1 == n1 && m0 > n0)) {
					q0--;
					sub_ddmmss(m1, m0, m1, m0, d1, d0);
				}

				q1 = 0;

				/* Remainder in (n1n0 - m1m0) >> bm.  */
				if (rp != 0) {
					sub_ddmmss(n1, n0, n1, n0, m1, m0);
					rr.s.low = (n1 << b) | (n0 >> bm);
					rr.s.high = n1 >> bm;
					*rp = rr.ll;
				}
			}
		}
	}

	ww.s.low = q0;
	ww.s.high = q1;

	return ww.ll;
}
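
/*
 * Usage sketch (illustrative only, not part of this file): __udivmoddi4()
 * is the combined 64-bit unsigned divide/modulo helper in the libgcc
 * style, so wrappers along these hypothetical lines could sit on top of
 * it:
 *
 *	unsigned long long __udivdi3(unsigned long long u, unsigned long long v)
 *	{
 *		return __udivmoddi4(u, v, NULL);	// quotient only
 *	}
 *
 *	unsigned long long __umoddi3(unsigned long long u, unsigned long long v)
 *	{
 *		unsigned long long rem;
 *
 *		__udivmoddi4(u, v, &rem);		// remainder via *rp
 *		return rem;
 *	}
 *
 * Passing a NULL rp skips the remainder store; a zero divisor is routed
 * through the intentional "1 / d0" division by zero above.
 */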