arch/blackfin/lib/udivdi3.S

   1 /*
   2  * udivdi3.S - unsigned long long division
   3  *
   4  * Copyright 2003-2007 Analog Devices Inc.
   5  * Enter bugs at http://blackfin.uclinux.org/
   6  *
   7  * Licensed under the GPLv2 or later.
   8  */
   9
  10 #include <linux/linkage.h>
  11
  12 #define CARRY AC0
  13
  14 #ifdef CONFIG_ARITHMETIC_OPS_L1
  15 .section .l1.text
  16 #else
  17 .text
  18 #endif
  19
  20
  21 ENTRY(___udivdi3)
  22    R3 = [SP + 12];
  23    [--SP] = (R7:4, P5:3);
  24
  25    /* Attempt to use divide primitive first; these will handle
  26    **  most cases, and they're quick - avoids stalls incurred by
  27    ** testing for identities.
  28    */
  29
  30    R4 = R2 | R3;
  31    CC = R4 == 0;
  32    IF CC JUMP .LDIV_BY_ZERO;
  33
  34    R4.H = 0x8000;
  35    R4 >>>= 16;                  // R4 now 0xFFFF8000
  36    R5 = R0 | R2;                // If either dividend or
  37    R4 = R5 & R4;                // divisor have bits in
  38    CC = R4;                     // top half or low half's sign
  39    IF CC JUMP .LIDENTS;          // bit, skip builtins.
  40    R4 = R1 | R3;                // Also check top halves
  41    CC = R4;
  42    IF CC JUMP .LIDENTS;
  43
  44    /* Can use the builtins. */
  45
  46    AQ = CC;                     // Clear AQ (CC==0)
  47    DIVQ(R0, R2);
  48    DIVQ(R0, R2);
  49    DIVQ(R0, R2);
  50    DIVQ(R0, R2);
  51    DIVQ(R0, R2);
  52    DIVQ(R0, R2);
  53    DIVQ(R0, R2);
  54    DIVQ(R0, R2);
  55    DIVQ(R0, R2);
  56    DIVQ(R0, R2);
  57    DIVQ(R0, R2);
  58    DIVQ(R0, R2);
  59    DIVQ(R0, R2);
  60    DIVQ(R0, R2);
  61    DIVQ(R0, R2);
  62    DIVQ(R0, R2);
  63    DIVQ(R0, R2);
  64    R0 = R0.L (Z);
  65    R1 = 0;
  66    (R7:4, P5:3) = [SP++];
  67    RTS;
  68
  69 .LIDENTS:
  70    /* Test for common identities. Value to be returned is
  71    ** placed in R6,R7.
  72    */
  73                                 // Check for 0/y, return 0
  74    R4 = R0 | R1;
  75    CC = R4 == 0;
  76    IF CC JUMP .LRETURN_R0;
  77
  78                                 // Check for x/x, return 1
  79    R6 = R0 - R2;                // If x == y, then both R6 and R7 will be zero
  80    R7 = R1 - R3;
  81    R4 = R6 | R7;                // making R4 zero.
  82    R6 += 1;                     // which would now make R6:R7==1.
  83    CC = R4 == 0;
  84    IF CC JUMP .LRETURN_IDENT;
  85
  86                                 // Check for x/1, return x
  87    R6 = R0;
  88    R7 = R1;
  89    CC = R3 == 0;
  90    IF !CC JUMP .Lnexttest;
  91    CC = R2 == 1;
  92    IF CC JUMP .LRETURN_IDENT;
  93
  94 .Lnexttest:
  95    R4.L = ONES R2;              // check for div by power of two which
  96    R5.L = ONES R3;              // can be done using a shift
  97    R6 = PACK (R5.L, R4.L);
  98    CC = R6 == 1;
  99    IF CC JUMP .Lpower_of_two_upper_zero;
 100    R6 = PACK (R4.L, R5.L);
 101    CC = R6 == 1;
 102    IF CC JUMP .Lpower_of_two_lower_zero;
 103
 104                                 // Check for x < y, return 0
 105    R6 = 0;
 106    R7 = R6;
 107    CC = R1 < R3 (IU);
 108    IF CC JUMP .LRETURN_IDENT;
 109    CC = R1 == R3;
 110    IF !CC JUMP .Lno_idents;
 111    CC = R0 < R2 (IU);
 112    IF CC JUMP .LRETURN_IDENT;
 113
 114 .Lno_idents:                    // Idents don't match. Go for the full operation
 115
 116
 117    // If X, or X and Y have high bit set, it'll affect the
 118    // results, so shift right one to stop this. Note: we've already
 119    // checked that X >= Y, so Y's msb won't be set unless X's
 120    // is.
 121
 122    R4 = 0;
 123    CC = R1 < 0;
 124    IF !CC JUMP .Lx_msb_clear;
 125    CC = !CC;                   // 1 -> 0;
 126    R1 = ROT R1 BY -1;          // Shift X >> 1
 127    R0 = ROT R0 BY -1;          // lsb -> CC
 128    BITSET(R4,31);              // to record only x msb was set
 129    CC = R3 < 0;
 130    IF !CC JUMP .Ly_msb_clear;
 131    CC = !CC;
 132    R3 = ROT R3 BY -1;          // Shift Y >> 1
 133    R2 = ROT R2 BY -1;
 134    BITCLR(R4,31);              // clear bit to record only x msb was set
 135
 136 .Ly_msb_clear:
 137 .Lx_msb_clear:
 138    // Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
 139    // were lost, so we should shift result left by one.
 140
 141    [--SP] = R4;                // save for later
 142
 143    // In the loop that follows, each iteration we add
 144    // either Y' or -Y' to the Remainder. We compute the
 145    // negated Y', and store, for convenience. Y' goes
 146    // into P0:P1, while -Y' goes into P2:P3.
 147
 148    P0 = R2;
 149    P1 = R3;
 150    R2 = -R2;
 151    CC = CARRY;
 152    CC = !CC;
 153    R4 = CC;
 154    R3 = -R3;
 155    R3 = R3 - R4;
 156
 157    R6 = 0;                     // remainder = 0
 158    R7 = R6;
 159
 160    [--SP] = R2; P2 = SP;
 161    [--SP] = R3; P3 = SP;
 162    [--SP] = R6; P5 = SP;       // AQ = 0
 163    [--SP] = P1;
 164
 165    /* In the loop that follows, we use the following
 166    ** register assignments:
 167    ** R0,R1 X, workspace
 168    ** R2,R3 Y, workspace
 169    ** R4,R5 partial Div
 170    ** R6,R7 partial remainder
 171    ** P5 AQ
 172    ** The remainder and div form a 128-bit number, with
 173    ** the remainder in the high 64-bits.
 174    */
 175    R4 = R0;                    // Div = X'
 176    R5 = R1;
 177    R3 = 0;
 178
 179    P4 = 64;                    // Iterate once per bit
 180    LSETUP(.LULST,.LULEND) LC0 = P4;
 181 .LULST:
 182         /* Shift Div and remainder up by one. The bit shifted
 183         ** out of the top of the quotient is shifted into the bottom
 184         ** of the remainder.
 185         */
 186         CC = R3;
 187         R4 = ROT R4 BY 1;
 188         R5 = ROT R5 BY 1 ||        // low q to high q
 189              R2 = [P5];            // load saved AQ
 190         R6 = ROT R6 BY 1 ||        // high q to low r
 191              R0 = [P2];            // load -Y'
 192         R7 = ROT R7 BY 1 ||        // low r to high r
 193              R1 = [P3];
 194
 195                                    // Assume add -Y'
 196         CC = R2 < 0;               // But if AQ is set...
 197         IF CC R0 = P0;             // then add Y' instead
 198         IF CC R1 = P1;
 199
 200         R6 = R6 + R0;              // Rem += (Y' or -Y')
 201         CC = CARRY;
 202         R0 = CC;
 203         R7 = R7 + R1;
 204         R7 = R7 + R0 (NS) ||
 205              R1 = [SP];
 206                                    // Set the next AQ bit
 207         R1 = R7 ^ R1;              // from Remainder and Y'
 208         R1 = R1 >> 31 ||           // Negate AQ's value, and
 209              [P5] = R1;            // save next AQ
 210         BITTGL(R1, 0);             // add neg AQ  to the Div
 211 .LULEND: R4 = R4 + R1;
 212
 213    R6 = [SP + 16];
 214
 215    R0 = R4;
 216    R1 = R5;
 217    CC = BITTST(R6,30);         // Just set CC=0
 218    R4 = ROT R0 BY 1;           // but if we had to shift X,
 219    R5 = ROT R1 BY 1;           // and didn't shift any bits out,
 220    CC = BITTST(R6,31);         // then the result will be half as
 221    IF CC R0 = R4;              // much as required, so shift left
 222    IF CC R1 = R5;              // one space.
 223
 224    SP += 20;
 225    (R7:4, P5:3) = [SP++];
 226    RTS;
 227
 228 .Lpower_of_two:
 229    /* Y has a single bit set, which means it's a power of two.
 230    ** That means we can perform the division just by shifting
 231    ** X to the right the appropriate number of bits
 232    */
 233
 234    /* signbits returns the number of sign bits, minus one.
 235    ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
 236    ** to shift right n-signbits spaces. It also means 0x80000000
 237    ** is a special case, because that *also* gives a signbits of 0
 238    */
 239 .Lpower_of_two_lower_zero:
 240    R7 = 0;
 241    R6 = R1 >> 31;
 242    CC = R3 < 0;
 243    IF CC JUMP .LRETURN_IDENT;
 244
 245    R2.L = SIGNBITS R3;
 246    R2 = R2.L (Z);
 247    R2 += -62;
 248    (R7:4, P5:3) = [SP++];
 249    JUMP ___lshftli;
 250
 251 .Lpower_of_two_upper_zero:
 252    CC = R2 < 0;
 253    IF CC JUMP .Lmaxint_shift;
 254
 255    R2.L = SIGNBITS R2;
 256    R2 = R2.L (Z);
 257    R2 += -30;
 258    (R7:4, P5:3) = [SP++];
 259    JUMP ___lshftli;
 260
 261 .Lmaxint_shift:
 262    R2 = -31;
 263    (R7:4, P5:3) = [SP++];
 264    JUMP ___lshftli;
 265
 266 .LRETURN_IDENT:
 267    R0 = R6;
 268    R1 = R7;
 269 .LRETURN_R0:
 270    (R7:4, P5:3) = [SP++];
 271    RTS;
 272 .LDIV_BY_ZERO:
 273    R0 = ~R2;
 274    R1 = R0;
 275    (R7:4, P5:3) = [SP++];
 276    RTS;
 277
 278 ENDPROC(___udivdi3)
 279
 280
 281 ENTRY(___lshftli)
 282         CC = R2 == 0;
 283         IF CC JUMP .Lfinished;  // nothing to do
 284         CC = R2 < 0;
 285         IF CC JUMP .Lrshift;
 286         R3 = 64;
 287         CC = R2 < R3;
 288         IF !CC JUMP .Lretzero;
 289
 290         // We're shifting left, and it's less than 64 bits, so
 291         // a valid result will be returned.
 292
 293         R3 >>= 1;       // R3 now 32
 294         CC = R2 < R3;
 295
 296         IF !CC JUMP .Lzerohalf;
 297
 298         // We're shifting left, between 1 and 31 bits, which means
 299         // some of the low half will be shifted into the high half.
 300         // Work out how much.
 301
 302         R3 = R3 - R2;
 303
 304         // Save that much data from the bottom half.
 305
 306         P1 = R7;
 307         R7 = R0;
 308         R7 >>= R3;
 309
 310         // Adjust both parts of the parameter.
 311
 312         R0 <<= R2;
 313         R1 <<= R2;
 314
 315         // And include the bits moved across.
 316
 317         R1 = R1 | R7;
 318         R7 = P1;
 319         RTS;
 320
 321 .Lzerohalf:
 322         // We're shifting left, between 32 and 63 bits, so the
 323         // bottom half will become zero, and the top half will
 324         // lose some bits. How many?
 325
 326         R2 = R2 - R3;   // N - 32
 327         R1 = LSHIFT R0 BY R2.L;
 328         R0 = R0 - R0;
 329         RTS;
 330
 331 .Lretzero:
 332         R0 = R0 - R0;
 333         R1 = R0;
 334 .Lfinished:
 335         RTS;
 336
 337 .Lrshift:
 338         // We're shifting right, but by how much?
 339         R2 = -R2;
 340         R3 = 64;
 341         CC = R2 < R3;
 342         IF !CC JUMP .Lretzero;
 343
 344         // Shifting right less than 64 bits, so some result bits will
 345         // be retained.
 346
 347         R3 >>= 1;       // R3 now 32
 348         CC = R2 < R3;
 349         IF !CC JUMP .Lsignhalf;
 350
 351         // Shifting right between 1 and 31 bits, so need to copy
 352         // data across words.
 353
 354         P1 = R7;
 355         R3 = R3 - R2;
 356         R7 = R1;
 357         R7 <<= R3;
 358         R1 >>= R2;
 359         R0 >>= R2;
 360         R0 = R7 | R0;
 361         R7 = P1;
 362         RTS;
 363
 364 .Lsignhalf:
 365         // Shifting right between 32 and 63 bits, so the top half
 366         // will become all zero-bits, and the bottom half is some
 367         // of the top half. But how much?
 368
 369         R2 = R2 - R3;
 370         R0 = R1;
 371         R0 >>= R2;
 372         R1 = 0;
 373         RTS;
 374
 375 ENDPROC(___lshftli)