arch/arm/lib/div64.S

   1 /*
   2  *  linux/arch/arm/lib/div64.S
   3  *
   4  *  Optimized computation of 64-bit dividend / 32-bit divisor
   5  *
   6  *  Author:     Nicolas Pitre
   7  *  Created:    Oct 5, 2003
   8  *  Copyright:  Monta Vista Software, Inc.
   9  *
  10  *  This program is free software; you can redistribute it and/or modify
  11  *  it under the terms of the GNU General Public License version 2 as
  12  *  published by the Free Software Foundation.
  13  */
  14
  15 #include <linux/linkage.h>
  16 #include <asm/unwind.h>
  17
  18 #ifdef __ARMEB__                /* big-endian build: high word in the lower-numbered register */
  19 #define xh r0                   /* high word of the dividend, remainder on return */
  20 #define xl r1                   /* low word of the dividend */
  21 #define yh r2                   /* high word of the result */
  22 #define yl r3                   /* low word of the result */
  23 #else                           /* otherwise: low word in the lower-numbered register */
  24 #define xl r0                   /* low word of the dividend */
  25 #define xh r1                   /* high word of the dividend, remainder on return */
  26 #define yl r2                   /* low word of the result */
  27 #define yh r3                   /* high word of the result */
  28 #endif
  29
  30 /*
  31  * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
  32  *
  33  * Note: Calling convention is totally non standard for optimal code.
  34  *       This is meant to be used by do_div() from include/asm/div64.h only.
  35  *
  36  * Input parameters:
  37  *      xh-xl   = dividend (clobbered)
  38  *      r4      = divisor (preserved)
  39  *
  40  * Output values:
  41  *      yh-yl   = result
  42  *      xh      = remainder
  43  *
  44  * Clobbered regs: xl, ip
  45  */
  46
  47 ENTRY(__do_div64)
  48 UNWIND(.fnstart)
  49
  50         @ Test for easy paths first.
  51         subs    ip, r4, #1              @ ip = divisor - 1; flags reused at 9:
  52         bls     9f                      @ divisor is 0 or 1
  53         tst     ip, r4                  @ (divisor - 1) & divisor is 0 for 2^n
  54         beq     8f                      @ divisor is power of 2
  55
  56         @ See if we need to handle upper 32-bit result.
  57         cmp     xh, r4
  58         mov     yh, #0                  @ plain mov: flags from cmp survive
  59         blo     3f                      @ xh < divisor: upper result stays 0
  60
  61         @ Align divisor with upper part of dividend.
  62         @ The aligned divisor is stored in yl preserving the original.
  63         @ The bit position is stored in ip.
  64
  65 #if __LINUX_ARM_ARCH__ >= 5
  66
  67         clz     yl, r4
  68         clz     ip, xh
  69         sub     yl, yl, ip              @ yl = clz(divisor) - clz(xh) = shift
  70         mov     ip, #1
  71         mov     ip, ip, lsl yl          @ ip = 1 << shift
  72         mov     yl, r4, lsl yl          @ yl = divisor << shift
  73
  74 #else
  75
  76         mov     yl, r4
  77         mov     ip, #1
  78 1:      cmp     yl, #0x80000000         @ stop once bit 31 of yl is set
  79         cmpcc   yl, xh                  @ ... or once yl >= xh
  80         movcc   yl, yl, lsl #1          @ otherwise shift divisor copy
  81         movcc   ip, ip, lsl #1          @ ... and the bit position with it
  82         bcc     1b
  83
  84 #endif
  85
  86         @ The division loop for needed upper bit positions.
  87         @ Break out early if dividend reaches 0.
  88 2:      cmp     xh, yl
  89         orrcs   yh, yh, ip              @ xh >= yl: set this result bit
  90         subcss  xh, xh, yl              @ ... and subtract; Z set if xh is 0
  91         movnes  ip, ip, lsr #1          @ next position; Z set when ip is 0
  92         mov     yl, yl, lsr #1          @ plain mov: keeps Z for the bne
  93         bne     2b
  94
  95         @ See if we need to handle lower 32-bit result.
  96 3:      cmp     xh, #0
  97         mov     yl, #0
  98         cmpeq   xl, r4                  @ only compared when xh is 0
  99         movlo   xh, xl                  @ xh == 0 and xl < divisor:
 100         movlo   pc, lr                  @ remainder is xl and yl stays 0
 101
 102         @ The division loop for lower bit positions.
 103         @ Here we shift remainder bits leftwards rather than moving the
 104         @ divisor for comparisons, considering the carry-out bit as well.
 105         mov     ip, #0x80000000         @ start with result bit 31
 106 4:      movs    xl, xl, lsl #1          @ C = bit shifted out of xl
 107         adcs    xh, xh, xh              @ xh = 2 * xh + C; C = carry-out
 108         beq     6f                      @ xh became 0: sort it out at 6:
 109         cmpcc   xh, r4                  @ no carry-out: compare with divisor
 110 5:      orrcs   yl, yl, ip              @ carry-out or xh >= divisor: set bit
 111         subcs   xh, xh, r4              @ ... and subtract the divisor
 112         movs    ip, ip, lsr #1          @ next position; Z set when ip is 0
 113         bne     4b
 114         mov     pc, lr
 115
 116         @ The top part of remainder became zero.  If carry is set
 117         @ (the 33rd bit) this is a false positive so resume the loop.
 118         @ Otherwise, if lower part is also null then we are done.
 119 6:      bcs     5b
 120         cmp     xl, #0
 121         moveq   pc, lr
 122
 123         @ We still have remainder bits in the low part.  Bring them up.
 124
 125 #if __LINUX_ARM_ARCH__ >= 5
 126
 127         clz     xh, xl                  @ we know xh is zero here so...
 128         add     xh, xh, #1              @ xh = clz(xl) + 1, used as shift count
 129         mov     xl, xl, lsl xh          @ shifts the top set bit out of xl
 130         mov     ip, ip, lsr xh          @ bit position moves by the same count
 131
 132 #else
 133
 134 7:      movs    xl, xl, lsl #1          @ C = bit shifted out of xl
 135         mov     ip, ip, lsr #1          @ bit position follows each shift
 136         bcc     7b                      @ loop until a 1 bit was shifted out
 137
 138 #endif
 139
 140         @ Current remainder is now 1.  It is worthless to compare with
 141         @ divisor at this point since divisor can not be smaller than 3 here.
 142         @ If possible, branch for another shift in the division loop.
 143         @ If no bit position left then we are done.
 144         movs    ip, ip, lsr #1          @ Z set when no bit position is left
 145         mov     xh, #1                  @ plain mov: keeps Z for the bne
 146         bne     4b
 147         mov     pc, lr
 148
 149 8:      @ Division by a power of 2: determine what that divisor order is
 150         @ then simply shift values around
 151
 152 #if __LINUX_ARM_ARCH__ >= 5
 153
 154         clz     ip, r4
 155         rsb     ip, ip, #31             @ ip = 31 - clz(divisor) = order
 156
 157 #else
 158
 159         mov     yl, r4
 160         cmp     r4, #(1 << 16)
 161         mov     ip, #0
 162         movhs   yl, yl, lsr #16         @ set bit is in the upper half:
 163         movhs   ip, #16                 @ bring it down and count 16
 164
 165         cmp     yl, #(1 << 8)
 166         movhs   yl, yl, lsr #8
 167         addhs   ip, ip, #8
 168
 169         cmp     yl, #(1 << 4)
 170         movhs   yl, yl, lsr #4
 171         addhs   ip, ip, #4
 172
 173         cmp     yl, #(1 << 2)           @ yl is 1, 2, 4 or 8 by now
 174         addhi   ip, ip, #3              @ yl is 8
 175         addls   ip, ip, yl, lsr #1      @ yl of 1, 2, 4 adds 0, 1, 2
 176
 177 #endif
 178
 179         mov     yh, xh, lsr ip          @ result = dividend >> order
 180         mov     yl, xl, lsr ip
 181         rsb     ip, ip, #32             @ ip = 32 - order
 182  ARM(   orr     yl, yl, xh, lsl ip      )
 183  THUMB( lsl     xh, xh, ip              )
 184  THUMB( orr     yl, yl, xh              )
 185         mov     xh, xl, lsl ip          @ remainder = low order bits of xl:
 186         mov     xh, xh, lsr ip          @ shift up then down clears the rest
 187         mov     pc, lr
 188
 189         @ eq -> division by 1: obvious enough...
 190 9:      moveq   yl, xl
 191         moveq   yh, xh
 192         moveq   xh, #0
 193         moveq   pc, lr                  @ ne (divisor 0) falls into Ldiv0_64
 194 UNWIND(.fnend)
 195
 196 UNWIND(.fnstart)
 197 UNWIND(.pad #4)
 198 UNWIND(.save {lr})
 199 Ldiv0_64:
 200         @ Division by 0:
 201         str     lr, [sp, #-8]!          @ sp -= 8, lr saved at the new sp
 202         bl      __div0
 203
 204         @ as wrong as it could be...
 205         mov     yl, #0
 206         mov     yh, #0
 207         mov     xh, #0
 208         ldr     pc, [sp], #8            @ saved lr into pc, then sp += 8
 209
 210 UNWIND(.fnend)
 211 ENDPROC(__do_div64)