arch/arm/lib/div64.S

   1 /*
   2  *  linux/arch/arm/lib/div64.S
   3  *
   4  *  Optimized computation of 64-bit dividend / 32-bit divisor
   5  *
   6  *  Author:     Nicolas Pitre
   7  *  Created:    Oct 5, 2003
   8  *  Copyright:  Monta Vista Software, Inc.
   9  *
  10  *  This program is free software; you can redistribute it and/or modify
  11  *  it under the terms of the GNU General Public License version 2 as
  12  *  published by the Free Software Foundation.
  13  */
  14
  15 #include <linux/linkage.h>
  16 #include <asm/assembler.h>
  17 #include <asm/unwind.h>
  18
  19 #ifdef __ARMEB__
  20 #define xh r0
  21 #define xl r1
  22 #define yh r2
  23 #define yl r3
  24 #else
  25 #define xl r0
  26 #define xh r1
  27 #define yl r2
  28 #define yh r3
  29 #endif
  30
   31 /*
   32  * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
   33  *
   34  * Note: Calling convention is totally non standard for optimal code.
   35  *       This is meant to be used by do_div() from include/asm/div64.h only.
   36  *
   37  * Input parameters:
   38  *      xh-xl   = dividend (clobbered)
   39  *      r4      = divisor (preserved)
   40  *
   41  * Output values:
   42  *      yh-yl   = result
   43  *      xh      = remainder
   44  *      (a zero divisor calls __div0; if that returns, yh-yl and xh are 0)
   45  * Clobbered regs: xl, ip, condition flags
   46  */
   47
   48 ENTRY(__do_div64)
   49 UNWIND(.fnstart)
   50         @ Unsigned divide of xh-xl by r4: quotient in yh-yl, remainder in xh.
   51         @ Test for easy paths first.
   52         subs    ip, r4, #1              @ ip = divisor - 1; flags are reused at 9:
   53         bls     9f                      @ divisor is 0 or 1
   54         tst     ip, r4                  @ zero only if a single bit is set in r4
   55         beq     8f                      @ divisor is power of 2
   56
   57         @ See if we need to handle upper 32-bit result.
   58         cmp     xh, r4                  @ xh < r4 means the quotient fits in 32 bits
   59         mov     yh, #0                  @ plain mov: flags from cmp are kept
   60         blo     3f                      @ upper result word stays 0
   61
   62         @ Align divisor with upper part of dividend.
   63         @ The aligned divisor is stored in yl preserving the original.
   64         @ The bit position is stored in ip.
   65
   66 #if __LINUX_ARM_ARCH__ >= 5
   67
   68         clz     yl, r4                  @ leading zeros of the divisor
   69         clz     ip, xh                  @ leading zeros of the dividend top word
   70         sub     yl, yl, ip              @ yl = left shift that aligns the two
   71         mov     ip, #1
   72         mov     ip, ip, lsl yl          @ ip = quotient bit matching that shift
   73         mov     yl, r4, lsl yl          @ yl = aligned divisor
   74
   75 #else
   76
   77         mov     yl, r4                  @ yl = divisor copy to be shifted up
   78         mov     ip, #1                  @ ip = quotient bit for the current shift
   79 1:      cmp     yl, #0x80000000         @ stop before the top bit would be lost
   80         cmpcc   yl, xh                  @ ... or as soon as yl >= xh
   81         movcc   yl, yl, lsl #1
   82         movcc   ip, ip, lsl #1
   83         bcc     1b
   84
   85 #endif
   86
   87         @ The division loop for needed upper bit positions.
   88         @ Break out early if dividend reaches 0.
   89 2:      cmp     xh, yl
   90         orrcs   yh, yh, ip              @ xh >= yl: set this quotient bit
   91         subcss  xh, xh, yl              @ ... and subtract; Z set if xh is now 0
   92         movnes  ip, ip, lsr #1          @ next bit position; Z set when ip runs out
   93         mov     yl, yl, lsr #1          @ plain mov: Z from above is kept
   94         bne     2b                      @ exit once xh became 0 or ip ran out
   95
   96         @ See if we need to handle lower 32-bit result.
   97 3:      cmp     xh, #0
   98         mov     yl, #0                  @ low result word starts at 0
   99         cmpeq   xl, r4                  @ only compared when xh is 0
  100         movlo   xh, xl                  @ remaining dividend < divisor: it is the remainder
  101         retlo   lr
  102
  103         @ The division loop for lower bit positions.
  104         @ Here we shift remainder bits leftwards rather than moving the
  105         @ divisor for comparisons, considering the carry-out bit as well.
  106         mov     ip, #0x80000000         @ start with the top quotient bit
  107 4:      movs    xl, xl, lsl #1          @ C = bit shifted out of xl
  108         adcs    xh, xh, xh              @ xh = 2 * xh + C; new C = carry-out bit
  109         beq     6f                      @ xh is 0: handled at 6:
  110         cmpcc   xh, r4                  @ no carry-out: C = (xh >= r4)
  111 5:      orrcs   yl, yl, ip              @ C set: this quotient bit is 1
  112         subcs   xh, xh, r4
  113         movs    ip, ip, lsr #1          @ Z set once the last bit position is done
  114         bne     4b
  115         ret     lr
  116
  117         @ The top part of remainder became zero.  If carry is set
  118         @ (the 33rd bit) this is a false positive so resume the loop.
  119         @ Otherwise, if lower part is also null then we are done.
  120 6:      bcs     5b
  121         cmp     xl, #0
  122         reteq   lr                      @ nothing left: remainder in xh is 0
  123
  124         @ We still have remainder bits in the low part.  Bring them up.
  125
  126 #if __LINUX_ARM_ARCH__ >= 5
  127
  128         clz     xh, xl                  @ we know xh is zero here so...
  129         add     xh, xh, #1              @ shift count that also drops the top set bit
  130         mov     xl, xl, lsl xh
  131         mov     ip, ip, lsr xh          @ skip the same number of quotient bits
  132
  133 #else
  134
  135 7:      movs    xl, xl, lsl #1          @ shift until a set bit falls out of xl
  136         mov     ip, ip, lsr #1          @ ... moving the quotient bit along
  137         bcc     7b
  138
  139 #endif
  140
  141         @ Current remainder is now 1.  It is worthless to compare with
  142         @ divisor at this point since divisor can not be smaller than 3 here.
  143         @ If possible, branch for another shift in the division loop.
  144         @ If no bit position left then we are done.
  145         movs    ip, ip, lsr #1
  146         mov     xh, #1                  @ plain mov: Z from movs is kept
  147         bne     4b
  148         ret     lr
  149
  150 8:      @ Division by a power of 2: determine what that divisor order is
  151         @ then simply shift values around
  152
  153 #if __LINUX_ARM_ARCH__ >= 5
  154
  155         clz     ip, r4
  156         rsb     ip, ip, #31             @ ip = log2 of the divisor
  157
  158 #else
  159
  160         mov     yl, r4                  @ binary search for the set bit, ip = log2
  161         cmp     r4, #(1 << 16)
  162         mov     ip, #0
  163         movhs   yl, yl, lsr #16
  164         movhs   ip, #16
  165
  166         cmp     yl, #(1 << 8)
  167         movhs   yl, yl, lsr #8
  168         addhs   ip, ip, #8
  169
  170         cmp     yl, #(1 << 4)
  171         movhs   yl, yl, lsr #4
  172         addhs   ip, ip, #4
  173
  174         cmp     yl, #(1 << 2)           @ yl is 1, 2, 4 or 8 at this point
  175         addhi   ip, ip, #3              @ yl is 8
  176         addls   ip, ip, yl, lsr #1      @ yl of 1, 2, 4 adds 0, 1, 2
  177
  178 #endif
  179
  180         mov     yh, xh, lsr ip          @ quotient = dividend shifted right by ip
  181         mov     yl, xl, lsr ip
  182         rsb     ip, ip, #32             @ ip = 32 - log2 of the divisor
  183  ARM(   orr     yl, yl, xh, lsl ip      )
  184  THUMB( lsl     xh, xh, ip              )
  185  THUMB( orr     yl, yl, xh              )
  186         mov     xh, xl, lsl ip          @ remainder = low log2 bits of xl
  187         mov     xh, xh, lsr ip
  188         ret     lr
  189
  190         @ eq -> division by 1: obvious enough...
  191 9:      moveq   yl, xl
  192         moveq   yh, xh
  193         moveq   xh, #0
  194         reteq   lr                      @ ne (divisor is 0): falls through to Ldiv0_64
  195 UNWIND(.fnend)
  196
  197 UNWIND(.fnstart)
  198 UNWIND(.pad #4)
  199 UNWIND(.save {lr})
  200 Ldiv0_64:
  201         @ Division by 0:
  202         str     lr, [sp, #-8]!          @ save lr across the call, sp -= 8
  203         bl      __div0
  204
  205         @ as wrong as it could be...
  206         mov     yl, #0
  207         mov     yh, #0
  208         mov     xh, #0
  209         ldr     pc, [sp], #8            @ return: reload saved lr into pc, sp += 8
  210
  211 UNWIND(.fnend)
  212 ENDPROC(__do_div64)