1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2008 by Jens Arnold
12 * Optimised unsigned integer division for ARMv4
14 * Based on: libgcc routines for ARM cpu.
15 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
16 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
17 * Free Software Foundation, Inc.
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version 2
22 * of the License, or (at your option) any later version.
24 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
25 * KIND, either express or implied.
27 ****************************************************************************/
30 /* Codecs should not normally do this, but we need to check a macro, and
31 * codecs.h would confuse the assembler. */
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unrolled shift-and-subtract steps for an unsigned division.
 *   \dividend  reduced in place by the conditional subtract steps
 *   \divisor   only read by the instructions visible here
 *   \result    first used as scratch for the range search, afterwards
 *              quotient bits are ORed into it
 *   \curbit    scratch: instruction offset used by the computed jump
 * The condition flags are overwritten by the cmp instructions.
 *
 * NOTE(review): this listing appears to have lines missing inside the
 * macro. The .rept/.set directives that pair with the .endr and define
 * `shift`, any instruction that clears \result between the range search
 * and the orrls steps, and the closing .endm are not visible here.
 * Confirm against the complete file before relying on this text.
 */
33 .macro ARM_DIV_BODY dividend, divisor, result, curbit
/* Range search. \result starts as a copy of the dividend and \curbit as
 * the largest skip distance. Whenever divisor <= (\result >> k), unsigned,
 * the shifted value is kept and \curbit is reduced by 3 * k, i.e. three
 * instructions for each bit position that must not be skipped
 * (48 = 3*16, 24 = 3*8, 12 = 3*4, 6 = 3*2). */
35 mov \result, \dividend
36 mov \curbit, #90 @ 3 * 30, (calculating branch dest)
37 cmp \divisor, \result, lsr #16
38 movls \result,\result, lsr #16
39 subls \curbit, \curbit, #48
40 cmp \divisor, \result, lsr #8
41 movls \result,\result, lsr #8
42 subls \curbit, \curbit, #24
43 cmp \divisor, \result, lsr #4
44 movls \result,\result, lsr #4
45 subls \curbit, \curbit, #12
46 cmp \divisor, \result, lsr #2
/* Last search step: only the offset is adjusted, \result is not shifted. */
47 subls \curbit, \curbit, #6
48 @ Calculation is only done down to shift=2, because the shift=1 step
49 @ would need 3 more cycles, but would only gain 1.5 cycles on average.
/* Computed jump: adds \curbit * 4 bytes (\curbit instructions) to pc so
 * that the compare steps ruled out by the search above are skipped. */
51 add pc, pc, \curbit, lsl #2
/* One quotient bit per repetition: if divisor <= (dividend >> shift),
 * set bit `shift` in \result and subtract (divisor << shift) from the
 * dividend. Each repetition is exactly three instructions, which is what
 * the multiples of 3 in \curbit count. */
56 cmp \divisor, \dividend, lsr #shift
57 orrls \result, \result, #(1 << shift)
58 subls \dividend, \dividend, \divisor, lsl #shift
59 .endr @ shift==0 in the .rept would cause a warning for lsr #0
/* Bit 0 step, written without a shift operand. The dividend is left
 * unadjusted here (the subtract below is commented out), so \dividend
 * does not hold the exact remainder afterwards. */
60 cmp \divisor, \dividend
61 orrls \result, \result, #1
62 @subls \dividend, \dividend, \divisor @ correct remainder not needed
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Looks up a byte for a divisor that has exactly one bit set: the divisor
 * is multiplied by a constant using shift/or/subtract steps and the top
 * 6 bits of the product index a byte table at L_ffs_table.
 *   \divisor  in: value with a single bit set; overwritten with the product
 *   \order    out: byte loaded from the table (also used to hold the table
 *             address before the load)
 *
 * NOTE(review): the closing .endm and the L_ffs_table label are not
 * visible in this listing; presumably the label sits on the 64-byte table
 * further down the file. Confirm against the complete file.
 */
65 .macro ARM_DIV2_ORDER divisor, order
67 @ There's exactly one bit set in the divisor, so ffs() can be used
68 @ This is the ffs algorithm devised by D.Seal and posted to
69 @ comp.sys.arm on 16 Feb 1994.
/* \order temporarily holds the pc-relative address of the table. */
70 adr \order, L_ffs_table
/* With a single bit set the ORs cannot overlap, so each orr acts as an
 * addition; the rsb computes (value << 16) - value, i.e. value * 0xffff,
 * and 0x451 * 0xffff = 0x0450fbaf. */
71 orr \divisor, \divisor, \divisor, lsl #4 @ = X * 0x11
72 orr \divisor, \divisor, \divisor, lsl #6 @ = X * 0x451
73 rsb \divisor, \divisor, \divisor, lsl #16 @ = X * 0x0450fbaf
/* lsr #26 keeps the top 6 bits of the product: a table index in 0..63. */
75 ldrb \order, [\order, \divisor, lsr #26]
/* NOTE(review): only fragments of udiv32_arm are visible in this listing.
 * The function label, its entry/exit code and any special-case branches
 * are not shown, so the comments below describe just the visible lines. */
/* Switch to the .icode section for what follows. */
80 .section .icode,"ax",%progbits
/* Declare udiv32_arm as a function symbol. */
86 .type udiv32_arm,%function
/* Division steps with, in the macro's parameter order,
 * r0 = dividend, r1 = divisor, r2 = result, r3 = curbit. */
97 ARM_DIV_BODY r0, r1, r2, r3
/* Single-bit divisor lookup: r1 = divisor (overwritten by the macro),
 * r2 = order byte loaded from the table. */
108 ARM_DIV2_ORDER r1, r2
114 @----------------------------------------------
/* 64-entry byte table; the trailing comment on each row gives its index
 * range. Entry 0 holds 32 and the values 0..31 each appear among the
 * remaining entries. For a 32-bit value X with a single bit set, the
 * entry at index (X * 0x0450fbaf) >> 26 is that bit's position, e.g.
 * X = 1 -> index 1 -> 0, X = 2 -> index 2 -> 1, X = 4 -> index 4 -> 2.
 * NOTE(review): the table's label is not visible in this listing;
 * presumably this is the L_ffs_table referenced by ARM_DIV2_ORDER. Confirm
 * against the complete file. */
115 .byte 32, 0, 1, 12, 2, 6, 0, 13 @ 0- 7
116 .byte 3, 0, 7, 0, 0, 0, 0, 14 @ 8-15
117 .byte 10, 4, 0, 0, 8, 0, 0, 25 @ 16-23
118 .byte 0, 0, 0, 0, 0, 21, 27, 15 @ 24-31
119 .byte 31, 11, 5, 0, 0, 0, 0, 0 @ 32-39
120 .byte 9, 0, 0, 24, 0, 0, 20, 26 @ 40-47
121 .byte 30, 0, 0, 0, 0, 23, 0, 19 @ 48-55
122 .byte 29, 0, 22, 18, 28, 17, 16, 0 @ 56-63