1 //===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the __udivsi3 (32-bit unsigned integer divide)
10 // function for the ARM 32-bit architecture.
12 //===----------------------------------------------------------------------===//
14 #include "../assembly.h"
// NOTE(review): presumably exports __aeabi_uidiv as a second name for
// __udivsi3; the macro is defined outside this file - confirm in assembly.h.
22 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
24 @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
25 @ Calculate and return the quotient of the (unsigned) division.
// __udivsi3: 32-bit unsigned division. As the comments below state, r0 holds
// the numerator and r1 the denominator; the software path accumulates the
// quotient in r3.
27 DEFINE_COMPILERRT_FUNCTION(__udivsi3)
// Path selected when __ARM_ARCH_EXT_IDIV__ is non-zero (going by the macro
// name, the target has an integer divide extension).
28 #if __ARM_ARCH_EXT_IDIV__
// Taken when the preceding flag-setting instruction left Z set.
// NOTE(review): presumably that instruction tests the divisor for zero.
30 beq LOCAL_LABEL(divby0)
35 // Use movs for compatibility with v8-m.base.
43 #else // ! __ARM_ARCH_EXT_IDIV__
// Software path. Carry clear after the preceding flag-setting instruction
// selects the divide-by-zero handler.
45 bcc LOCAL_LABEL(divby0)
46 #if defined(USE_THUMB_1)
// Thumb-1 uses explicit branches; per the label name this one is taken when
// numerator and denominator differ.
47 bne LOCAL_LABEL(num_neq_denom)
49 LOCAL_LABEL(num_neq_denom):
55 #if defined(USE_THUMB_1)
// Per the label name: proceed to the long division only when
// numerator >= denominator (unsigned higher-or-same).
56 bhs LOCAL_LABEL(num_ge_denom)
59 LOCAL_LABEL(num_ge_denom):
66 // Implement division using binary long division algorithm.
68 // r0 is the numerator, r1 the denominator.
70 // The code before JMP computes the correct shift I, so that
71 // r0 and (r1 << I) have the highest bit set in the same position.
72 // At the time of JMP, ip := .Ldiv0block - 12 * I.
73 // This depends on the fixed instruction size of block.
74 // For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
76 // block(shift) implements the test-and-update-quotient core.
77 // It assumes (r1 << shift) can be computed without overflow and
78 // that r0 < 2 * (r1 << shift). The quotient is stored in r3.
// CLZ variant: the jump target is computed arithmetically from r3.
80 # if defined(__ARM_FEATURE_CLZ)
83 // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
85 # if defined(USE_THUMB_2)
// The + 1 sets bit 0 of the target address.
// NOTE(review): presumably so the jump stays in Thumb state - confirm
// against the JMP macro.
86 adr ip, LOCAL_LABEL(div0block) + 1
// ip -= 2 * r3 (the extra 2 bytes per block in Thumb mode: 14 vs. 12).
87 sub ip, ip, r3, lsl #1
89 adr ip, LOCAL_LABEL(div0block)
// ip -= 4 * r3, then ip -= 8 * r3: 12 * r3 in total from these two.
91 sub ip, ip, r3, lsl #2
92 sub ip, ip, r3, lsl #3
// Variant without CLZ: the shift is found by a 16/8/4/2/1 search that moves
// the jump target back by that many blocks at each step.
95 # else // No CLZ Feature
96 # if defined(USE_THUMB_2)
97 # error THUMB mode requires CLZ or UDIV
// BLOCK_SIZE is the byte size of one expanded block(shift) in this variant:
// 10 in Thumb-1 (31 blocks = 310 bytes, see the padding note below),
// 12 otherwise.
99 # if defined(USE_THUMB_1)
100 # define BLOCK_SIZE 10
102 # define BLOCK_SIZE 12
// Thumb-1 builds the jump-target address in r0; the other mode builds it
// in ip.
106 # if defined(USE_THUMB_1)
108 adr r0, LOCAL_LABEL(div0block)
111 adr ip, LOCAL_LABEL(div0block)
// Each search step moves the target back by N blocks when its comparison
// succeeds. Thumb-1 branches around the subtraction with blo; the other
// mode predicates the subtraction instead (hs, and ls for the final step).
115 # if defined(USE_THUMB_1)
116 blo LOCAL_LABEL(skip_16)
118 subs r0, r0, #(16 * BLOCK_SIZE)
119 LOCAL_LABEL(skip_16):
122 subhs ip, ip, #(16 * BLOCK_SIZE)
127 # if defined(USE_THUMB_1)
128 blo LOCAL_LABEL(skip_8)
130 subs r0, r0, #(8 * BLOCK_SIZE)
134 subhs ip, ip, #(8 * BLOCK_SIZE)
139 # if defined(USE_THUMB_1)
140 blo LOCAL_LABEL(skip_4)
142 subs r0, r0, #(4 * BLOCK_SIZE)
146 subhs ip, #(4 * BLOCK_SIZE)
151 # if defined(USE_THUMB_1)
152 blo LOCAL_LABEL(skip_2)
154 subs r0, r0, #(2 * BLOCK_SIZE)
158 subhs ip, ip, #(2 * BLOCK_SIZE)
161 // Last block, no need to update r2 or r3.
162 # if defined(USE_THUMB_1)
165 blo LOCAL_LABEL(skip_1)
166 subs r0, r0, #(1 * BLOCK_SIZE)
175 subls ip, ip, #(1 * BLOCK_SIZE)
181 # endif // __ARM_FEATURE_CLZ
185 // due to the range limit of branch in Thumb1, we have to place the
// On EABI targets the divide-by-zero case calls __aeabi_idiv0.
189 # if defined(__ARM_EABI__)
191 bl __aeabi_idiv0 // due to relocation limit, can't use b.
// Thumb-1 flavour of block(shift): r2 = r1 << shift, and the carry produced
// by the comparison is shifted into the low bit of r3 by adcs.
198 #if defined(USE_THUMB_1)
199 #define block(shift) \
200 lsls r2, r1, IMM shift; \
202 blo LOCAL_LABEL(block_skip_##shift); \
204 LOCAL_LABEL(block_skip_##shift) :; \
205 adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.
207 // TODO: if current location counter is not word aligned, we don't
208 // need the .p2align and nop
209 // Label div0block must be word-aligned. First align block 31
211 nop // Padding to align div0block as 31 blocks = 310 bytes
// ARM / Thumb-2 flavour of block(shift): compare r0 with r1 << shift; when
// r0 is higher or same, add (1 << shift) to the quotient in r3 and subtract
// r1 << shift from r0.
214 #define block(shift) \
215 cmp r0, r1, lsl IMM shift; \
217 WIDE(addhs) r3, r3, IMM (1 << shift); \
218 WIDE(subhs) r0, r0, r1, lsl IMM shift
// Base address of the computed jump: the target for a shift of 0, with the
// blocks for larger shifts placed before it (ip := .Ldiv0block - 12 * I).
252 LOCAL_LABEL(div0block):
257 #endif // __ARM_ARCH_EXT_IDIV__
259 END_COMPILERRT_FUNCTION(__udivsi3)
// NOTE(review): macro defined outside this file; presumably emits the marker
// that tells the linker this object does not need an executable stack -
// confirm in assembly.h.
261 NO_EXEC_STACK_DIRECTIVE