1 /* SPDX-License-Identifier: GPL-2.0 */
/*
 * NOTE(review): the embedded line numbers skip, so only part of this routine
 * is visible here.  The remarks added below state only what the visible
 * instructions and their original comments establish; anything else is
 * marked as an assumption.
 *
 * r63 is used as the zero operand: "sub r63,x,y" is annotated "Negate" below.
 */
2 .section .text..SHmedia32,"ax"
/* The low five bits of this constant are 0b10001 = 17, which matches the
   ".l shift count 17" of the original remark (assumes a .l shift only looks
   at the low five bits of the count -- TODO confirm).  What the remaining
   bits are for is not visible here. */
10 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
/* r20 = 0 - r22.  Per the original remark, 0 and 64 are congruent modulo 64,
   so r20 presumably stands in for (64 - r22) where it is used as a shift
   count; the consumer of r20 is not visible here. */
14 sub r63,r22,r20 // r63 == 64 % 64
/* Branch through tr0 when r9 > r63, i.e. r9 > 0; the original remark names
   the target "large_divisor".  NOTE(review): the "/u" suffix looks like an
   "unlikely" prediction hint -- confirm against the SHmedia ISA manual. */
22 bgt/u r9,r63,tr0 // large_divisor
/* Visible part of a reciprocal correction step: r4 is negated, shifted right
   by two, multiplied by r1, and (after lines not shown here) added into r1. */
31 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
32 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
33 the case may be, %0000000000000000 000.11111111111, still */
34 muls.l r1,r4,r4 /* leaving at least one sign bit. */
39 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
41 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
/* Compares r21 with zero (r63) and overwrites r21 with the result.
   Presumably this produces the mask that clears the upper 32 bits as
   described in Note 1 at the end of this file -- TODO confirm. */
50 mcmpgt.l r21,r63,r21 // See Note 1
56 /* small divisor: need a third divide step */
66 /* could test r3 here to check for divide by zero. */
/* Same negate / shift / multiply / add sequence as the correction step
   above, repeated here on the small-divisor path. */
78 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
79 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
80 the case may be, %0000000000000000 000.11111111111, still */
81 muls.l r1,r4,r4 /* leaving at least one sign bit. */
86 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
88 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
/* Branch through tr0 when r7 > r25 (bgtu is presumably the unsigned
   compare -- TODO confirm); the original remark names the target
   "no_lo_adj". */
99 bgtu/u r7,r25,tr0 // no_lo_adj
105 /* large_divisor: only needs a few adjustments. */
113 /* Note 1: Shifting the result of the second divide stage so that it always
114 fits into 32 bits, while still reducing the remainder (the "rest")
115 sufficiently, would take a lot of instructions to get the shifts just right.
116 Multiplying the full 64-bit shift result by the divisor would instead need
117 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
118 Fortunately, if the upper 32 bits of the shift result are nonzero, we
119 know that the remainder left after taking this partial result into account
120 fits into 32 bits. So we just clear the upper 32 bits of the remainder
121 whenever the upper 32 bits of the partial result are nonzero. */