8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / cmd / sgs / rtld.4.x / rem.s
blobb9d64b3173c729bca3b1ac51be5da527eb409031
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 ! .seg "data"
24 ! .asciz "Copyr 1986 Sun Micro"
25 .seg "text"
27 #ident "%Z%%M% %I% %E% SMI"
30 * Copyright 1986 Sun Microsystems, Inc. All rights reserved.
31 * Use is subject to license terms.
35 * division/remainder
37 * Input is:
38 * dividend -- the thing being divided
39 * divisor -- how many ways to divide
40 * Important parameters:
41 * N -- how many bits per iteration we try to get
42 * as our current guess: 4
43 * WORDSIZE -- how many bits altogether we're talking about:
44 * obviously: 32
45 * A derived constant:
46 * TOPBITS -- how many bits are in the top "decade" of a number: 4
48 * Important variables are:
49 * Q -- the partial quotient under development -- initially 0
50 * R -- the remainder so far -- initially == the dividend
51 * ITER -- number of iterations of the main division loop that will
52 * be required. Equal to CEIL( lg2(quotient)/4 )
53 * Note that this is log_base_(2^4) of the quotient.
54 * V -- the current comparand -- initially divisor*2^(ITER*4-1)
55 * Cost:
56 * current estimate for non-large dividend is
57 * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
58 * a large dividend is one greater than 2^(31-4 ) and takes a
59 * different path, as the upper bits of the quotient must be developed
60 * one bit at a time.
63 #include <sys/trap.h>
64 #include <sys/asm_linkage.h>
73 ! working variable
77 * this is the recursive definition of how we develop quotient digits.
78 * it takes three important parameters:
79 * $1 -- the current depth, 1<=$1<=4
80 * $2 -- the current accumulation of quotient bits
81 * 4 -- max depth
82 * We add a new bit to $2 and either recurse or
83 * insert the bits in the quotient.
84 * Dynamic input:
85 * %o3 -- current remainder
86 * %o2 -- current quotient
87 * %o5 -- current comparand
88 * cc -- set on current value of %o3
89 * Dynamic output:
90 * %o3', %o2', %o5', cc'
! .urem -- unsigned remainder entry point.
! Sets %g1 to 0 and branches to the shared `divide` code. %g1 is the
! sign marker that `got_result` tests before returning, so a zero here
! means the remainder is never negated. The `mov` occupies the delay
! slot of the branch.
96 ! RTENTRY(.urem) ! UNSIGNED REMAINDER
97 .global .urem
98 .urem:
99 b divide
100 mov 0,%g1 ! result always positive
! .rem -- signed remainder entry point.
! Copies %o0 into %g1 so that `got_result` can test its sign later, then
! negates whichever of %o0 / %o1 is negative before reaching `divide`.
! If neither operand has its sign bit set, the first branch goes straight
! to `divide` (the `mov` to %g1 is in its delay slot and still executes).
102 ! RTENTRY(.rem) ! SIGNED REMAINDER
103 .global .rem
104 .rem:
105 orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
106 bge divide ! if not, skip this junk
107 mov %o0,%g1 ! record sign of result in sign of %g1
! At least one operand is negative. Test %o1 first; the `tst %o0` in the
! delay slot sets the condition codes used by the next branch.
108 tst %o1
109 bge 2f
110 tst %o0
111 ! %o1 < 0
! `neg %o1` is in the delay slot, so %o1 is negated on both paths here.
112 bge divide
113 neg %o1
! NOTE(review): the `2:` label targeted by `bge 2f` above is not visible in
! this listing (the embedded numbering skips 114); presumably it belongs
! here, ahead of the `%o0 < 0` path -- confirm against the upstream file.
115 ! %o0 < 0
116 neg %o0
117 ! FALL THROUGH
! divide -- common body shared by .urem and .rem.
! On entry: %o0 = dividend, %o1 = divisor, %g1 = sign marker for the result.
! Working registers initialised in this block:
!   %o5 = comparand (starts as a copy of %o1)
!   %o3 = remainder (starts as a copy of %o0)
!   %o2 = quotient  (starts at 0)
!   %o4 = iteration counter (starts at 0)
!   %g2 = 1<<(32-4-1), the threshold for the "large dividend" path
120 divide:
121 ! compute size of quotient, scale comparand
! orcc copies %o1 to %o5 and sets the condition codes on it in one step.
122 orcc %o1,%g0,%o5 ! movcc %o1,%o5
123 bnz 0f ! if %o1 != 0
124 mov %o0,%o3
! Divisor is zero: go raise the divide-by-zero trap.
125 ba zero_divide
! NOTE(review): the `0:` label targeted by `bnz 0f` is not visible in this
! listing (the embedded numbering skips 126-127); it presumably precedes
! the `cmp` below -- confirm against the upstream file.
128 cmp %o3,%o5
129 blu got_result ! if %o3<%o5 already, there's no point in continuing
130 mov 0,%o2
! Unsigned compare of the dividend against %g2 selects the scaling path;
! dividends below the threshold use the 4-bits-per-step loop.
131 sethi %hi(1<<(32-4 -1)),%g2
132 cmp %o3,%g2
133 blu not_really_big
134 mov 0,%o4
! Large-dividend scaling: reached by fall-through when %o3 >= %g2.
! Computes %o4 (number of 4-bit iterations) and %g3 (number of single-bit
! steps) while shifting the comparand %o5 up towards the remainder %o3.
136 ! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
137 ! our usual 4-at-a-shot divide step will cause overflow and havoc. The
138 ! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
139 ! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
140 ! the top decade: so don't even bother to compare to %o3.
! NOTE(review): the `1:` label targeted by `b 1b` below is not visible in
! this listing (the embedded numbering skips 141); it presumably marks the
! top of this loop -- confirm against the upstream file.
! Loop: shift %o5 left by 4 and count in %o4 until %o5 >= %g2 (unsigned).
! %g3 is set to 1 in the delay slot of `bgeu` on every pass.
142 cmp %o5,%g2
143 bgeu 3f
144 mov 1,%g3
145 sll %o5,4,%o5
146 b 1b
147 inc %o4
148 ! now compute %g3
! Double %o5 (addcc sets carry on overflow). The `add` in the delay slot
! bumps %g3 whether or not the branch is taken.
149 2: addcc %o5,%o5,%o5
150 bcc not_too_big ! bcc not_too_big
151 add %g3,1,%g3
153 ! here if the %o1 overflowed when Shifting
154 ! this means that %o3 has the high-order bit set
155 ! restore %o5 and subtract from %o3
! Rebuild the pre-overflow comparand: shift %g2 left by 4 to get the top
! bit, add the doubled value shifted back right by 1, and undo the extra
! %g3 increment in the delay slot of the branch.
156 sll %g2,4 ,%g2 ! high order bit
157 srl %o5,1,%o5 ! rest of %o5
158 add %o5,%g2,%o5
159 b do_single_div
160 sub %g3,1,%g3
161 not_too_big:
! Keep doubling while %o5 < %o3 (unsigned); on equality go straight to the
! single-bit divide. Otherwise execution falls through to do_single_div.
162 3: cmp %o5,%o3
163 blu 2b
165 be do_single_div
167 ! %o5 > %o3: went too far: back up 1 step
168 ! srl %o5,1,%o5
169 ! dec %g3
170 ! do single-bit divide steps
! do_single_div / single_divloop -- develop %g3 quotient bits one at a time.
! %o3 = remainder, %o5 = comparand, %o2 = quotient, %g3 = steps remaining.
! The remainder is allowed to go negative: a step subtracts %o5 when %o3 is
! non-negative and adds it back when %o3 is negative, adjusting %o2 by +1
! or -1 respectively.
172 ! we have to be careful here. We know that %o3 >= %o5, so we can do the
173 ! first divide step without thinking. BUT, the others are conditional,
174 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
175 ! order bit set in the first step, just falling into the regular
176 ! division loop will mess up the first time around.
177 ! So we unroll slightly...
178 do_single_div:
! If %g3 drops below zero there are no single-bit steps to perform.
179 deccc %g3
180 bl end_regular_divide
! First step, done unconditionally: subtract and set the quotient to 1.
182 sub %o3,%o5,%o3
183 mov 1,%o2
184 b,a end_single_divloop
185 single_divloop:
! Condition codes here come from the `tst %o3` in the delay slot of the
! loop-back branch below. The comparand is halved in the delay slot.
186 sll %o2,1,%o2
187 bl 1f
188 srl %o5,1,%o5
189 ! %o3 >= 0
190 sub %o3,%o5,%o3
191 b 2f
192 inc %o2
193 1: ! %o3 < 0
194 add %o3,%o5,%o3
195 dec %o2
! NOTE(review): the `2:` label targeted by `b 2f` above is not visible in
! this listing (the embedded numbering skips 196); it presumably sits here
! ahead of end_single_divloop -- confirm against the upstream file.
197 end_single_divloop:
198 deccc %g3
199 bge single_divloop
200 tst %o3
! Single-bit steps are finished; continue with the loop-control code that
! is shared with the 4-bit divide.
201 b,a end_regular_divide
! not_really_big -- scaling for dividends below the %g2 threshold.
! Shifts the comparand %o5 left by 4 bits per pass while it stays <= %o3
! (unsigned), counting the passes in %o4.
203 not_really_big:
! NOTE(review): the `1:` label targeted by `bleu 1b` is not visible in this
! listing (the embedded numbering skips 204); it presumably marks the `sll`
! below -- confirm against the upstream file.
205 sll %o5,4,%o5
206 cmp %o5,%o3
! `inccc` is in the delay slot: it increments %o4 on every pass and sets
! the condition codes tested by the `be` that follows.
207 bleu 1b
208 inccc %o4
! Branch to got_result if the `inccc` result was zero; `dec %o4` in the
! delay slot runs either way.
209 be got_result
210 dec %o4
! do_regular_divide / divloop -- main loop, 4 quotient bits per pass.
! %o3 = remainder, %o5 = comparand, %o2 = quotient.
! Each pass shifts %o2 left by 4 and then walks an unrolled decision tree
! of depth 4. At every level the comparand is halved (the `srl` in the
! branch delay slot), then subtracted from %o3 if the remainder is
! non-negative or added to it if the remainder is negative. The leaves
! add a signed odd digit (2*accumulated_bits +/- 1) to %o2.
! Labels have the form L.<depth>.<n>; from the accompanying comments <n>
! appears to be 16 + the accumulated bits at that depth.
! This block holds the loop entry and the subtree taken when the remainder
! is non-negative at depth 1; the L.1.16 subtree handles the other case.
! NOTE(review): the `9:` label targeted by every `b 9f` is not visible in
! this listing -- confirm its position against the upstream file.
211 do_regular_divide:
213 ! do the main division iteration
! Set the condition codes on the remainder for the first `bl` in divloop.
214 tst %o3
215 ! fall through into divide loop
216 divloop:
217 sll %o2,4,%o2
218 !depth 1, accumulated bits 0
219 bl L.1.16
220 srl %o5,1,%o5
221 ! remainder is positive
222 subcc %o3,%o5,%o3
223 !depth 2, accumulated bits 1
224 bl L.2.17
225 srl %o5,1,%o5
226 ! remainder is positive
227 subcc %o3,%o5,%o3
228 !depth 3, accumulated bits 3
229 bl L.3.19
230 srl %o5,1,%o5
231 ! remainder is positive
232 subcc %o3,%o5,%o3
233 !depth 4, accumulated bits 7
234 bl L.4.23
235 srl %o5,1,%o5
236 ! remainder is positive
237 subcc %o3,%o5,%o3
! Leaf: digit +15 is added to the quotient in the delay slot.
238 b 9f
239 add %o2, (7*2+1), %o2
241 L.4.23: ! remainder is negative
242 addcc %o3,%o5,%o3
! Leaf: digit +13.
243 b 9f
244 add %o2, (7*2-1), %o2
249 L.3.19: ! remainder is negative
250 addcc %o3,%o5,%o3
251 !depth 4, accumulated bits 5
252 bl L.4.21
253 srl %o5,1,%o5
254 ! remainder is positive
255 subcc %o3,%o5,%o3
! Leaf: digit +11.
256 b 9f
257 add %o2, (5*2+1), %o2
259 L.4.21: ! remainder is negative
260 addcc %o3,%o5,%o3
! Leaf: digit +9.
261 b 9f
262 add %o2, (5*2-1), %o2
270 L.2.17: ! remainder is negative
271 addcc %o3,%o5,%o3
272 !depth 3, accumulated bits 1
273 bl L.3.17
274 srl %o5,1,%o5
275 ! remainder is positive
276 subcc %o3,%o5,%o3
277 !depth 4, accumulated bits 3
278 bl L.4.19
279 srl %o5,1,%o5
280 ! remainder is positive
281 subcc %o3,%o5,%o3
! Leaf: digit +7.
282 b 9f
283 add %o2, (3*2+1), %o2
285 L.4.19: ! remainder is negative
286 addcc %o3,%o5,%o3
! Leaf: digit +5.
287 b 9f
288 add %o2, (3*2-1), %o2
293 L.3.17: ! remainder is negative
294 addcc %o3,%o5,%o3
295 !depth 4, accumulated bits 1
296 bl L.4.17
297 srl %o5,1,%o5
298 ! remainder is positive
299 subcc %o3,%o5,%o3
! Leaf: digit +3.
300 b 9f
301 add %o2, (1*2+1), %o2
303 L.4.17: ! remainder is negative
304 addcc %o3,%o5,%o3
! Leaf: digit +1.
305 b 9f
306 add %o2, (1*2-1), %o2
! L.1.16 -- decision subtree reached from divloop when the remainder is
! negative at depth 1. The structure matches the non-negative subtree: at
! each level the comparand %o5 is halved in the branch delay slot, then
! added to %o3 if the remainder is negative or subtracted if it is not.
! Every leaf adds a negative odd digit (2*accumulated_bits +/- 1, from -1
! down to -15) to the quotient %o2 in the delay slot of its `b 9f`.
! NOTE(review): the `9:` label targeted by every `b 9f` is not visible in
! this listing -- confirm its position against the upstream file.
317 L.1.16: ! remainder is negative
318 addcc %o3,%o5,%o3
319 !depth 2, accumulated bits -1
320 bl L.2.15
321 srl %o5,1,%o5
322 ! remainder is positive
323 subcc %o3,%o5,%o3
324 !depth 3, accumulated bits -1
325 bl L.3.15
326 srl %o5,1,%o5
327 ! remainder is positive
328 subcc %o3,%o5,%o3
329 !depth 4, accumulated bits -1
330 bl L.4.15
331 srl %o5,1,%o5
332 ! remainder is positive
333 subcc %o3,%o5,%o3
! Leaf: digit -1.
334 b 9f
335 add %o2, (-1*2+1), %o2
337 L.4.15: ! remainder is negative
338 addcc %o3,%o5,%o3
! Leaf: digit -3.
339 b 9f
340 add %o2, (-1*2-1), %o2
345 L.3.15: ! remainder is negative
346 addcc %o3,%o5,%o3
347 !depth 4, accumulated bits -3
348 bl L.4.13
349 srl %o5,1,%o5
350 ! remainder is positive
351 subcc %o3,%o5,%o3
! Leaf: digit -5.
352 b 9f
353 add %o2, (-3*2+1), %o2
355 L.4.13: ! remainder is negative
356 addcc %o3,%o5,%o3
! Leaf: digit -7.
357 b 9f
358 add %o2, (-3*2-1), %o2
366 L.2.15: ! remainder is negative
367 addcc %o3,%o5,%o3
368 !depth 3, accumulated bits -3
369 bl L.3.13
370 srl %o5,1,%o5
371 ! remainder is positive
372 subcc %o3,%o5,%o3
373 !depth 4, accumulated bits -5
374 bl L.4.11
375 srl %o5,1,%o5
376 ! remainder is positive
377 subcc %o3,%o5,%o3
! Leaf: digit -9.
378 b 9f
379 add %o2, (-5*2+1), %o2
381 L.4.11: ! remainder is negative
382 addcc %o3,%o5,%o3
! Leaf: digit -11.
383 b 9f
384 add %o2, (-5*2-1), %o2
389 L.3.13: ! remainder is negative
390 addcc %o3,%o5,%o3
391 !depth 4, accumulated bits -7
392 bl L.4.9
393 srl %o5,1,%o5
394 ! remainder is positive
395 subcc %o3,%o5,%o3
! Leaf: digit -13.
396 b 9f
397 add %o2, (-7*2+1), %o2
399 L.4.9: ! remainder is negative
400 addcc %o3,%o5,%o3
! Leaf: digit -15.
401 b 9f
402 add %o2, (-7*2-1), %o2
! end_regular_divide -- loop control for divloop and final remainder fix-up.
! NOTE(review): the `9:` label targeted by the `b 9f` leaves of the divide
! tree is not visible in this listing (the embedded numbering skips
! 403-414); it presumably sits immediately before this label -- confirm.
415 end_regular_divide:
! Count down %o4 and run another 4-bit pass while it is still >= 0. The
! `tst %o3` in the delay slot sets the condition codes both for divloop's
! first `bl` and for the `bl,a` below once the loop has finished.
416 deccc %o4
417 bge divloop
418 tst %o3
! A negative remainder is corrected by adding %o1 back once. The branch is
! annulled, so the `add` executes only when the branch is taken; when it
! is not taken, execution falls through to got_result unchanged.
419 bl,a got_result
420 add %o3,%o1,%o3
! got_result -- apply the sign recorded in %g1 and return the remainder.
! %o3 holds the remainder; it is copied to %o0 in the delay slot of `retl`.
423 got_result:
424 tst %g1
! Annulled branch: `neg %o3` executes only when %g1 is negative.
! NOTE(review): the `1:` label targeted by `bl,a 1f` is not visible in this
! listing (the embedded numbering skips 427-428); it presumably precedes
! the `retl` below -- confirm against the upstream file.
425 bl,a 1f
426 neg %o3 ! remainder <- -%o3
429 retl
430 mov %o3,%o0 ! remainder <- %o3
! zero_divide -- reached from `divide` when the divisor (%o1) is zero.
! Issues software trap ST_DIV0 (presumably defined by <sys/trap.h>, which
! this file includes -- confirm). If control comes back from the trap, the
! routine returns 0 in %o0; the `mov` is in the delay slot of `retl`.
433 zero_divide:
434 ta ST_DIV0 ! divide by zero trap
435 retl ! if handled, ignored, return
436 mov 0, %o0