8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / cmd / sgs / rtld.4.x / div.s
blob5a09c9d960690b25e7f1af0634c152147442ccbf
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 ! .seg "data"
24 ! .asciz "Copyr 1986 Sun Micro"
25 .seg "text"
27 #ident "%Z%%M% %I% %E% SMI"
30 * Copyright 1986 Sun Microsystems, Inc. All rights reserved.
31 * Use is subject to license terms.
35 * divison/remainder
37 * Input is:
38 * dividend -- the thing being divided
39 * divisor -- how many ways to divide
40 * Important parameters:
41 * N -- how many bits per iteration we try to get
42 * as our current guess:
43 * WORDSIZE -- how many bits altogether we're talking about:
44 * obviously:
45 * A derived constant:
46 * TOPBITS -- how many bits are in the top "decade" of a number:
48 * Important variables are:
49 * Q -- the partial quotient under development -- initally 0
50 * R -- the remainder so far -- initially == the dividend
51 * ITER -- number of iterations of the main division loop will
52 * be required. Equal to CEIL( lg2(quotient)/4 )
53 * Note that this is log_base_(2^4) of the quotient.
54 * V -- the current comparand -- initially divisor*2^(ITER*4-1)
55 * Cost:
56 * current estimate for non-large dividend is
57 * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
58 * a large dividend is one greater than 2^(31-4 ) and takes a
59 * different path, as the upper bits of the quotient must be developed
60 * one bit at a time.
63 #include <sys/trap.h>
64 #include <sys/asm_linkage.h>
73 ! working variable
77 * this is the recursive definition of how we develop quotient digits.
78 * it takes three important parameters:
79 * $1 -- the current depth, 1<=$1<=4
80 * $2 -- the current accumulation of quotient bits
81 * 4 -- max depth
82 * We add a new bit to $2 and either recurse or
83 * insert the bits in the quotient.
84 * Dynamic input:
85 * %o3 -- current remainder
86 * %o2 -- current quotient
87 * %o5 -- current comparand
88 * cc -- set on current value of %o3
89 * Dynamic output:
90 * %o3', %o2', %o5', cc'
97 ! RTENTRY(.udiv) ! unsigned divide
98 .global .udiv
99 .udiv:
100 b divide
101 mov 0,%g1 ! result always positive
103 ! RTENTRY(.div) ! SIGNED DIVIDE
104 .global .div
105 .div:
106 orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
107 bge divide ! if not, skip this junk
108 xor %o1,%o0,%g1 ! record sign of result in sign of %g1
109 tst %o1
110 bge 2f
111 tst %o0
112 ! %o1 < 0
113 bge divide
114 neg %o1
116 ! %o0 < 0
117 neg %o0
118 ! FALL THROUGH
121 divide:
122 ! compute size of quotient, scale comparand
123 orcc %o1,%g0,%o5 ! movcc %o1,%o5
124 bnz 0f ! if %o1 != 0
125 mov %o0,%o3
126 ba zero_divide
129 cmp %o3,%o5
130 blu got_result ! if %o3<%o5 already, there's no point in continuing
131 mov 0,%o2
132 sethi %hi(1<<(32-4 -1)),%g2
133 cmp %o3,%g2
134 blu not_really_big
135 mov 0,%o4
137 ! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
138 ! our usual 4-at-a-shot divide step will cause overflow and havoc. The
139 ! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
140 ! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
141 ! the top decade: so don't even bother to compare to %o3.
143 cmp %o5,%g2
144 bgeu 3f
145 mov 1,%g3
146 sll %o5,4,%o5
147 b 1b
148 inc %o4
149 ! now compute %g3
150 2: addcc %o5,%o5,%o5
151 bcc not_too_big ! bcc not_too_big
152 add %g3,1,%g3
154 ! here if the %o1 overflowed when Shifting
155 ! this means that %o3 has the high-order bit set
156 ! restore %o5 and subtract from %o3
157 sll %g2,4 ,%g2 ! high order bit
158 srl %o5,1,%o5 ! rest of %o5
159 add %o5,%g2,%o5
160 b do_single_div
161 sub %g3,1,%g3
162 not_too_big:
163 3: cmp %o5,%o3
164 blu 2b
166 be do_single_div
168 ! %o5 > %o3: went too far: back up 1 step
169 ! srl %o5,1,%o5
170 ! dec %g3
171 ! do single-bit divide steps
173 ! we have to be careful here. We know that %o3 >= %o5, so we can do the
174 ! first divide step without thinking. BUT, the others are conditional,
175 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
176 ! order bit set in the first step, just falling into the regular
177 ! division loop will mess up the first time around.
178 ! So we unroll slightly...
179 do_single_div:
180 deccc %g3
181 bl end_regular_divide
183 sub %o3,%o5,%o3
184 mov 1,%o2
185 b,a end_single_divloop
186 single_divloop:
187 sll %o2,1,%o2
188 bl 1f
189 srl %o5,1,%o5
190 ! %o3 >= 0
191 sub %o3,%o5,%o3
192 b 2f
193 inc %o2
194 1: ! %o3 < 0
195 add %o3,%o5,%o3
196 dec %o2
198 end_single_divloop:
199 deccc %g3
200 bge single_divloop
201 tst %o3
202 b,a end_regular_divide
204 not_really_big:
206 sll %o5,4,%o5
207 cmp %o5,%o3
208 bleu 1b
209 inccc %o4
210 be got_result
211 dec %o4
212 do_regular_divide:
214 ! do the main division iteration
215 tst %o3
216 ! fall through into divide loop
217 divloop:
218 sll %o2,4,%o2
219 !depth 1, accumulated bits 0
220 bl L.1.16
221 srl %o5,1,%o5
222 ! remainder is positive
223 subcc %o3,%o5,%o3
224 !depth 2, accumulated bits 1
225 bl L.2.17
226 srl %o5,1,%o5
227 ! remainder is positive
228 subcc %o3,%o5,%o3
229 !depth 3, accumulated bits 3
230 bl L.3.19
231 srl %o5,1,%o5
232 ! remainder is positive
233 subcc %o3,%o5,%o3
234 !depth 4, accumulated bits 7
235 bl L.4.23
236 srl %o5,1,%o5
237 ! remainder is positive
238 subcc %o3,%o5,%o3
239 b 9f
240 add %o2, (7*2+1), %o2
242 L.4.23: ! remainder is negative
243 addcc %o3,%o5,%o3
244 b 9f
245 add %o2, (7*2-1), %o2
250 L.3.19: ! remainder is negative
251 addcc %o3,%o5,%o3
252 !depth 4, accumulated bits 5
253 bl L.4.21
254 srl %o5,1,%o5
255 ! remainder is positive
256 subcc %o3,%o5,%o3
257 b 9f
258 add %o2, (5*2+1), %o2
260 L.4.21: ! remainder is negative
261 addcc %o3,%o5,%o3
262 b 9f
263 add %o2, (5*2-1), %o2
271 L.2.17: ! remainder is negative
272 addcc %o3,%o5,%o3
273 !depth 3, accumulated bits 1
274 bl L.3.17
275 srl %o5,1,%o5
276 ! remainder is positive
277 subcc %o3,%o5,%o3
278 !depth 4, accumulated bits 3
279 bl L.4.19
280 srl %o5,1,%o5
281 ! remainder is positive
282 subcc %o3,%o5,%o3
283 b 9f
284 add %o2, (3*2+1), %o2
286 L.4.19: ! remainder is negative
287 addcc %o3,%o5,%o3
288 b 9f
289 add %o2, (3*2-1), %o2
294 L.3.17: ! remainder is negative
295 addcc %o3,%o5,%o3
296 !depth 4, accumulated bits 1
297 bl L.4.17
298 srl %o5,1,%o5
299 ! remainder is positive
300 subcc %o3,%o5,%o3
301 b 9f
302 add %o2, (1*2+1), %o2
304 L.4.17: ! remainder is negative
305 addcc %o3,%o5,%o3
306 b 9f
307 add %o2, (1*2-1), %o2
318 L.1.16: ! remainder is negative
319 addcc %o3,%o5,%o3
320 !depth 2, accumulated bits -1
321 bl L.2.15
322 srl %o5,1,%o5
323 ! remainder is positive
324 subcc %o3,%o5,%o3
325 !depth 3, accumulated bits -1
326 bl L.3.15
327 srl %o5,1,%o5
328 ! remainder is positive
329 subcc %o3,%o5,%o3
330 !depth 4, accumulated bits -1
331 bl L.4.15
332 srl %o5,1,%o5
333 ! remainder is positive
334 subcc %o3,%o5,%o3
335 b 9f
336 add %o2, (-1*2+1), %o2
338 L.4.15: ! remainder is negative
339 addcc %o3,%o5,%o3
340 b 9f
341 add %o2, (-1*2-1), %o2
346 L.3.15: ! remainder is negative
347 addcc %o3,%o5,%o3
348 !depth 4, accumulated bits -3
349 bl L.4.13
350 srl %o5,1,%o5
351 ! remainder is positive
352 subcc %o3,%o5,%o3
353 b 9f
354 add %o2, (-3*2+1), %o2
356 L.4.13: ! remainder is negative
357 addcc %o3,%o5,%o3
358 b 9f
359 add %o2, (-3*2-1), %o2
367 L.2.15: ! remainder is negative
368 addcc %o3,%o5,%o3
369 !depth 3, accumulated bits -3
370 bl L.3.13
371 srl %o5,1,%o5
372 ! remainder is positive
373 subcc %o3,%o5,%o3
374 !depth 4, accumulated bits -5
375 bl L.4.11
376 srl %o5,1,%o5
377 ! remainder is positive
378 subcc %o3,%o5,%o3
379 b 9f
380 add %o2, (-5*2+1), %o2
382 L.4.11: ! remainder is negative
383 addcc %o3,%o5,%o3
384 b 9f
385 add %o2, (-5*2-1), %o2
390 L.3.13: ! remainder is negative
391 addcc %o3,%o5,%o3
392 !depth 4, accumulated bits -7
393 bl L.4.9
394 srl %o5,1,%o5
395 ! remainder is positive
396 subcc %o3,%o5,%o3
397 b 9f
398 add %o2, (-7*2+1), %o2
400 L.4.9: ! remainder is negative
401 addcc %o3,%o5,%o3
402 b 9f
403 add %o2, (-7*2-1), %o2
416 end_regular_divide:
417 deccc %o4
418 bge divloop
419 tst %o3
420 bl,a got_result
421 dec %o2
424 got_result:
425 tst %g1
426 bl,a 1f
427 neg %o2 ! quotient <- -%o2
430 retl
431 mov %o2,%o0 ! quotient <- %o2
434 zero_divide:
435 ta ST_DIV0 ! divide by zero trap
436 retl ! if handled, ignored, return
437 mov 0, %o0