1 # $NetBSD: bn_asm_vax.S,v 1.2 2008/05/11 16:45:19 christos Exp $
7 # ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
10 # for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
14 .globl bn_mul_add_words
15 .type bn_mul_add_words@function
# Register roles as used below: r2 walks r[], r3 walks a[], r5 holds w,
# r0/r1 receive the low/high words of each product, and r6 carries the
# high word from one element to the next (it is also the return value).
24 clrl %r6 # return value ("carry")
# emul works on signed longwords, while the bignum words are unsigned.
# The low word of the result is the same either way; the high word is
# corrected below whenever one of the inputs has its top bit set.
26 0: emul %r5,(%r3),(%r2),%r0 # w * a[0] + r[0] -> r0
28 # fixup for "negative" r[]
# A "negative" addend is sign-extended by emul, i.e. counted as 2^32 less
# than its unsigned value; adding 1 to the high word compensates.
31 incl %r1 # add 1 to highword
33 1: # add saved carry to result
37 # combined fixup for "negative" w, a[]
# Reading a word with its top bit set as signed undercounts it by 2^32,
# so the product is short by the other factor times 2^32; the other
# factor is therefore added to the high word.
38 tstl %r5 # if w is negative...
40 addl2 (%r3),%r1 # ...add a[0] again to highword
41 1: tstl (%r3) # if a[0] is negative...
43 addl2 %r5,%r1 # ...add w again to highword
45 movl %r0,(%r2)+ # save low word in dest & advance *r
46 addl2 $4,%r3 # advance *a
47 movl %r1,%r6 # high word in r6 for return value
54 # .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
60 #; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
63 #; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
68 .type bn_mul_words@function
# Here the emul addend is the carry kept in r6 rather than a word of r[].
# emul works on signed longwords, so a carry, w or a[] word with its top
# bit set needs the high-word corrections that follow to give the
# unsigned result.
78 0: emul %r5,(%r3),%r6,%r0 # w * a[0] + carry -> r0
80 # fixup for "negative" carry
85 1: # combined fixup for "negative" w, a[]
104 # .title vax_bn_sqr_words unsigned square, 32*32=>64
106 #; w.j.m. 15-jan-1999
110 #; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
112 #; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
116 .type bn_sqr_words@function
# Each iteration squares one longword of a[] into one quadword of r[]:
# r3 advances through a[] by a longword, r2 through r[] by a quadword.
124 0: movl (%r3)+,%r5 # r5 = a[] & advance
# The addend operand is the constant 0; the 64-bit product is left in the
# r0/r1 register pair.
126 emul %r5,%r5,$0,%r0 # a[0] * a[0] + 0 -> r0
128 # fixup for "negative" a[]
134 1: movq %r0,(%r2)+ # store 64-bit result
141 # .title vax_bn_div_words unsigned divide
143 #; Richard Levitte 20-Nov-2000
145 #; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
147 #; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
150 #; Using EDIV would be very easy, if it didn't do signed calculations.
151 #; Any time any of the input numbers are signed, there are problems,
152 #; usually with integer overflow, at which point it returns useless
153 #; data (the quotient gets the value of l, and the remainder becomes 0).
155 #; If it was just for the dividend, it would be very easy, just divide
156 #; it by 2 (unsigned), do the division, multiply the resulting quotient
157 #; and remainder by 2, add the bit that was dropped when dividing by 2
158 #; to the remainder, and do some adjustment so the remainder doesn't
159 #; end up larger than the divisor. For some cases when the divisor is
160 #; negative (from EDIV's point of view, i.e. when the highest bit is set),
161 #; dividing the dividend by 2 isn't enough, and since some operations
162 #; might generate integer overflows even when the dividend is divided by
163 #; 4 (when the high part of the shifted down dividend ends up being exactly
164 #; half of the divisor, the result is the quotient 0x80000000, which is
165 #; negative...) it needs to be divided by 8. Furthermore, the divisor needs
166 #; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
167 #; In this case, a little extra fiddling with the remainder is required.
169 #; So, the simplest way to handle this is always to divide the dividend
170 #; by 8, and to divide the divisor by 2 if its highest bit is set.
171 #; After EDIV has been used, the quotient gets multiplied by 8 if the
172 #; original divisor was positive, otherwise 4. The remainder, oddly
173 #; enough, is *always* multiplied by 8.
174 #; NOTE: in the case mentioned above, where the high part of the shifted
175 #; down dividend ends up being exactly half the shifted down divisor, we
176 #; end up with a 33-bit quotient. That is not a problem, however: it usually
177 #; means we have ended up with a remainder that is too large as well, and the
178 #; problem is fixed by the last part of the algorithm (next paragraph).
180 #; The routine ends with comparing the resulting remainder with the
181 #; original divisor and if the remainder is larger, subtract the
182 #; original divisor from it, and increase the quotient by 1. This is
183 #; done until the remainder is smaller than the divisor.
185 #; The complete algorithm looks like this:
189 #; [h,l] = [h,l] >> 3
190 #; [q,r] = floor([h,l] / d) # This is the EDIV operation
191 #; if (q < 0) q = -q # I doubt this is necessary any more
204 #; [r',r] = [r',r] - q
205 #; while ([r',r] < 0)
207 #; [r',r] = [r',r] + d
208 #; [q',q] = [q',q] - 1
212 #; while ([r',r] >= d')
214 #; [r',r] = [r',r] - d'
215 #; [q',q] = [q',q] + 1
229 .type bn_div_words@function
# Register roles as used below (taken from the per-line comments):
#   r2 = l, later the quotient q        r8 = q' (bits of q above 32)
#   r3 = h, later the remainder r       r6 = r' (bits of r above 32)
#   r4 = d as handed to ediv (halved when its top bit is set)
#   r7 = d' (the original, unmodified divisor)
#   r5 = l' (the three low bits of l dropped by the >> 3)
# The three bits kept in l' are added back into the remainder after the
# division (see "r = r + l'" below).
237 bicl3 $-8,%r2,%r5 # l' = l & 7
# rotl rotates rather than shifts: bits leaving one end re-enter at the
# other.
# NOTE(review): the instructions that prepare h and l so that these two
# rotates act as a 64-bit shift are not visible in this excerpt.
245 rotl $-3,%r2,%r2 # l = l >> 3
246 rotl $-3,%r3,%r3 # h = h >> 3
248 movl %r4,%r7 # d' = d
254 beql 0f # Uh-oh, the divisor is 0...
# Rotate right by one, then clear the bit that wrapped around into the
# top position: together this is an unsigned d >> 1.
256 rotl $-1,%r4,%r4 # If d is negative, shift it right.
257 bicl2 $0x80000000,%r4 # Since d is then a large number, the
258 # lowest bit is insignificant
259 # (contradict that, and I'll fix the problem!)
# ediv operands: divisor r4, 64-bit dividend in r2/r3, quotient to r2,
# remainder to r3.
261 ediv %r4,%r2,%r2,%r3 # Do the actual division
265 mnegl %r2,%r2 # if q < 0, negate it
# Scale the quotient back up. After the rotate left, the bits that left
# the top of q sit in its low bits; masking them out yields q'.
# Scale factor 8: divisor was used unmodified.
269 rotl $3,%r2,%r2 # q = q << 3
270 bicl3 $-8,%r2,%r8 # q' gets the high bits from q
# Scale factor 4: divisor had been halved, which already doubled q.
275 rotl $2,%r2,%r2 # q = q << 2
276 bicl3 $-4,%r2,%r8 # q' gets the high bits from q
# The remainder is scaled by 8 in both cases, using the same
# rotate-then-mask idiom to capture the overflow bits in r'.
279 rotl $3,%r3,%r3 # r = r << 3
280 bicl3 $-8,%r3,%r6 # r' gets the high bits from r
282 addl2 %r5,%r3 # r = r + l'
# Correction for a halved divisor whose dropped low bit was 1: the
# division effectively used d - 1, and q*(d-1) + r == q*d + (r - q),
# so q is subtracted from the remainder. While the remainder is
# negative, one d' is added back and q is decremented.
287 beql 5f # if d' < 0 && d' & 1
288 subl2 %r2,%r3 # [r',r] = [r',r] - [q',q]
291 bgeq 5f # while r < 0
292 decl %r2 # [q',q] = [q',q] - 1
294 addl2 %r7,%r3 # [r',r] = [r',r] + d'
298 # The return points are placed in the middle to keep a short distance from
299 # all the branch points
# Final normalisation: while the remainder is not below the original
# divisor, subtract d' from it and bump the quotient.
311 blssu 1b # while [r',r] >= d'
313 subl2 %r7,%r3 # [r',r] = [r',r] - d'
315 incl %r2 # [q',q] = [q',q] + 1
321 # .title vax_bn_add_words unsigned add of two arrays
323 #; Richard Levitte 20-Nov-2000
325 #; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
328 #; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
334 .type bn_add_words@function
# Loop body: r3 walks a[], r4 walks b[], r2 walks r[], r5 counts the
# remaining words. adwc is the only instruction in the loop that changes
# the carry flag, so the carry out of one word is the carry into the next
# across the load, the store and the loop branch.
347 0: movl (%r3)+,%r1 # carry untouched
348 adwc (%r4)+,%r1 # carry used and touched
349 movl %r1,(%r2)+ # carry untouched
350 sobgtr %r5,0b # carry untouched
356 #; Richard Levitte 20-Nov-2000
358 #; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
361 #; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
366 .type bn_sub_words@function
# Loop body: r3 walks a[], r4 walks b[], r2 walks r[], r5 counts the
# remaining words. sbwc is the only instruction in the loop that changes
# the carry (borrow) flag, so the borrow out of one word is the borrow
# into the next across the load, the store and the loop branch.
379 0: movl (%r3)+,%r6 # carry untouched
380 sbwc (%r4)+,%r6 # carry used and touched
381 movl %r6,(%r2)+ # carry untouched
382 sobgtr %r5,0b # carry untouched
390 # Multiply a vector of 4/8 longword by another.
391 # Uses two loops and 16/64 emuls.
394 .type bn_mul_comba4@function
401 .type bn_mul_comba8@function
# Multiply entry points: the multiplicand pointer goes to r3 and the
# multiplier pointer to r7, both read from the argument list.
406 6: movl 8(%ap),%r3 # a[]
407 movl 12(%ap),%r7 # b[]
411 .type bn_sqr_comba4@function
418 .type bn_sqr_comba8@function
424 movl 8(%ap),%r3 # a[]
# Common part: r5 is the destination pointer.
427 5: movl 4(%ap),%r5 # r[]
# The loop below accumulates into r[] with addl2, so the destination
# has to start out as zero.
430 clrq (%r5) # clear destination, for add.
432 clrq 16(%r5) # these only needed for comba8
# NOTE(review): r9 presumably holds the vector length (4 or 8); the code
# that sets it is not visible in this excerpt.
436 movl %r9,%r6 # inner loop count
437 movl (%r7)+,%r2 # value to multiply with
# r0/r1 = r2 * (word at r3) + r4, where r4 is the carry handed over from
# the previous step.
439 1: emul %r2,(%r3),%r4,%r0
# Add the low word into r[]; the carry out of that add is folded into the
# high word, which then becomes the addend of the next emul.
450 3: addl2 %r0,(%r5)+ # add to destination
451 adwc $0,%r1 # remember carry
452 movl %r1,%r4 # add carry in next emul
# The last carry is stored (not added) into the next destination word.
456 movl %r4,(%r5) # save highest add result