! bn_mul_mont_int -- word-by-word Montgomery multiplication for SPARCv9
! (purpose taken from the ident string at the end of this listing).
!
! Register roles, as shown by the inline comments below:
!   %i1 = ap, %i2 = bp, %i3 = np      operand, multiplier and modulus words
!   %i4                               presumably n0 (used in the "t[0]"*n0 products)
!   %i5 = num                         scaled by 4 (bytes) right after entry
!   %l0 / %l1                         byte offsets used to index bp[i] / ap[j], np[j]
!   %l2                               current multiplier word (bp[i], or ap[i] when squaring)
!   %l3                               "t[0]"*n0
!   %l4                               cursor into the temporary vector tp[]
!   %l5 / %l6 / %l7                   ap[j] / np[j] / tp[j]
!   tp[0] is addressed as [%sp+0+128]
!
! NOTE(review): the leading numbers on the lines below skip values, so labels,
! branch targets and most carry-propagation instructions are not part of this
! listing. The comments describe only what the visible lines establish.
1 .section ".text",#alloc,#execinstr
! Export the routine's symbol.
3 .global bn_mul_mont_int
! Entry check on num (still in %o5 here); the branch consuming it is not shown.
6 cmp %o5,4 ! 128 bits minimum
! sethi + or (two lines below) build the constant 0xffffffff in %g1.
8 sethi %hi(0xffffffff),%g1
! num is referred to as %i5 from here on -- presumably a register-window
! save (not shown) sits between the compare above and this shift.
14 sll %i5,2,%i5 ! num*=4
15 or %g1,%lo(0xffffffff),%g1
! Stack handling: %o7 takes a copy of %sp and %sp is later re-derived from
! %o7. The adjustment of %o7 that sizes the tp[] area is not visible here.
22 add %sp,0,%o7 ! real top of stack
23 ld [%i1],%o0 ! ap[0] ! redundant in squaring context
25 ld [%i1+4],%l5 ! ap[1]
28 sub %o7,0,%sp ! alloca
29 ld [%i3+4],%l6 ! np[1]
! Branch (predicted taken) to the squaring path when %icc reports "equal".
! The compare that sets the flags is not shown; presumably it tests ap == bp.
30 be,pt %icc,.Lbn_sqr_mont
! ---- Multiplication, first pass: products with bp[0] ----
! "!prologue!" marks work issued ahead of the inner loop for the next j.
33 mulx %o0,%l2,%o0 ! ap[0]*bp[0]
34 mulx %l5,%l2,%g4 !prologue! ap[1]*bp[0]
37 ld [%i1+8],%l5 !prologue!
! %l3 = "t[0]"*n0: the per-pass multiplier applied to the np[] words below.
39 mulx %i4,%o3,%l3 ! "t[0]"*n0
42 mulx %o1,%l3,%o1 ! np[0]*"t[0]"*n0
43 mulx %l6,%l3,%o4 !prologue! np[1]*"t[0]"*n0
46 ld [%i3+8],%l6 !prologue!
48 mov %g4,%o3 !prologue!
! Inner-loop operand fetches, indexed by the byte offset in %l1.
54 ld [%i1+%l1],%l5 ! ap[j]
57 ld [%i3+%l1],%l6 ! np[j]
! "!epilogue!" marks the last ap[j]*bp product, issued after the loop.
70 mulx %l5,%l2,%g4 !epilogue!
! ---- Multiplication, following passes: products with bp[1], then bp[i] ----
93 ld [%i2+4],%l2 ! bp[1]
97 ld [%i1+4],%l5 ! ap[1]
99 ld [%i3+4],%l6 ! np[1]
101 ld [%l4+4],%l7 ! tp[1]
105 mulx %l5,%l2,%g4 !prologue!
107 ld [%i1+8],%l5 !prologue!
114 mulx %l6,%l3,%o4 !prologue!
117 ld [%i3+8],%l6 !prologue!
119 mov %g4,%o3 !prologue!
! Inner loop: fetch ap[j], np[j] and the running tp[j], write back tp[j-1].
125 ld [%i1+%l1],%l5 ! ap[j]
128 ld [%i3+%l1],%l6 ! np[j]
130 ld [%l4+8],%l7 ! tp[j]
135 st %o1,[%l4] ! tp[j-1]
143 mulx %l5,%l2,%g4 !epilogue!
147 ld [%l4+8],%l7 ! tp[j]
152 st %o1,[%l4] ! tp[j-1]
160 st %o1,[%l4+4] ! tp[j-1]
! Fetch the next multiplier word, indexed by the byte offset in %l0.
172 ld [%i2+%l0],%l2 ! bp[i]
! ---- Final reduction: subtract np[] from tp[], then copy out ----
! %o7 is a negative byte index (starts at -num, num already scaled by 4).
181 sub %g0,%i5,%o7 ! k=-num
! 0-0 with the result discarded into %g0: only clears the carry flag so
! the subccc chain below starts without a borrow.
183 subcc %g0,%g0,%g0 ! clear %icc.c
188 subccc %o0,%o1,%o1 ! tp[j]-np[j]
! Propagate the final borrow into %o2.
193 subc %o2,0,%o2 ! handle upmost overflow bit
! Copy step: load a word via %i1+%o7 and store zero over the tp word.
200 ld [%i1+%o7],%o0 ! copy or in-place refresh
201 st %g0,[%l4+%o7] ! zap tp
! ---- Squaring path (presumably entered via .Lbn_sqr_mont; label not shown) ----
! %l2 supplies both operands of the first product.
211 mulx %l2,%l2,%o0 ! ap[0]*ap[0]
212 mulx %l5,%l2,%g4 !prologue!
215 ld [%i1+8],%l5 !prologue!
217 mulx %i4,%o3,%l3 ! "t[0]"*n0
221 mulx %o1,%l3,%o1 ! np[0]*"t[0]"*n0
222 mulx %l6,%l3,%o4 !prologue!
224 ld [%i3+8],%l6 !prologue!
228 mov %g4,%o3 !prologue!
! First squaring pass: accumulate the ap[j]*a0 products plus carry in %o0.
233 add %o3,%o0,%o0 ! ap[j]*a0+c0
235 ld [%i1+%l1],%l5 ! ap[j]
237 ld [%i3+%l1],%l6 ! np[j]
254 mulx %l5,%l2,%g4 ! epilogue
256 add %o3,%o0,%o0 ! ap[j]*a0+c0
268 add %g4,%o0,%o0 ! ap[j]*a0+c0
! Second squaring pass: reload tp[0..2] from the stack area and take ap[1]
! as the multiplier word.
286 ld [%sp+0+128],%g4 ! tp[0]
287 ld [%sp+0+128+4],%g5 ! tp[1]
288 ld [%sp+0+128+8],%l7 ! tp[2]
289 ld [%i1+4],%l2 ! ap[1]
290 ld [%i1+8],%l5 ! ap[2]
292 ld [%i3+4],%l6 ! np[1]
302 ld [%i3+8],%l6 ! np[2]
311 st %o1,[%sp+0+128] ! tp[0]=
320 ld [%i1+%l1],%l5 ! ap[j]
322 ld [%i3+%l1],%l6 ! np[j]
325 ld [%l4+8],%l7 ! tp[j]
333 st %o1,[%l4] ! tp[j-1]
351 st %o1,[%l4] ! tp[j-1]
! Next squaring pass: multiplier word is ap[2].
361 ld [%sp+0+128],%g5 ! tp[0]
362 ld [%sp+0+128+4],%l7 ! tp[1]
363 ld [%i1+8],%l2 ! ap[2]
365 ld [%i3+4],%l6 ! np[1]
397 ld [%i1+%l1],%l5 ! ap[j]
400 ld [%i3+%l1],%l6 ! np[j]
402 ld [%l4+8],%l7 ! tp[j]
! Branch (predicted not taken) to .Lsqr_no_inner2 on "equal"; the compare
! that sets the flags and the target label are not shown here.
409 be,pn %icc,.Lsqr_no_inner2
417 ld [%i1+%l1],%l5 ! ap[j]
419 ld [%i3+%l1],%l6 ! np[j]
422 ld [%l4+8],%l7 ! tp[j]
430 st %o1,[%l4] ! tp[j-1]
448 st %o1,[%l4] ! tp[j-1]
! Set up the following pass: the multiplier word is fetched from ap[] using
! the byte offset in %l0.
459 ld [%sp+0+128],%g5 ! tp[0]
460 ld [%sp+0+128+4],%l7 ! tp[1]
461 ld [%i1+%l0],%l2 ! ap[j]
463 ld [%i3+4],%l6 ! np[1]
! Loop test against num (in bytes); the consuming branch is not shown.
478 cmp %g4,%i5 ! i<num-1
! Doubles %o0 (adds it to itself).
503 add %o0,%o0,%o0 ! recover %o0
! Symbol metadata: mark bn_mul_mont_int as a function and record its size
! as the distance from its label to this point.
512 .type bn_mul_mont_int,#function
513 .size bn_mul_mont_int,(.-bn_mul_mont_int)
! Identification string emitted into the object file.
! NOTE(review): "Multipltication" is misspelled in the string below; it is
! data that ends up in the binary, so it is left untouched here.
514 .asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro@openssl.org>"