Roll BoringSSL.
[chromium-blink-merge.git] / third_party / boringssl / win-x86_64 / crypto / bn / x86_64-mont5.asm
blob 90c6100e4a48abf00799153651f20a517b5aa47d
1 OPTION DOTNAME
2 .text$ SEGMENT ALIGN(256) 'CODE'
4 EXTERN OPENSSL_ia32cap_P:NEAR
6 PUBLIC bn_mul_mont_gather5
8 ALIGN 64
; bn_mul_mont_gather5 -- public Win64 entry point (word-at-a-time path).
;
; The Win64 register arguments (rcx, rdx, r8, r9) are moved into
; rdi, rsi, rdx, rcx and the 5th/6th stack arguments are loaded into r8/r9.
; A 7th stack argument (DWORD at [56+rsp] on entry) is used as an index to
; pick one operand out of the table addressed by the 3rd argument.
; If the low three bits of r9d (the 6th argument, used below as the word
; count) are all zero, control is transferred to $L$mul4x_enter inside
; bn_mul4x_mont_gather5; otherwise the loops below run.
; Returns 1 in rax.
; NOTE(review): argument meanings (rp, ap, table, np, n0, num, power) are
; inferred from how the registers are used here -- confirm against the C
; prototype.
9 bn_mul_mont_gather5 PROC PUBLIC
10 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
11 mov QWORD PTR[16+rsp],rsi
12 mov rax,rsp
13 $L$SEH_begin_bn_mul_mont_gather5::
; Shuffle the four register arguments and fetch arguments 5 and 6.
14 mov rdi,rcx
15 mov rsi,rdx
16 mov rdx,r8
17 mov rcx,r9
18 mov r8,QWORD PTR[40+rsp]
19 mov r9,QWORD PTR[48+rsp]
; Word count not a multiple of 8 -> stay here, else take the 4x path.
22 test r9d,7
23 jnz $L$mul_enter
24 jmp $L$mul4x_enter
26 ALIGN 16
27 $L$mul_enter::
; Zero-extend the count, remember the incoming rsp in rax, read argument 7.
28 mov r9d,r9d
29 mov rax,rsp
30 mov r10d,DWORD PTR[56+rsp]
31 push rbx
32 push rbp
33 push r12
34 push r13
35 push r14
36 push r15
; Spill callee-saved xmm6/xmm7 at rax-88 and rax-72.
37 lea rsp,QWORD PTR[((-40))+rsp]
38 movaps XMMWORD PTR[rsp],xmm6
39 movaps XMMWORD PTR[16+rsp],xmm7
; Reserve (r9+2) qwords of scratch and round rsp down to a 1024-byte boundary.
40 lea r11,QWORD PTR[2+r9]
41 neg r11
42 lea rsp,QWORD PTR[r11*8+rsp]
43 and rsp,-1024
; Keep the entry rsp just above the scratch vector for the epilogue.
45 mov QWORD PTR[8+r9*8+rsp],rax
46 $L$mul_body::
; Split the index: r11 = idx & 7 selects the qword column, and
; r10 = ~(idx >> 3) & 3 selects where to start reading $L$magic_masks.
47 mov r12,rdx
48 mov r11,r10
49 shr r10,3
50 and r11,7
51 not r10
52 lea rax,QWORD PTR[$L$magic_masks]
53 and r10,3
54 lea r12,QWORD PTR[96+r11*8+r12]
; xmm4..xmm7 = four consecutive mask qwords.
55 movq xmm4,QWORD PTR[r10*8+rax]
56 movq xmm5,QWORD PTR[8+r10*8+rax]
57 movq xmm6,QWORD PTR[16+r10*8+rax]
58 movq xmm7,QWORD PTR[24+r10*8+rax]
; Gather: load four candidates 64 bytes apart, AND each with its mask and OR
; them together, so all four locations are read whichever one is selected.
60 movq xmm0,QWORD PTR[(((-96)))+r12]
61 movq xmm1,QWORD PTR[((-32))+r12]
62 pand xmm0,xmm4
63 movq xmm2,QWORD PTR[32+r12]
64 pand xmm1,xmm5
65 movq xmm3,QWORD PTR[96+r12]
66 pand xmm2,xmm6
67 por xmm0,xmm1
68 pand xmm3,xmm7
69 por xmm0,xmm2
70 lea r12,QWORD PTR[256+r12]
71 por xmm0,xmm3
; DB below encodes: movq rbx,xmm0  (rbx = gathered multiplier word)
73 DB 102,72,15,126,195
; r8 = qword pointed to by argument 5; rax = first word at rsi.
75 mov r8,QWORD PTR[r8]
76 mov rax,QWORD PTR[rsi]
; r14 = outer counter, r15 = inner counter.
78 xor r14,r14
79 xor r15,r15
; Start gathering the next multiplier word while the multiplies run.
81 movq xmm0,QWORD PTR[(((-96)))+r12]
82 movq xmm1,QWORD PTR[((-32))+r12]
83 pand xmm0,xmm4
84 movq xmm2,QWORD PTR[32+r12]
85 pand xmm1,xmm5
87 mov rbp,r8
88 mul rbx
89 mov r10,rax
90 mov rax,QWORD PTR[rcx]
92 movq xmm3,QWORD PTR[96+r12]
93 pand xmm2,xmm6
94 por xmm0,xmm1
95 pand xmm3,xmm7
; rbp = r8 * low product word: the per-row reduction multiplier.
97 imul rbp,r10
98 mov r11,rdx
100 por xmm0,xmm2
101 lea r12,QWORD PTR[256+r12]
102 por xmm0,xmm3
104 mul rbp
105 add r10,rax
106 mov rax,QWORD PTR[8+rsi]
107 adc rdx,0
108 mov r13,rdx
110 lea r15,QWORD PTR[1+r15]
111 jmp $L$1st_enter
113 ALIGN 16
; First row: nothing has been accumulated in the scratch vector yet, so the
; results are stored, not added.
114 $L$1st::
115 add r13,rax
116 mov rax,QWORD PTR[r15*8+rsi]
117 adc rdx,0
118 add r13,r11
119 mov r11,r10
120 adc rdx,0
121 mov QWORD PTR[((-16))+r15*8+rsp],r13
122 mov r13,rdx
124 $L$1st_enter::
125 mul rbx
126 add r11,rax
127 mov rax,QWORD PTR[r15*8+rcx]
128 adc rdx,0
129 lea r15,QWORD PTR[1+r15]
130 mov r10,rdx
132 mul rbp
133 cmp r15,r9
134 jne $L$1st
; DB below encodes: movq rbx,xmm0  (next gathered multiplier word)
136 DB 102,72,15,126,195
138 add r13,rax
139 mov rax,QWORD PTR[rsi]
140 adc rdx,0
141 add r13,r11
142 adc rdx,0
143 mov QWORD PTR[((-16))+r15*8+rsp],r13
144 mov r13,rdx
145 mov r11,r10
; Fold the last carries into the top scratch word plus one overflow word.
147 xor rdx,rdx
148 add r13,r11
149 adc rdx,0
150 mov QWORD PTR[((-8))+r9*8+rsp],r13
151 mov QWORD PTR[r9*8+rsp],rdx
153 lea r14,QWORD PTR[1+r14]
154 jmp $L$outer
155 ALIGN 16
; Remaining rows: same shape as the first row, but each product is added to
; the value already held in the scratch vector.
156 $L$outer::
157 xor r15,r15
158 mov rbp,r8
159 mov r10,QWORD PTR[rsp]
161 movq xmm0,QWORD PTR[(((-96)))+r12]
162 movq xmm1,QWORD PTR[((-32))+r12]
163 pand xmm0,xmm4
164 movq xmm2,QWORD PTR[32+r12]
165 pand xmm1,xmm5
167 mul rbx
168 add r10,rax
169 mov rax,QWORD PTR[rcx]
170 adc rdx,0
172 movq xmm3,QWORD PTR[96+r12]
173 pand xmm2,xmm6
174 por xmm0,xmm1
175 pand xmm3,xmm7
177 imul rbp,r10
178 mov r11,rdx
180 por xmm0,xmm2
181 lea r12,QWORD PTR[256+r12]
182 por xmm0,xmm3
184 mul rbp
185 add r10,rax
186 mov rax,QWORD PTR[8+rsi]
187 adc rdx,0
188 mov r10,QWORD PTR[8+rsp]
189 mov r13,rdx
191 lea r15,QWORD PTR[1+r15]
192 jmp $L$inner_enter
194 ALIGN 16
195 $L$inner::
196 add r13,rax
197 mov rax,QWORD PTR[r15*8+rsi]
198 adc rdx,0
199 add r13,r10
200 mov r10,QWORD PTR[r15*8+rsp]
201 adc rdx,0
202 mov QWORD PTR[((-16))+r15*8+rsp],r13
203 mov r13,rdx
205 $L$inner_enter::
206 mul rbx
207 add r11,rax
208 mov rax,QWORD PTR[r15*8+rcx]
209 adc rdx,0
210 add r10,r11
211 mov r11,rdx
212 adc r11,0
213 lea r15,QWORD PTR[1+r15]
215 mul rbp
216 cmp r15,r9
217 jne $L$inner
; DB below encodes: movq rbx,xmm0  (next gathered multiplier word)
219 DB 102,72,15,126,195
221 add r13,rax
222 mov rax,QWORD PTR[rsi]
223 adc rdx,0
224 add r13,r10
225 mov r10,QWORD PTR[r15*8+rsp]
226 adc rdx,0
227 mov QWORD PTR[((-16))+r15*8+rsp],r13
228 mov r13,rdx
230 xor rdx,rdx
231 add r13,r11
232 adc rdx,0
233 add r13,r10
234 adc rdx,0
235 mov QWORD PTR[((-8))+r9*8+rsp],r13
236 mov QWORD PTR[r9*8+rsp],rdx
238 lea r14,QWORD PTR[1+r14]
239 cmp r14,r9
240 jb $L$outer
; Subtract the vector at rcx from the scratch vector, writing the difference
; to rdi. The xor also clears CF so the first sbb starts with no borrow;
; mov/lea/dec inside the loop leave CF untouched.
242 xor r14,r14
243 mov rax,QWORD PTR[rsp]
244 lea rsi,QWORD PTR[rsp]
245 mov r15,r9
246 jmp $L$sub
247 ALIGN 16
248 $L$sub:: sbb rax,QWORD PTR[r14*8+rcx]
249 mov QWORD PTR[r14*8+rdi],rax
250 mov rax,QWORD PTR[8+r14*8+rsi]
251 lea r14,QWORD PTR[1+r14]
252 dec r15
253 jnz $L$sub
; rax = overflow word minus the final borrow; it is used below as a select mask.
255 sbb rax,0
256 xor r14,r14
257 mov r15,r9
258 ALIGN 16
; Branch-free select: output = ((scratch ^ diff) & rax) ^ diff.
; Each scratch word is then overwritten with the loop index.
259 $L$copy::
260 mov rsi,QWORD PTR[r14*8+rsp]
261 mov rcx,QWORD PTR[r14*8+rdi]
262 xor rsi,rcx
263 and rsi,rax
264 xor rsi,rcx
265 mov QWORD PTR[r14*8+rsp],r14
266 mov QWORD PTR[r14*8+rdi],rsi
267 lea r14,QWORD PTR[1+r14]
268 sub r15,1
269 jnz $L$copy
; rsi = rsp value saved at $L$mul_enter; reload xmm6/xmm7 and the six pushed
; registers relative to it, then unwind the stack.
271 mov rsi,QWORD PTR[8+r9*8+rsp]
272 mov rax,1
273 movaps xmm6,XMMWORD PTR[((-88))+rsi]
274 movaps xmm7,XMMWORD PTR[((-72))+rsi]
275 mov r15,QWORD PTR[((-48))+rsi]
276 mov r14,QWORD PTR[((-40))+rsi]
277 mov r13,QWORD PTR[((-32))+rsi]
278 mov r12,QWORD PTR[((-24))+rsi]
279 mov rbp,QWORD PTR[((-16))+rsi]
280 mov rbx,QWORD PTR[((-8))+rsi]
281 lea rsp,QWORD PTR[rsi]
282 $L$mul_epilogue::
283 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
284 mov rsi,QWORD PTR[16+rsp]
285 DB 0F3h,0C3h ;repret
286 $L$SEH_end_bn_mul_mont_gather5::
287 bn_mul_mont_gather5 ENDP
289 ALIGN 32
; bn_mul4x_mont_gather5 -- private Win64 wrapper around mul4x_internal.
;
; Reached either through its own prologue or by a jump to $L$mul4x_enter
; from bn_mul_mont_gather5 (which has already performed the same argument
; shuffle). Builds a 64-byte-aligned frame, stores the entry rsp at
; [40+rsp], calls mul4x_internal and returns 1 in rax.
290 bn_mul4x_mont_gather5 PROC PRIVATE
291 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
292 mov QWORD PTR[16+rsp],rsi
293 mov rax,rsp
294 $L$SEH_begin_bn_mul4x_mont_gather5::
295 mov rdi,rcx
296 mov rsi,rdx
297 mov rdx,r8
298 mov rcx,r9
299 mov r8,QWORD PTR[40+rsp]
300 mov r9,QWORD PTR[48+rsp]
303 $L$mul4x_enter::
; DB 067h is a lone address-size prefix applied to the following mov.
; NOTE(review): presumably emitted only as code-alignment padding.
304 DB 067h
305 mov rax,rsp
306 push rbx
307 push rbp
308 push r12
309 push r13
310 push r14
311 push r15
; Spill callee-saved xmm6/xmm7 at rax-88 and rax-72.
312 lea rsp,QWORD PTR[((-40))+rsp]
313 movaps XMMWORD PTR[rsp],xmm6
314 movaps XMMWORD PTR[16+rsp],xmm7
315 DB 067h
; r9 = -(count*8), r10 = count*32.
316 mov r10d,r9d
317 shl r9d,3
318 shl r10d,3+2
319 neg r9
; r11 = distance (mod 4096) between the prospective frame base and rsi.
; NOTE(review): this looks like it keeps the frame from aliasing the input
; buffer at 4 KiB granularity -- confirm against the perlasm source.
328 lea r11,QWORD PTR[((-64))+r9*2+rsp]
329 sub r11,rsi
330 and r11,4095
331 cmp r10,r11
332 jb $L$mul4xsp_alt
333 sub rsp,r11
334 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
335 jmp $L$mul4xsp_done
337 ALIGN 32
338 $L$mul4xsp_alt::
339 lea r10,QWORD PTR[((4096-64))+r9*2]
340 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
341 sub r11,r10
; mov (not xor) is used for the zero so the CF produced by the sub above is
; still intact for cmovc: on borrow the extra adjustment becomes 0.
342 mov r10,0
343 cmovc r11,r10
344 sub rsp,r11
345 $L$mul4xsp_done::
346 and rsp,-64
; r9 back to +count*8.
347 neg r9
; Entry rsp is stored in the frame; the epilogue reads it back from here.
349 mov QWORD PTR[40+rsp],rax
350 $L$mul4x_body::
352 call mul4x_internal
; rsi = entry rsp; restore xmm6/xmm7 and the pushed registers relative to it.
354 mov rsi,QWORD PTR[40+rsp]
355 mov rax,1
356 movaps xmm6,XMMWORD PTR[((-88))+rsi]
357 movaps xmm7,XMMWORD PTR[((-72))+rsi]
358 mov r15,QWORD PTR[((-48))+rsi]
359 mov r14,QWORD PTR[((-40))+rsi]
360 mov r13,QWORD PTR[((-32))+rsi]
361 mov r12,QWORD PTR[((-24))+rsi]
362 mov rbp,QWORD PTR[((-16))+rsi]
363 mov rbx,QWORD PTR[((-8))+rsi]
364 lea rsp,QWORD PTR[rsi]
365 $L$mul4x_epilogue::
366 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
367 mov rsi,QWORD PTR[16+rsp]
368 DB 0F3h,0C3h ;repret
369 $L$SEH_end_bn_mul4x_mont_gather5::
370 bn_mul4x_mont_gather5 ENDP
373 ALIGN 32
; mul4x_internal -- four-words-per-iteration multiply/reduce core.
;
; Called (not entered via the Win64 ABI) by bn_mul4x_mont_gather5 and
; bn_power5. Register state on entry, as set up by those callers:
;   rax = caller's entry rsp (the table index is read from [56+rax])
;   rdx = table base, rsi = first operand, rcx = modulus pointer
;   r8  = pointer to the word used as reduction constant
;   r9  = operand length in bytes, rdi = result pointer
; Frame offsets carry an extra +8 because the call pushed a return address.
; Does not return by itself: it ends with a jump to $L$sqr4x_sub, whose
; ret returns to mul4x_internal's caller.
; Overwrites xmm0..xmm7, including xmm6/xmm7 which the callers spilled.
; The modulus is read with a 16-byte stride ([rcx], [16+rcx], ...).
; NOTE(review): that implies an interleaved modulus layout prepared by the
; caller -- confirm.
374 mul4x_internal PROC PRIVATE
; r13 = rdx + 256 + r9*32: end marker for the table walk (compared with r12
; at the bottom of $L$outer4x).
375 shl r9,5
376 mov r10d,DWORD PTR[56+rax]
377 lea r13,QWORD PTR[256+r9*1+rdx]
378 shr r9,5
; Split the index exactly as in bn_mul_mont_gather5: r11 = idx & 7,
; r10 = ~(idx >> 3) & 3 selects the starting entry in $L$magic_masks.
379 mov r11,r10
380 shr r10,3
381 and r11,7
382 not r10
383 lea rax,QWORD PTR[$L$magic_masks]
384 and r10,3
385 lea r12,QWORD PTR[96+r11*8+rdx]
386 movq xmm4,QWORD PTR[r10*8+rax]
387 movq xmm5,QWORD PTR[8+r10*8+rax]
; r11 = (r11 + 7) & 7, later used as an offset for the scratch pointer r14.
388 add r11,7
389 movq xmm6,QWORD PTR[16+r10*8+rax]
390 movq xmm7,QWORD PTR[24+r10*8+rax]
391 and r11,7
; Masked gather of the first multiplier word (xmm0), interleaved with the
; loads for the second one (xmm1..xmm3, from r14 = r12+256).
393 movq xmm0,QWORD PTR[(((-96)))+r12]
394 lea r14,QWORD PTR[256+r12]
395 movq xmm1,QWORD PTR[((-32))+r12]
396 pand xmm0,xmm4
397 movq xmm2,QWORD PTR[32+r12]
398 pand xmm1,xmm5
399 movq xmm3,QWORD PTR[96+r12]
400 pand xmm2,xmm6
401 DB 067h
402 por xmm0,xmm1
403 movq xmm1,QWORD PTR[((-96))+r14]
404 DB 067h
405 pand xmm3,xmm7
406 DB 067h
407 por xmm0,xmm2
408 movq xmm2,QWORD PTR[((-32))+r14]
409 DB 067h
410 pand xmm1,xmm4
411 DB 067h
412 por xmm0,xmm3
413 movq xmm3,QWORD PTR[32+r14]
; DB below encodes: movq rbx,xmm0  (rbx = first multiplier word)
415 DB 102,72,15,126,195
416 movq xmm0,QWORD PTR[96+r14]
; Save the table end marker and the result pointer in the frame.
417 mov QWORD PTR[((16+8))+rsp],r13
418 mov QWORD PTR[((56+8))+rsp],rdi
; r8 = reduction constant; rsi is moved to the END of the operand and r9 is
; negated so that [r9+rsi] / [r15+rsi] index upward towards zero.
420 mov r8,QWORD PTR[r8]
421 mov rax,QWORD PTR[rsi]
422 lea rsi,QWORD PTR[r9*1+rsi]
423 neg r9
425 mov rbp,r8
426 mul rbx
427 mov r10,rax
428 mov rax,QWORD PTR[rcx]
430 pand xmm2,xmm5
431 pand xmm3,xmm6
432 por xmm1,xmm2
; rbp = r8 * low product word: the per-row reduction multiplier.
434 imul rbp,r10
; r14 = scratch (tp) write pointer inside the frame.
442 lea r14,QWORD PTR[((64+8))+r11*8+rsp]
443 mov r11,rdx
445 pand xmm0,xmm7
446 por xmm1,xmm3
447 lea r12,QWORD PTR[512+r12]
448 por xmm0,xmm1
450 mul rbp
451 add r10,rax
452 mov rax,QWORD PTR[8+r9*1+rsi]
453 adc rdx,0
454 mov rdi,rdx
456 mul rbx
457 add r11,rax
458 mov rax,QWORD PTR[16+rcx]
459 adc rdx,0
460 mov r10,rdx
462 mul rbp
463 add rdi,rax
464 mov rax,QWORD PTR[16+r9*1+rsi]
465 adc rdx,0
466 add rdi,r11
467 lea r15,QWORD PTR[32+r9]
468 lea rcx,QWORD PTR[64+rcx]
469 adc rdx,0
470 mov QWORD PTR[r14],rdi
471 mov r13,rdx
472 jmp $L$1st4x
474 ALIGN 32
; First row, four words per iteration; r15 counts bytes up to zero.
; Results are stored (not accumulated) through r14.
475 $L$1st4x::
476 mul rbx
477 add r10,rax
478 mov rax,QWORD PTR[((-32))+rcx]
479 lea r14,QWORD PTR[32+r14]
480 adc rdx,0
481 mov r11,rdx
483 mul rbp
484 add r13,rax
485 mov rax,QWORD PTR[((-8))+r15*1+rsi]
486 adc rdx,0
487 add r13,r10
488 adc rdx,0
489 mov QWORD PTR[((-24))+r14],r13
490 mov rdi,rdx
492 mul rbx
493 add r11,rax
494 mov rax,QWORD PTR[((-16))+rcx]
495 adc rdx,0
496 mov r10,rdx
498 mul rbp
499 add rdi,rax
500 mov rax,QWORD PTR[r15*1+rsi]
501 adc rdx,0
502 add rdi,r11
503 adc rdx,0
504 mov QWORD PTR[((-16))+r14],rdi
505 mov r13,rdx
507 mul rbx
508 add r10,rax
509 mov rax,QWORD PTR[rcx]
510 adc rdx,0
511 mov r11,rdx
513 mul rbp
514 add r13,rax
515 mov rax,QWORD PTR[8+r15*1+rsi]
516 adc rdx,0
517 add r13,r10
518 adc rdx,0
519 mov QWORD PTR[((-8))+r14],r13
520 mov rdi,rdx
522 mul rbx
523 add r11,rax
524 mov rax,QWORD PTR[16+rcx]
525 adc rdx,0
526 mov r10,rdx
528 mul rbp
529 add rdi,rax
530 mov rax,QWORD PTR[16+r15*1+rsi]
531 adc rdx,0
532 add rdi,r11
533 lea rcx,QWORD PTR[64+rcx]
534 adc rdx,0
535 mov QWORD PTR[r14],rdi
536 mov r13,rdx
538 add r15,32
539 jnz $L$1st4x
; Tail of the first row (last two words) and top-carry fold into rdi.
541 mul rbx
542 add r10,rax
543 mov rax,QWORD PTR[((-32))+rcx]
544 lea r14,QWORD PTR[32+r14]
545 adc rdx,0
546 mov r11,rdx
548 mul rbp
549 add r13,rax
550 mov rax,QWORD PTR[((-8))+rsi]
551 adc rdx,0
552 add r13,r10
553 adc rdx,0
554 mov QWORD PTR[((-24))+r14],r13
555 mov rdi,rdx
557 mul rbx
558 add r11,rax
559 mov rax,QWORD PTR[((-16))+rcx]
560 adc rdx,0
561 mov r10,rdx
563 mul rbp
564 add rdi,rax
565 mov rax,QWORD PTR[r9*1+rsi]
566 adc rdx,0
567 add rdi,r11
568 adc rdx,0
569 mov QWORD PTR[((-16))+r14],rdi
570 mov r13,rdx
; DB below encodes: movq rbx,xmm0  (next multiplier word)
572 DB 102,72,15,126,195
; Rewind the modulus pointer (r9 is negative here).
573 lea rcx,QWORD PTR[r9*2+rcx]
575 xor rdi,rdi
576 add r13,r10
577 adc rdi,0
578 mov QWORD PTR[((-8))+r14],r13
580 jmp $L$outer4x
582 ALIGN 32
; Remaining rows: same structure, but every product is added to the value
; already in the scratch vector. The next multiplier word is gathered from
; the table in parallel.
583 $L$outer4x::
584 mov r10,QWORD PTR[r9*1+r14]
585 mov rbp,r8
586 mul rbx
587 add r10,rax
588 mov rax,QWORD PTR[rcx]
589 adc rdx,0
591 movq xmm0,QWORD PTR[(((-96)))+r12]
592 movq xmm1,QWORD PTR[((-32))+r12]
593 pand xmm0,xmm4
594 movq xmm2,QWORD PTR[32+r12]
595 pand xmm1,xmm5
596 movq xmm3,QWORD PTR[96+r12]
598 imul rbp,r10
599 DB 067h
600 mov r11,rdx
; Store the carry word left over from the previous row.
601 mov QWORD PTR[r14],rdi
603 pand xmm2,xmm6
604 por xmm0,xmm1
605 pand xmm3,xmm7
606 por xmm0,xmm2
607 lea r14,QWORD PTR[r9*1+r14]
608 lea r12,QWORD PTR[256+r12]
609 por xmm0,xmm3
611 mul rbp
612 add r10,rax
613 mov rax,QWORD PTR[8+r9*1+rsi]
614 adc rdx,0
615 mov rdi,rdx
617 mul rbx
618 add r11,rax
619 mov rax,QWORD PTR[16+rcx]
620 adc rdx,0
621 add r11,QWORD PTR[8+r14]
622 adc rdx,0
623 mov r10,rdx
625 mul rbp
626 add rdi,rax
627 mov rax,QWORD PTR[16+r9*1+rsi]
628 adc rdx,0
629 add rdi,r11
630 lea r15,QWORD PTR[32+r9]
631 lea rcx,QWORD PTR[64+rcx]
632 adc rdx,0
633 mov r13,rdx
634 jmp $L$inner4x
636 ALIGN 32
637 $L$inner4x::
638 mul rbx
639 add r10,rax
640 mov rax,QWORD PTR[((-32))+rcx]
641 adc rdx,0
642 add r10,QWORD PTR[16+r14]
643 lea r14,QWORD PTR[32+r14]
644 adc rdx,0
645 mov r11,rdx
647 mul rbp
648 add r13,rax
649 mov rax,QWORD PTR[((-8))+r15*1+rsi]
650 adc rdx,0
651 add r13,r10
652 adc rdx,0
653 mov QWORD PTR[((-32))+r14],rdi
654 mov rdi,rdx
656 mul rbx
657 add r11,rax
658 mov rax,QWORD PTR[((-16))+rcx]
659 adc rdx,0
660 add r11,QWORD PTR[((-8))+r14]
661 adc rdx,0
662 mov r10,rdx
664 mul rbp
665 add rdi,rax
666 mov rax,QWORD PTR[r15*1+rsi]
667 adc rdx,0
668 add rdi,r11
669 adc rdx,0
670 mov QWORD PTR[((-24))+r14],r13
671 mov r13,rdx
673 mul rbx
674 add r10,rax
675 mov rax,QWORD PTR[rcx]
676 adc rdx,0
677 add r10,QWORD PTR[r14]
678 adc rdx,0
679 mov r11,rdx
681 mul rbp
682 add r13,rax
683 mov rax,QWORD PTR[8+r15*1+rsi]
684 adc rdx,0
685 add r13,r10
686 adc rdx,0
687 mov QWORD PTR[((-16))+r14],rdi
688 mov rdi,rdx
690 mul rbx
691 add r11,rax
692 mov rax,QWORD PTR[16+rcx]
693 adc rdx,0
694 add r11,QWORD PTR[8+r14]
695 adc rdx,0
696 mov r10,rdx
698 mul rbp
699 add rdi,rax
700 mov rax,QWORD PTR[16+r15*1+rsi]
701 adc rdx,0
702 add rdi,r11
703 lea rcx,QWORD PTR[64+rcx]
704 adc rdx,0
705 mov QWORD PTR[((-8))+r14],r13
706 mov r13,rdx
708 add r15,32
709 jnz $L$inner4x
; Row tail.
711 mul rbx
712 add r10,rax
713 mov rax,QWORD PTR[((-32))+rcx]
714 adc rdx,0
715 add r10,QWORD PTR[16+r14]
716 lea r14,QWORD PTR[32+r14]
717 adc rdx,0
718 mov r11,rdx
720 mul rbp
721 add r13,rax
722 mov rax,QWORD PTR[((-8))+rsi]
723 adc rdx,0
724 add r13,r10
725 adc rdx,0
726 mov QWORD PTR[((-32))+r14],rdi
727 mov rdi,rdx
729 mul rbx
730 add r11,rax
; Operand roles swap here: rax takes the reduction multiplier and rbp the
; top modulus word, which is still needed in rbp after the outer loop.
731 mov rax,rbp
732 mov rbp,QWORD PTR[((-16))+rcx]
733 adc rdx,0
734 add r11,QWORD PTR[((-8))+r14]
735 adc rdx,0
736 mov r10,rdx
738 mul rbp
739 add rdi,rax
740 mov rax,QWORD PTR[r9*1+rsi]
741 adc rdx,0
742 add rdi,r11
743 adc rdx,0
744 mov QWORD PTR[((-24))+r14],r13
745 mov r13,rdx
; DB below encodes: movq rbx,xmm0  (next multiplier word)
747 DB 102,72,15,126,195
748 mov QWORD PTR[((-16))+r14],rdi
749 lea rcx,QWORD PTR[r9*2+rcx]
751 xor rdi,rdi
752 add r13,r10
753 adc rdi,0
754 add r13,QWORD PTR[r14]
755 adc rdi,0
756 mov QWORD PTR[((-8))+r14],r13
; Loop until the table walk pointer reaches the end marker saved at entry.
758 cmp r12,QWORD PTR[((16+8))+rsp]
759 jb $L$outer4x
; rdi = (overflow word | (top modulus word < top result word)) ^ 1.
; It offsets the subtrahend pointer by 0 or 8 bytes.
; NOTE(review): presumably +8 selects an interleaved all-zero copy so the
; subtraction below becomes a no-op -- confirm the table layout.
760 sub rbp,r13
761 adc r15,r15
762 or rdi,r15
763 xor rdi,1
; rbx = start of scratch result, rcx = r9 >> 5 (negative group-of-4 count),
; rdi = result pointer saved at entry; finish in the shared subtract loop.
764 lea rbx,QWORD PTR[r9*1+r14]
765 lea rbp,QWORD PTR[rdi*8+rcx]
766 mov rcx,r9
767 sar rcx,3+2
768 mov rdi,QWORD PTR[((56+8))+rsp]
769 jmp $L$sqr4x_sub
770 mul4x_internal ENDP
771 PUBLIC bn_power5
773 ALIGN 32
; bn_power5 -- public Win64 entry point.
;
; Runs __bn_sqr8x_internal five times and then mul4x_internal once, i.e.
; five squarings followed by one multiplication by an operand gathered
; from the table (3rd argument) using the index passed as 7th argument.
; Returns 1 in rax.
; NOTE(review): argument meanings (rp, ap, table, np, n0, num, power) are
; inferred from register usage -- confirm against the C prototype.
;
; Frame layout relative to the entry rsp kept in rax:
;   [rax-8 .. rax-48]  rbx, rbp, r12, r13, r14, r15
;   [rax-72]           xmm7
;   [rax-88]           xmm6
; Inside the 64-byte-aligned frame: [32+rsp] = reduction constant,
; [40+rsp] = entry rsp.
;
; Fix relative to the previous revision: mul4x_internal loads its gather
; masks into xmm4..xmm7, so xmm6/xmm7 are clobbered on every call. They are
; callee-saved under the Microsoft x64 ABI and were spilled in the
; prologue, but never reloaded. The epilogue now restores them, at the same
; offsets bn_mul4x_mont_gather5 uses. The restores sit before
; $L$power5_epilogue, so the label still marks the point where rsp has
; been unwound.
; The line-number gutter left by the web viewer has been dropped from this
; block so that it is valid MASM.
bn_power5	PROC PUBLIC
	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
	mov	QWORD PTR[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_power5::
	; Win64 register arguments -> rdi, rsi, rdx, rcx; args 5/6 -> r8, r9.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD PTR[40+rsp]
	mov	r9,QWORD PTR[48+rsp]

	mov	rax,rsp
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	; Spill callee-saved xmm6/xmm7 at rax-88 / rax-72.
	lea	rsp,QWORD PTR[((-40))+rsp]
	movaps	XMMWORD PTR[rsp],xmm6
	movaps	XMMWORD PTR[16+rsp],xmm7
	; r9 = -(count*8), r10 = count*32, r8 = *r8 (reduction constant).
	mov	r10d,r9d
	shl	r9d,3
	shl	r10d,3+2
	neg	r9
	mov	r8,QWORD PTR[r8]

	; r11 = distance (mod 4096) between the prospective frame and rsi.
	; NOTE(review): presumably keeps the frame from aliasing the input at
	; 4 KiB granularity -- confirm against the perlasm source.
	lea	r11,QWORD PTR[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	$L$pwr_sp_alt
	sub	rsp,r11
	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]
	jmp	$L$pwr_sp_done

ALIGN	32
$L$pwr_sp_alt::
	lea	r10,QWORD PTR[((4096-64))+r9*2]
	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]
	sub	r11,r10
	; mov (not xor) keeps the CF from the sub alive for cmovc.
	mov	r10,0
	cmovc	r11,r10
	sub	rsp,r11
$L$pwr_sp_done::
	and	rsp,-64
	; r10 = -(count*8), r9 = +(count*8): the convention expected by
	; __bn_sqr8x_internal.
	mov	r10,r9
	neg	r9

	mov	QWORD PTR[32+rsp],r8
	mov	QWORD PTR[40+rsp],rax
$L$power5_body::
	; Park values in xmm registers across the squarings:
	DB	102,72,15,110,207	; movq xmm1,rdi  (result pointer)
	DB	102,72,15,110,209	; movq xmm2,rcx  (modulus pointer)
	DB	102,73,15,110,218	; movq xmm3,r10  (negated byte length)
	DB	102,72,15,110,226	; movq xmm4,rdx  (table pointer)

	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal

	DB	102,72,15,126,209	; movq rcx,xmm2  (modulus pointer)
	DB	102,72,15,126,226	; movq rdx,xmm4  (table pointer)
	; Set up mul4x_internal: rdi = rsi, rax = entry rsp (it reads the
	; table index from [56+rax]), r8 -> reduction constant in the frame.
	mov	rdi,rsi
	mov	rax,QWORD PTR[40+rsp]
	lea	r8,QWORD PTR[32+rsp]

	call	mul4x_internal

	mov	rsi,QWORD PTR[40+rsp]
	mov	rax,1
	; Restore the callee-saved XMM registers clobbered by mul4x_internal.
	movaps	xmm6,XMMWORD PTR[((-88))+rsi]
	movaps	xmm7,XMMWORD PTR[((-72))+rsi]
	mov	r15,QWORD PTR[((-48))+rsi]
	mov	r14,QWORD PTR[((-40))+rsi]
	mov	r13,QWORD PTR[((-32))+rsi]
	mov	r12,QWORD PTR[((-24))+rsi]
	mov	rbp,QWORD PTR[((-16))+rsi]
	mov	rbx,QWORD PTR[((-8))+rsi]
	lea	rsp,QWORD PTR[rsi]
$L$power5_epilogue::
	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD PTR[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_power5::
bn_power5	ENDP
878 PUBLIC bn_sqr8x_internal
881 ALIGN 32
; bn_sqr8x_internal / __bn_sqr8x_internal -- squaring followed by reduction.
;
; Called with a private register convention (see bn_power5):
;   rsi  = operand pointer
;   r9   = operand length in bytes, r10 = -r9
;   xmm1 = result pointer, xmm2 = modulus pointer, xmm3 = -length
;   [32+8+rsp] (the caller's [32+rsp]) = reduction constant
; The double-width intermediate is built at [48+8+rsp] in the caller's frame.
; The +8 in the frame offsets accounts for the return address pushed by
; the call.
; On return r9/r10 again hold +length / -length (see the last two
; instructions), and rdi has been advanced past the words written.
;
; Three stages, all inside this PROC:
;   1. $L$sqr4x_1st .. $L$sqr4x_shift_n_add : cross products, then doubling
;      plus the squared diagonal terms.
;   2. sqr8x_reduction : reduction, 8 words per pass. Also called directly
;      by bn_from_mont8x.
;   3. $L$sqr4x_sub : final subtraction into the result buffer. Also
;      the jump target at the end of mul4x_internal.
882 bn_sqr8x_internal PROC PUBLIC
883 __bn_sqr8x_internal::
; rbp = negative running index (starts at 32 - length); rsi -> end of the
; operand so that [rbp+rsi] walks forward through it.
957 lea rbp,QWORD PTR[32+r10]
958 lea rsi,QWORD PTR[r9*1+rsi]
960 mov rcx,r9
; r14/r15 hold the two multiplier words for this pass; rdi -> scratch.
963 mov r14,QWORD PTR[((-32))+rbp*1+rsi]
964 lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
965 mov rax,QWORD PTR[((-24))+rbp*1+rsi]
966 lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
967 mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
968 mov r15,rax
970 mul r14
971 mov r10,rax
972 mov rax,rbx
973 mov r11,rdx
974 mov QWORD PTR[((-24))+rbp*1+rdi],r10
976 mul r14
977 add r11,rax
978 mov rax,rbx
979 adc rdx,0
980 mov QWORD PTR[((-16))+rbp*1+rdi],r11
981 mov r10,rdx
984 mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
985 mul r15
986 mov r12,rax
987 mov rax,rbx
988 mov r13,rdx
990 lea rcx,QWORD PTR[rbp]
991 mul r14
992 add r10,rax
993 mov rax,rbx
994 mov r11,rdx
995 adc r11,0
996 add r10,r12
997 adc r11,0
998 mov QWORD PTR[((-8))+rcx*1+rdi],r10
999 jmp $L$sqr4x_1st
1001 ALIGN 32
; First pass: products with the first two words; results are stored.
; rcx advances 32 bytes (4 words) per iteration up to zero.
1002 $L$sqr4x_1st::
1003 mov rbx,QWORD PTR[rcx*1+rsi]
1004 mul r15
1005 add r13,rax
1006 mov rax,rbx
1007 mov r12,rdx
1008 adc r12,0
1010 mul r14
1011 add r11,rax
1012 mov rax,rbx
1013 mov rbx,QWORD PTR[8+rcx*1+rsi]
1014 mov r10,rdx
1015 adc r10,0
1016 add r11,r13
1017 adc r10,0
1020 mul r15
1021 add r12,rax
1022 mov rax,rbx
1023 mov QWORD PTR[rcx*1+rdi],r11
1024 mov r13,rdx
1025 adc r13,0
1027 mul r14
1028 add r10,rax
1029 mov rax,rbx
1030 mov rbx,QWORD PTR[16+rcx*1+rsi]
1031 mov r11,rdx
1032 adc r11,0
1033 add r10,r12
1034 adc r11,0
1036 mul r15
1037 add r13,rax
1038 mov rax,rbx
1039 mov QWORD PTR[8+rcx*1+rdi],r10
1040 mov r12,rdx
1041 adc r12,0
1043 mul r14
1044 add r11,rax
1045 mov rax,rbx
1046 mov rbx,QWORD PTR[24+rcx*1+rsi]
1047 mov r10,rdx
1048 adc r10,0
1049 add r11,r13
1050 adc r10,0
1053 mul r15
1054 add r12,rax
1055 mov rax,rbx
1056 mov QWORD PTR[16+rcx*1+rdi],r11
1057 mov r13,rdx
1058 adc r13,0
1059 lea rcx,QWORD PTR[32+rcx]
1061 mul r14
1062 add r10,rax
1063 mov rax,rbx
1064 mov r11,rdx
1065 adc r11,0
1066 add r10,r12
1067 adc r11,0
1068 mov QWORD PTR[((-8))+rcx*1+rdi],r10
1070 cmp rcx,0
1071 jne $L$sqr4x_1st
1073 mul r15
1074 add r13,rax
1075 lea rbp,QWORD PTR[16+rbp]
1076 adc rdx,0
1077 add r13,r11
1078 adc rdx,0
1080 mov QWORD PTR[rdi],r13
1081 mov r12,rdx
1082 mov QWORD PTR[8+rdi],rdx
1083 jmp $L$sqr4x_outer
1085 ALIGN 32
; Later passes: rbp advances 16 bytes (2 words) per pass; products are
; accumulated onto what is already in the scratch area.
1086 $L$sqr4x_outer::
1087 mov r14,QWORD PTR[((-32))+rbp*1+rsi]
1088 lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
1089 mov rax,QWORD PTR[((-24))+rbp*1+rsi]
1090 lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
1091 mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
1092 mov r15,rax
1094 mul r14
1095 mov r10,QWORD PTR[((-24))+rbp*1+rdi]
1096 add r10,rax
1097 mov rax,rbx
1098 adc rdx,0
1099 mov QWORD PTR[((-24))+rbp*1+rdi],r10
1100 mov r11,rdx
1102 mul r14
1103 add r11,rax
1104 mov rax,rbx
1105 adc rdx,0
1106 add r11,QWORD PTR[((-16))+rbp*1+rdi]
1107 mov r10,rdx
1108 adc r10,0
1109 mov QWORD PTR[((-16))+rbp*1+rdi],r11
1111 xor r12,r12
1113 mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
1114 mul r15
1115 add r12,rax
1116 mov rax,rbx
1117 adc rdx,0
1118 add r12,QWORD PTR[((-8))+rbp*1+rdi]
1119 mov r13,rdx
1120 adc r13,0
1122 mul r14
1123 add r10,rax
1124 mov rax,rbx
1125 adc rdx,0
1126 add r10,r12
1127 mov r11,rdx
1128 adc r11,0
1129 mov QWORD PTR[((-8))+rbp*1+rdi],r10
1131 lea rcx,QWORD PTR[rbp]
1132 jmp $L$sqr4x_inner
1134 ALIGN 32
; Inner accumulate loop, two words (16 bytes of rcx) per iteration.
1135 $L$sqr4x_inner::
1136 mov rbx,QWORD PTR[rcx*1+rsi]
1137 mul r15
1138 add r13,rax
1139 mov rax,rbx
1140 mov r12,rdx
1141 adc r12,0
1142 add r13,QWORD PTR[rcx*1+rdi]
1143 adc r12,0
1145 DB 067h
1146 mul r14
1147 add r11,rax
1148 mov rax,rbx
1149 mov rbx,QWORD PTR[8+rcx*1+rsi]
1150 mov r10,rdx
1151 adc r10,0
1152 add r11,r13
1153 adc r10,0
1155 mul r15
1156 add r12,rax
1157 mov QWORD PTR[rcx*1+rdi],r11
1158 mov rax,rbx
1159 mov r13,rdx
1160 adc r13,0
1161 add r12,QWORD PTR[8+rcx*1+rdi]
1162 lea rcx,QWORD PTR[16+rcx]
1163 adc r13,0
1165 mul r14
1166 add r10,rax
1167 mov rax,rbx
1168 adc rdx,0
1169 add r10,r12
1170 mov r11,rdx
1171 adc r11,0
1172 mov QWORD PTR[((-8))+rcx*1+rdi],r10
1174 cmp rcx,0
1175 jne $L$sqr4x_inner
1177 DB 067h
1178 mul r15
1179 add r13,rax
1180 adc rdx,0
1181 add r13,r11
1182 adc rdx,0
1184 mov QWORD PTR[rdi],r13
1185 mov r12,rdx
1186 mov QWORD PTR[8+rdi],rdx
1188 add rbp,16
1189 jnz $L$sqr4x_outer
; rbp == 0 here: handle the last four operand words, addressed directly
; below rsi.
1192 mov r14,QWORD PTR[((-32))+rsi]
1193 lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
1194 mov rax,QWORD PTR[((-24))+rsi]
1195 lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
1196 mov rbx,QWORD PTR[((-16))+rsi]
1197 mov r15,rax
1199 mul r14
1200 add r10,rax
1201 mov rax,rbx
1202 mov r11,rdx
1203 adc r11,0
1205 mul r14
1206 add r11,rax
1207 mov rax,rbx
1208 mov QWORD PTR[((-24))+rdi],r10
1209 mov r10,rdx
1210 adc r10,0
1211 add r11,r13
1212 mov rbx,QWORD PTR[((-8))+rsi]
1213 adc r10,0
1215 mul r15
1216 add r12,rax
1217 mov rax,rbx
1218 mov QWORD PTR[((-16))+rdi],r11
1219 mov r13,rdx
1220 adc r13,0
1222 mul r14
1223 add r10,rax
1224 mov rax,rbx
1225 mov r11,rdx
1226 adc r11,0
1227 add r10,r12
1228 adc r11,0
1229 mov QWORD PTR[((-8))+rdi],r10
1231 mul r15
1232 add r13,rax
1233 mov rax,QWORD PTR[((-16))+rsi]
1234 adc rdx,0
1235 add r13,r11
1236 adc rdx,0
1238 mov QWORD PTR[rdi],r13
1239 mov r12,rdx
1240 mov QWORD PTR[8+rdi],rdx
1242 mul rbx
; rbp = 16 - length: negative index for the doubling pass below.
1243 add rbp,16
1244 xor r14,r14
1245 sub rbp,r9
1246 xor r15,r15
1248 add rax,r12
1249 adc rdx,0
1250 mov QWORD PTR[8+rdi],rax
1251 mov QWORD PTR[16+rdi],rdx
1252 mov QWORD PTR[24+rdi],r15
; Doubling pass: each pair of scratch words is shifted left by one bit
; (lea x*2 plus the bit carried in r14 / shr ..,63) and the square of one
; operand word (mul rax) is added. CF is parked in r15 with sbb r15,r15 and
; re-created with neg r15, because mul overwrites the flags in between.
1254 mov rax,QWORD PTR[((-16))+rbp*1+rsi]
1255 lea rdi,QWORD PTR[((48+8))+rsp]
1256 xor r10,r10
1257 mov r11,QWORD PTR[8+rdi]
1259 lea r12,QWORD PTR[r10*2+r14]
1260 shr r10,63
1261 lea r13,QWORD PTR[r11*2+rcx]
1262 shr r11,63
1263 or r13,r10
1264 mov r10,QWORD PTR[16+rdi]
1265 mov r14,r11
1266 mul rax
1267 neg r15
1268 mov r11,QWORD PTR[24+rdi]
1269 adc r12,rax
1270 mov rax,QWORD PTR[((-8))+rbp*1+rsi]
1271 mov QWORD PTR[rdi],r12
1272 adc r13,rdx
1274 lea rbx,QWORD PTR[r10*2+r14]
1275 mov QWORD PTR[8+rdi],r13
1276 sbb r15,r15
1277 shr r10,63
1278 lea r8,QWORD PTR[r11*2+rcx]
1279 shr r11,63
1280 or r8,r10
1281 mov r10,QWORD PTR[32+rdi]
1282 mov r14,r11
1283 mul rax
1284 neg r15
1285 mov r11,QWORD PTR[40+rdi]
1286 adc rbx,rax
1287 mov rax,QWORD PTR[rbp*1+rsi]
1288 mov QWORD PTR[16+rdi],rbx
1289 adc r8,rdx
1290 lea rbp,QWORD PTR[16+rbp]
1291 mov QWORD PTR[24+rdi],r8
1292 sbb r15,r15
1293 lea rdi,QWORD PTR[64+rdi]
1294 jmp $L$sqr4x_shift_n_add
1296 ALIGN 32
; Four operand words (eight scratch words) per iteration.
1297 $L$sqr4x_shift_n_add::
1298 lea r12,QWORD PTR[r10*2+r14]
1299 shr r10,63
1300 lea r13,QWORD PTR[r11*2+rcx]
1301 shr r11,63
1302 or r13,r10
1303 mov r10,QWORD PTR[((-16))+rdi]
1304 mov r14,r11
1305 mul rax
1306 neg r15
1307 mov r11,QWORD PTR[((-8))+rdi]
1308 adc r12,rax
1309 mov rax,QWORD PTR[((-8))+rbp*1+rsi]
1310 mov QWORD PTR[((-32))+rdi],r12
1311 adc r13,rdx
1313 lea rbx,QWORD PTR[r10*2+r14]
1314 mov QWORD PTR[((-24))+rdi],r13
1315 sbb r15,r15
1316 shr r10,63
1317 lea r8,QWORD PTR[r11*2+rcx]
1318 shr r11,63
1319 or r8,r10
1320 mov r10,QWORD PTR[rdi]
1321 mov r14,r11
1322 mul rax
1323 neg r15
1324 mov r11,QWORD PTR[8+rdi]
1325 adc rbx,rax
1326 mov rax,QWORD PTR[rbp*1+rsi]
1327 mov QWORD PTR[((-16))+rdi],rbx
1328 adc r8,rdx
1330 lea r12,QWORD PTR[r10*2+r14]
1331 mov QWORD PTR[((-8))+rdi],r8
1332 sbb r15,r15
1333 shr r10,63
1334 lea r13,QWORD PTR[r11*2+rcx]
1335 shr r11,63
1336 or r13,r10
1337 mov r10,QWORD PTR[16+rdi]
1338 mov r14,r11
1339 mul rax
1340 neg r15
1341 mov r11,QWORD PTR[24+rdi]
1342 adc r12,rax
1343 mov rax,QWORD PTR[8+rbp*1+rsi]
1344 mov QWORD PTR[rdi],r12
1345 adc r13,rdx
1347 lea rbx,QWORD PTR[r10*2+r14]
1348 mov QWORD PTR[8+rdi],r13
1349 sbb r15,r15
1350 shr r10,63
1351 lea r8,QWORD PTR[r11*2+rcx]
1352 shr r11,63
1353 or r8,r10
1354 mov r10,QWORD PTR[32+rdi]
1355 mov r14,r11
1356 mul rax
1357 neg r15
1358 mov r11,QWORD PTR[40+rdi]
1359 adc rbx,rax
1360 mov rax,QWORD PTR[16+rbp*1+rsi]
1361 mov QWORD PTR[16+rdi],rbx
1362 adc r8,rdx
1363 mov QWORD PTR[24+rdi],r8
1364 sbb r15,r15
1365 lea rdi,QWORD PTR[64+rdi]
1366 add rbp,32
1367 jnz $L$sqr4x_shift_n_add
; Final two operand words of the doubling pass.
1369 lea r12,QWORD PTR[r10*2+r14]
1370 DB 067h
1371 shr r10,63
1372 lea r13,QWORD PTR[r11*2+rcx]
1373 shr r11,63
1374 or r13,r10
1375 mov r10,QWORD PTR[((-16))+rdi]
1376 mov r14,r11
1377 mul rax
1378 neg r15
1379 mov r11,QWORD PTR[((-8))+rdi]
1380 adc r12,rax
1381 mov rax,QWORD PTR[((-8))+rsi]
1382 mov QWORD PTR[((-32))+rdi],r12
1383 adc r13,rdx
1385 lea rbx,QWORD PTR[r10*2+r14]
1386 mov QWORD PTR[((-24))+rdi],r13
1387 sbb r15,r15
1388 shr r10,63
1389 lea r8,QWORD PTR[r11*2+rcx]
1390 shr r11,63
1391 or r8,r10
1392 mul rax
1393 neg r15
1394 adc rbx,rax
1395 adc r8,rdx
1396 mov QWORD PTR[((-16))+rdi],rbx
1397 mov QWORD PTR[((-8))+rdi],r8
; DB below encodes: movq rbp,xmm2  (modulus pointer parked by the caller)
1398 DB 102,72,15,126,213
; sqr8x_reduction -- secondary entry point (called by bn_from_mont8x).
; Expects rbp = modulus pointer, r9 = length in bytes, the double-width
; value at [48+8+rsp], and xmm1/xmm2/xmm3 as described in the header.
; The modulus is read with a 16-byte stride ([rbp], [16+rbp], ...).
; Frame slots: [0+8+rsp] = end of modulus (rbp + 2*r9),
;              [8+8+rsp] = end of the double-width value.
1399 sqr8x_reduction::
1400 xor rax,rax
1401 lea rcx,QWORD PTR[r9*2+rbp]
1402 lea rdx,QWORD PTR[((48+8))+r9*2+rsp]
1403 mov QWORD PTR[((0+8))+rsp],rcx
1404 lea rdi,QWORD PTR[((48+8))+r9*1+rsp]
1405 mov QWORD PTR[((8+8))+rsp],rdx
1406 neg r9
1407 jmp $L$8x_reduction_loop
1409 ALIGN 32
; One pass: load eight words into rbx, r9..r15 (r9 is reused as data here
; and reloaded from xmm3 at the end of the pass).
1410 $L$8x_reduction_loop::
1411 lea rdi,QWORD PTR[r9*1+rdi]
1412 DB 066h
1413 mov rbx,QWORD PTR[rdi]
1414 mov r9,QWORD PTR[8+rdi]
1415 mov r10,QWORD PTR[16+rdi]
1416 mov r11,QWORD PTR[24+rdi]
1417 mov r12,QWORD PTR[32+rdi]
1418 mov r13,QWORD PTR[40+rdi]
1419 mov r14,QWORD PTR[48+rdi]
1420 mov r15,QWORD PTR[56+rdi]
; Store the carry word of the previous pass at the top of the value.
1421 mov QWORD PTR[rdx],rax
1422 lea rdi,QWORD PTR[64+rdi]
1424 DB 067h
; rbx = lowest word * reduction constant: first reduction multiplier.
1425 mov r8,rbx
1426 imul rbx,QWORD PTR[((32+8))+rsp]
1427 mov rax,QWORD PTR[rbp]
1428 mov ecx,8
1429 jmp $L$8x_reduce
1431 ALIGN 32
; Eight rounds; each multiplies the first eight modulus words by rbx,
; saves rbx in the frame for the tail loop and derives the next multiplier
; in rsi.
1432 $L$8x_reduce::
1433 mul rbx
1434 mov rax,QWORD PTR[16+rbp]
; neg sets CF = (r8 != 0); the low product word itself is discarded.
1435 neg r8
1436 mov r8,rdx
1437 adc r8,0
1439 mul rbx
1440 add r9,rax
1441 mov rax,QWORD PTR[32+rbp]
1442 adc rdx,0
1443 add r8,r9
1444 mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx
1445 mov r9,rdx
1446 adc r9,0
1448 mul rbx
1449 add r10,rax
1450 mov rax,QWORD PTR[48+rbp]
1451 adc rdx,0
1452 add r9,r10
1453 mov rsi,QWORD PTR[((32+8))+rsp]
1454 mov r10,rdx
1455 adc r10,0
1457 mul rbx
1458 add r11,rax
1459 mov rax,QWORD PTR[64+rbp]
1460 adc rdx,0
1461 imul rsi,r8
1462 add r10,r11
1463 mov r11,rdx
1464 adc r11,0
1466 mul rbx
1467 add r12,rax
1468 mov rax,QWORD PTR[80+rbp]
1469 adc rdx,0
1470 add r11,r12
1471 mov r12,rdx
1472 adc r12,0
1474 mul rbx
1475 add r13,rax
1476 mov rax,QWORD PTR[96+rbp]
1477 adc rdx,0
1478 add r12,r13
1479 mov r13,rdx
1480 adc r13,0
1482 mul rbx
1483 add r14,rax
1484 mov rax,QWORD PTR[112+rbp]
1485 adc rdx,0
1486 add r13,r14
1487 mov r14,rdx
1488 adc r14,0
1490 mul rbx
1491 mov rbx,rsi
1492 add r15,rax
1493 mov rax,QWORD PTR[rbp]
1494 adc rdx,0
1495 add r14,r15
1496 mov r15,rdx
1497 adc r15,0
1499 dec ecx
1500 jnz $L$8x_reduce
; Advance to the next eight modulus words; finished if none remain.
1502 lea rbp,QWORD PTR[128+rbp]
1503 xor rax,rax
1504 mov rdx,QWORD PTR[((8+8))+rsp]
1505 cmp rbp,QWORD PTR[((0+8))+rsp]
1506 jae $L$8x_no_tail
1508 DB 066h
1509 add r8,QWORD PTR[rdi]
1510 adc r9,QWORD PTR[8+rdi]
1511 adc r10,QWORD PTR[16+rdi]
1512 adc r11,QWORD PTR[24+rdi]
1513 adc r12,QWORD PTR[32+rdi]
1514 adc r13,QWORD PTR[40+rdi]
1515 adc r14,QWORD PTR[48+rdi]
1516 adc r15,QWORD PTR[56+rdi]
; rsi = -carry (the carry is parked across the multiply loop).
1517 sbb rsi,rsi
1519 mov rbx,QWORD PTR[((48+56+8))+rsp]
1520 mov ecx,8
1521 mov rax,QWORD PTR[rbp]
1522 jmp $L$8x_tail
1524 ALIGN 32
; Tail: apply the eight saved multipliers to the remaining modulus words.
1525 $L$8x_tail::
1526 mul rbx
1527 add r8,rax
1528 mov rax,QWORD PTR[16+rbp]
1529 mov QWORD PTR[rdi],r8
1530 mov r8,rdx
1531 adc r8,0
1533 mul rbx
1534 add r9,rax
1535 mov rax,QWORD PTR[32+rbp]
1536 adc rdx,0
1537 add r8,r9
1538 lea rdi,QWORD PTR[8+rdi]
1539 mov r9,rdx
1540 adc r9,0
1542 mul rbx
1543 add r10,rax
1544 mov rax,QWORD PTR[48+rbp]
1545 adc rdx,0
1546 add r9,r10
1547 mov r10,rdx
1548 adc r10,0
1550 mul rbx
1551 add r11,rax
1552 mov rax,QWORD PTR[64+rbp]
1553 adc rdx,0
1554 add r10,r11
1555 mov r11,rdx
1556 adc r11,0
1558 mul rbx
1559 add r12,rax
1560 mov rax,QWORD PTR[80+rbp]
1561 adc rdx,0
1562 add r11,r12
1563 mov r12,rdx
1564 adc r12,0
1566 mul rbx
1567 add r13,rax
1568 mov rax,QWORD PTR[96+rbp]
1569 adc rdx,0
1570 add r12,r13
1571 mov r13,rdx
1572 adc r13,0
1574 mul rbx
1575 add r14,rax
1576 mov rax,QWORD PTR[112+rbp]
1577 adc rdx,0
1578 add r13,r14
1579 mov r14,rdx
1580 adc r14,0
1582 mul rbx
; Fetch the next saved multiplier.
1583 mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp]
1584 add r15,rax
1585 adc rdx,0
1586 add r14,r15
1587 mov rax,QWORD PTR[rbp]
1588 mov r15,rdx
1589 adc r15,0
1591 dec ecx
1592 jnz $L$8x_tail
1594 lea rbp,QWORD PTR[128+rbp]
1595 mov rdx,QWORD PTR[((8+8))+rsp]
1596 cmp rbp,QWORD PTR[((0+8))+rsp]
1597 jae $L$8x_tail_done
1599 mov rbx,QWORD PTR[((48+56+8))+rsp]
; neg rsi re-creates the parked carry in CF for the adc chain.
1600 neg rsi
1601 mov rax,QWORD PTR[rbp]
1602 adc r8,QWORD PTR[rdi]
1603 adc r9,QWORD PTR[8+rdi]
1604 adc r10,QWORD PTR[16+rdi]
1605 adc r11,QWORD PTR[24+rdi]
1606 adc r12,QWORD PTR[32+rdi]
1607 adc r13,QWORD PTR[40+rdi]
1608 adc r14,QWORD PTR[48+rdi]
1609 adc r15,QWORD PTR[56+rdi]
1610 sbb rsi,rsi
1612 mov ecx,8
1613 jmp $L$8x_tail
1615 ALIGN 32
1616 $L$8x_tail_done::
; Add the carry word saved at the top of the value by an earlier pass.
1617 add r8,QWORD PTR[rdx]
1618 xor rax,rax
1620 neg rsi
1621 $L$8x_no_tail::
1622 adc r8,QWORD PTR[rdi]
1623 adc r9,QWORD PTR[8+rdi]
1624 adc r10,QWORD PTR[16+rdi]
1625 adc r11,QWORD PTR[24+rdi]
1626 adc r12,QWORD PTR[32+rdi]
1627 adc r13,QWORD PTR[40+rdi]
1628 adc r14,QWORD PTR[48+rdi]
1629 adc r15,QWORD PTR[56+rdi]
; rax = carry out of this pass; rcx = top modulus word.
1630 adc rax,0
1631 mov rcx,QWORD PTR[((-16))+rbp]
1632 xor rsi,rsi
; DB below encodes: movq rbp,xmm2  (rewind the modulus pointer)
1634 DB 102,72,15,126,213
1636 mov QWORD PTR[rdi],r8
1637 mov QWORD PTR[8+rdi],r9
; DB below encodes: movq r9,xmm3  (reload -length; r9 was used as data)
1638 DB 102,73,15,126,217
1639 mov QWORD PTR[16+rdi],r10
1640 mov QWORD PTR[24+rdi],r11
1641 mov QWORD PTR[32+rdi],r12
1642 mov QWORD PTR[40+rdi],r13
1643 mov QWORD PTR[48+rdi],r14
1644 mov QWORD PTR[56+rdi],r15
1645 lea rdi,QWORD PTR[64+rdi]
1647 cmp rdi,rdx
1648 jb $L$8x_reduction_loop
; rax = (carry | (top modulus word < top result word)) ^ 1; it offsets the
; subtrahend pointer by 0 or 8 bytes.
; NOTE(review): presumably +8 selects an interleaved all-zero copy so the
; subtraction below becomes a no-op -- confirm the table layout.
1650 sub rcx,r15
1651 lea rbx,QWORD PTR[r9*1+rdi]
1652 adc rsi,rsi
1653 mov rcx,r9
1654 or rax,rsi
; DB below encodes: movq rdi,xmm1  (result pointer)
1655 DB 102,72,15,126,207
1656 xor rax,1
; DB below encodes: movq rsi,xmm1
1657 DB 102,72,15,126,206
1658 lea rbp,QWORD PTR[rax*8+rbp]
; rcx = -length/32: negative count of 4-word groups.
1659 sar rcx,3+2
1660 jmp $L$sqr4x_sub
1662 ALIGN 32
; $L$sqr4x_sub -- shared tail (also the jump target from mul4x_internal).
; Subtracts the 16-byte-strided words at rbp from the words at rbx and
; stores four result words per iteration at rdi. inc/lea/mov leave CF
; alone, so the borrow chains across iterations.
1663 $L$sqr4x_sub::
1664 DB 066h
1665 mov r12,QWORD PTR[rbx]
1666 mov r13,QWORD PTR[8+rbx]
1667 sbb r12,QWORD PTR[rbp]
1668 mov r14,QWORD PTR[16+rbx]
1669 sbb r13,QWORD PTR[16+rbp]
1670 mov r15,QWORD PTR[24+rbx]
1671 lea rbx,QWORD PTR[32+rbx]
1672 sbb r14,QWORD PTR[32+rbp]
1673 mov QWORD PTR[rdi],r12
1674 sbb r15,QWORD PTR[48+rbp]
1675 lea rbp,QWORD PTR[64+rbp]
1676 mov QWORD PTR[8+rdi],r13
1677 mov QWORD PTR[16+rdi],r14
1678 mov QWORD PTR[24+rdi],r15
1679 lea rdi,QWORD PTR[32+rdi]
1681 inc rcx
1682 jnz $L$sqr4x_sub
; Leave r10 = -length and r9 = +length for the next call.
1683 mov r10,r9
1684 neg r9
1685 DB 0F3h,0C3h ;repret
1686 bn_sqr8x_internal ENDP
1687 PUBLIC bn_from_montgomery
1689 ALIGN 32
; bn_from_montgomery -- public dispatcher.
;
; Examines the 6th argument (DWORD at [48+rsp]). When its low three bits
; are all clear the call is forwarded, with the stack and every argument
; register untouched, to bn_from_mont8x by a tail jump. Otherwise the
; routine returns 0 in eax to report that it did nothing.
; (The line-number gutter left by the web viewer has been dropped from this
; block so that it is valid MASM.)
bn_from_montgomery	PROC PUBLIC
	test	DWORD PTR[48+rsp],7
	jnz	$L$from_mont_not_8x	; not a multiple of 8 -> decline
	jmp	bn_from_mont8x		; tail call, returns to our caller

$L$from_mont_not_8x::
	xor	eax,eax
	DB	0F3h,0C3h		;repret
bn_from_montgomery	ENDP
1698 ALIGN 32
; bn_from_mont8x -- private worker behind bn_from_montgomery.
;
; Copies the input (2nd argument, rsi) into the lower half of a
; double-width scratch area in the frame, zeroes the upper half, and runs
; sqr8x_reduction on it, i.e. a reduction of the input "multiplied by 1".
; The scratch area is wiped before returning. Returns 1 in rax.
; xmm6/xmm7 are spilled in the prologue but not reloaded: the copy loop
; uses xmm0..xmm4 and the visible sqr8x_reduction path only moves
; xmm1..xmm3, so nothing executed here writes xmm6/xmm7.
1699 bn_from_mont8x PROC PRIVATE
1700 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
1701 mov QWORD PTR[16+rsp],rsi
1702 mov rax,rsp
1703 $L$SEH_begin_bn_from_mont8x::
; Win64 register arguments -> rdi, rsi, rdx, rcx; args 5/6 -> r8, r9.
1704 mov rdi,rcx
1705 mov rsi,rdx
1706 mov rdx,r8
1707 mov rcx,r9
1708 mov r8,QWORD PTR[40+rsp]
1709 mov r9,QWORD PTR[48+rsp]
; DB 067h is a lone address-size prefix applied to the following mov.
; NOTE(review): presumably emitted only as code-alignment padding.
1712 DB 067h
1713 mov rax,rsp
1714 push rbx
1715 push rbp
1716 push r12
1717 push r13
1718 push r14
1719 push r15
1720 lea rsp,QWORD PTR[((-40))+rsp]
1721 movaps XMMWORD PTR[rsp],xmm6
1722 movaps XMMWORD PTR[16+rsp],xmm7
1723 DB 067h
; r9 = -(count*8), r10 = count*32, r8 = *r8 (reduction constant).
1724 mov r10d,r9d
1725 shl r9d,3
1726 shl r10d,3+2
1727 neg r9
1728 mov r8,QWORD PTR[r8]
; Same frame-placement sequence as in bn_mul4x_mont_gather5 / bn_power5:
; r11 = distance (mod 4096) between the prospective frame and rsi.
1736 lea r11,QWORD PTR[((-64))+r9*2+rsp]
1737 sub r11,rsi
1738 and r11,4095
1739 cmp r10,r11
1740 jb $L$from_sp_alt
1741 sub rsp,r11
1742 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
1743 jmp $L$from_sp_done
1745 ALIGN 32
1746 $L$from_sp_alt::
1747 lea r10,QWORD PTR[((4096-64))+r9*2]
1748 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
1749 sub r11,r10
; mov (not xor) keeps the CF from the sub alive for cmovc.
1750 mov r10,0
1751 cmovc r11,r10
1752 sub rsp,r11
1753 $L$from_sp_done::
1754 and rsp,-64
; r10 = -(count*8), r9 = +(count*8).
1755 mov r10,r9
1756 neg r9
; Frame slots: [32+rsp] = reduction constant, [40+rsp] = entry rsp.
1767 mov QWORD PTR[32+rsp],r8
1768 mov QWORD PTR[40+rsp],rax
1769 $L$from_body::
; r11 = bytes left to copy, rax -> scratch area, xmm0 = 0.
1770 mov r11,r9
1771 lea rax,QWORD PTR[48+rsp]
1772 pxor xmm0,xmm0
1773 jmp $L$mul_by_1
1775 ALIGN 32
; Per iteration: copy 64 input bytes to [rax] and clear the matching
; 64 bytes of the upper half at [r9+rax].
1776 $L$mul_by_1::
1777 movdqu xmm1,XMMWORD PTR[rsi]
1778 movdqu xmm2,XMMWORD PTR[16+rsi]
1779 movdqu xmm3,XMMWORD PTR[32+rsi]
1780 movdqa XMMWORD PTR[r9*1+rax],xmm0
1781 movdqu xmm4,XMMWORD PTR[48+rsi]
1782 movdqa XMMWORD PTR[16+r9*1+rax],xmm0
; DB below encodes: lea rsi,[rsi+64] with a 32-bit displacement.
1783 DB 048h,08dh,0b6h,040h,000h,000h,000h
1784 movdqa XMMWORD PTR[rax],xmm1
1785 movdqa XMMWORD PTR[32+r9*1+rax],xmm0
1786 movdqa XMMWORD PTR[16+rax],xmm2
1787 movdqa XMMWORD PTR[48+r9*1+rax],xmm0
1788 movdqa XMMWORD PTR[32+rax],xmm3
1789 movdqa XMMWORD PTR[48+rax],xmm4
1790 lea rax,QWORD PTR[64+rax]
1791 sub r11,64
1792 jnz $L$mul_by_1
; Park values where sqr8x_reduction expects them:
; DB below encodes: movq xmm1,rdi  (result pointer)
1794 DB 102,72,15,110,207
; DB below encodes: movq xmm2,rcx  (modulus pointer)
1795 DB 102,72,15,110,209
1796 DB 067h
1797 mov rbp,rcx
; DB below encodes: movq xmm3,r10  (negated byte length)
1798 DB 102,73,15,110,218
1799 call sqr8x_reduction
; Wipe the scratch area: 64 bytes per iteration while r9 drops by 32, so
; 2*r9 bytes are cleared in total.
1801 pxor xmm0,xmm0
1802 lea rax,QWORD PTR[48+rsp]
1803 mov rsi,QWORD PTR[40+rsp]
1804 jmp $L$from_mont_zero
1806 ALIGN 32
1807 $L$from_mont_zero::
1808 movdqa XMMWORD PTR[rax],xmm0
1809 movdqa XMMWORD PTR[16+rax],xmm0
1810 movdqa XMMWORD PTR[32+rax],xmm0
1811 movdqa XMMWORD PTR[48+rax],xmm0
1812 lea rax,QWORD PTR[64+rax]
1813 sub r9,32
1814 jnz $L$from_mont_zero
; rsi = entry rsp; restore the pushed registers relative to it.
1816 mov rax,1
1817 mov r15,QWORD PTR[((-48))+rsi]
1818 mov r14,QWORD PTR[((-40))+rsi]
1819 mov r13,QWORD PTR[((-32))+rsi]
1820 mov r12,QWORD PTR[((-24))+rsi]
1821 mov rbp,QWORD PTR[((-16))+rsi]
1822 mov rbx,QWORD PTR[((-8))+rsi]
1823 lea rsp,QWORD PTR[rsi]
1824 $L$from_epilogue::
1825 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
1826 mov rsi,QWORD PTR[16+rsp]
1827 DB 0F3h,0C3h ;repret
1828 $L$SEH_end_bn_from_mont8x::
1829 bn_from_mont8x ENDP
; bn_scatter5 -- public leaf routine (Win64 ABI), strided store.
;
;   rcx = source: consecutive 64-bit words
;   edx = number of words to copy; 0 returns immediately
;   r8  = base of the destination table
;   r9  = slot index, scaled by 8 and added to r8
;
; Word i of the source is written to r8 + r9*8 + i*256, so successive
; words land 256 bytes apart.  Uses only rax, rcx, rdx, r8 and the flags;
; nothing is written to the stack.
1830 PUBLIC bn_scatter5
1832 ALIGN 16
1833 bn_scatter5 PROC PUBLIC
1834 test edx,edx ; nothing to do for a zero count
1835 je $L$scatter_epilogue
1836 lea r8,QWORD PTR[r8+r9*8] ; r8 = address of the first destination word
1837 $L$scatter::
1838 mov rax,QWORD PTR[rcx] ; fetch the next source word
1839 add rcx,8 ; advance the source by one word
1840 mov QWORD PTR[r8],rax ; store it in the selected slot
1841 add r8,256 ; step to the same slot 256 bytes further on
1842 dec edx
1843 jne $L$scatter
1844 $L$scatter_epilogue::
1845 DB 0F3h,0C3h ;repret
1846 bn_scatter5 ENDP
; bn_gather5 -- public routine (Win64 ABI), masked strided load; the
; read-side counterpart of the layout written by bn_scatter5.
;
;   rcx = destination: consecutive 64-bit words
;   edx = number of words to produce
;   r8  = base of the table
;   r9d = slot index
;
; For every output word the loop reads four candidates 64 bytes apart
; (offsets (idx&7)*8 + {0,64,128,192} from the current 256-byte row),
; ANDs each with one of the masks in xmm4..xmm7 and ORs the results, so all
; four locations are touched regardless of the index.  Exactly one mask is
; all-ones: the one numbered (idx>>3)&3, see $L$magic_masks.
;
; There is no guard for edx == 0; the loop body always runs at least once.
;
; The prologue is emitted as raw bytes so that its length matches the
; unwind codes in $L$SEH_info_bn_gather5 (prologue size 0dh).
1848 PUBLIC bn_gather5
1850 ALIGN 16
1851 bn_gather5 PROC PUBLIC
1852 $L$SEH_begin_bn_gather5::
1854 DB 048h,083h,0ech,028h ; sub rsp,28h
1855 DB 00fh,029h,034h,024h ; movaps [rsp],xmm6
1856 DB 00fh,029h,07ch,024h,010h ; movaps [rsp+10h],xmm7
1857 mov r11d,r9d
1858 shr r9d,3
1859 and r11,7 ; r11 = idx & 7: quadword within a 64-byte group
1860 not r9d
1861 lea rax,QWORD PTR[$L$magic_masks]
1862 and r9d,3 ; r9 = 3 - ((idx>>3)&3): starting quadword in the mask table
1863 lea r8,QWORD PTR[128+r11*8+r8] ; bias by 128 so the four loads use -128..+64
1864 movq xmm4,QWORD PTR[r9*8+rax] ; masks for candidates 0..3
1865 movq xmm5,QWORD PTR[8+r9*8+rax]
1866 movq xmm6,QWORD PTR[16+r9*8+rax]
1867 movq xmm7,QWORD PTR[24+r9*8+rax]
1868 jmp $L$gather
1869 ALIGN 16
1870 $L$gather::
1871 movq xmm0,QWORD PTR[(((-128)))+r8] ; candidate 0
1872 movq xmm1,QWORD PTR[((-64))+r8] ; candidate 1
1873 pand xmm0,xmm4
1874 movq xmm2,QWORD PTR[r8] ; candidate 2
1875 pand xmm1,xmm5
1876 movq xmm3,QWORD PTR[64+r8] ; candidate 3
1877 pand xmm2,xmm6
1878 por xmm0,xmm1
1879 pand xmm3,xmm7
1880 DB 067h,067h ; two address-size prefixes on the next por; presumably padding
1881 por xmm0,xmm2
1882 lea r8,QWORD PTR[256+r8] ; next 256-byte row
1883 por xmm0,xmm3 ; xmm0 = the one selected candidate
1885 movq QWORD PTR[rcx],xmm0
1886 lea rcx,QWORD PTR[8+rcx]
1887 sub edx,1
1888 jnz $L$gather
; Epilogue: undo the hand-encoded prologue.
1889 movaps xmm6,XMMWORD PTR[rsp]
1890 movaps xmm7,XMMWORD PTR[16+rsp]
1891 lea rsp,QWORD PTR[40+rsp]
1892 DB 0F3h,0C3h ;repret
1893 $L$SEH_end_bn_gather5::
1894 bn_gather5 ENDP
; $L$magic_masks -- selection masks for the gather code.
;
; Viewed as eight quadwords, only quadword 3 is all-ones; the rest are 0.
; The gather code computes j = (~(idx>>3)) & 3 and loads quadwords
; j, j+1, j+2, j+3 into xmm4..xmm7, so the single all-ones quadword ends up
; in the mask register numbered (idx>>3)&3 and the other three are zero.
1895 ALIGN 64
1896 $L$magic_masks::
1897 DD 0,0,0,0,0,0,-1,-1
1898 DD 0,0,0,0,0,0,0,0
; NUL-terminated ASCII banner:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1899 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
1900 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
1901 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
1902 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
1903 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
1904 DB 112,101,110,115,115,108,46,111,114,103,62,0
; mul_handler -- Win64 language-specific exception handler shared by
; bn_mul_mont_gather5, bn_mul4x_mont_gather5, bn_power5 and bn_from_mont8x
; (see the $L$SEH_info_* records in .xdata).
;
; Arguments as used below (Win64 handler convention):
;   r8 = CONTEXT* of the faulting frame
;   r9 = DISPATCHER_CONTEXT*; its HandlerData points at two image-relative
;        labels: HandlerData[0] = end of prologue ("body"),
;                HandlerData[1] = start of epilogue.
; Returns eax = 1 (ExceptionContinueSearch).
;
; The handler recovers the stack pointer the function had on entry:
;   * Rip before the body label: the frame is not built yet and the
;     function has copied its entry rsp into rax, so context->Rax is used.
;   * Rip at/after the epilogue label: context->Rsp is already restored.
;   * otherwise the entry rsp is read back from the function's frame, the
;     callee-saved registers stored below it are copied into the CONTEXT,
;     and the unwind continues through RtlVirtualUnwind.
;
; Where the entry rsp lives depends on the function:
;   * bn_mul_mont_gather5 keeps it at [8 + num*8 + rsp], with num in r9;
;   * the functions placed after it keep it at [40 + rsp].
; bn_mul_mont_gather5 is the only function whose body lies below
; $L$mul_epilogue, so "Rip above $L$mul_epilogue" must select the
; fixed-offset slot.  The branch used to be "jb", which picked the wrong
; slot for every function; it is "ja" now.
1905 EXTERN __imp_RtlVirtualUnwind:NEAR
1907 ALIGN 16
1908 mul_handler PROC PRIVATE
1909 push rsi
1910 push rdi
1911 push rbx
1912 push rbp
1913 push r12
1914 push r13
1915 push r14
1916 push r15
1917 pushfq
1918 sub rsp,64 ; home space + stack arguments for RtlVirtualUnwind
1920 mov rax,QWORD PTR[120+r8] ; context->Rax
1921 mov rbx,QWORD PTR[248+r8] ; context->Rip
1923 mov rsi,QWORD PTR[8+r9] ; disp->ImageBase
1924 mov r11,QWORD PTR[56+r9] ; disp->HandlerData
1926 mov r10d,DWORD PTR[r11] ; HandlerData[0]
1927 lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the body label
1928 cmp rbx,r10
1929 jb $L$common_seh_tail ; still in the prologue: rax = entry rsp
1931 mov rax,QWORD PTR[152+r8] ; context->Rsp
1933 mov r10d,DWORD PTR[4+r11] ; HandlerData[1]
1934 lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the epilogue label
1935 cmp rbx,r10
1936 jae $L$common_seh_tail ; in the epilogue: registers already restored
1938 lea r10,QWORD PTR[$L$mul_epilogue]
1939 cmp rbx,r10
1940 ja $L$body_40 ; beyond bn_mul_mont_gather5: fixed-offset slot
1942 mov r10,QWORD PTR[192+r8] ; context->R9 (num)
1943 mov rax,QWORD PTR[8+r10*8+rax] ; entry rsp saved by bn_mul_mont_gather5
1944 jmp $L$body_proceed
1946 $L$body_40::
1947 mov rax,QWORD PTR[40+rax] ; entry rsp saved at [40+rsp]
1948 $L$body_proceed::
; rax = rsp at function entry; the pushed registers sit just below it.
1950 movaps xmm0,XMMWORD PTR[((-88))+rax] ; saved xmm6
1951 movaps xmm1,XMMWORD PTR[((-72))+rax] ; saved xmm7
1953 mov rbx,QWORD PTR[((-8))+rax]
1954 mov rbp,QWORD PTR[((-16))+rax]
1955 mov r12,QWORD PTR[((-24))+rax]
1956 mov r13,QWORD PTR[((-32))+rax]
1957 mov r14,QWORD PTR[((-40))+rax]
1958 mov r15,QWORD PTR[((-48))+rax]
1959 mov QWORD PTR[144+r8],rbx ; context->Rbx
1960 mov QWORD PTR[160+r8],rbp ; context->Rbp
1961 mov QWORD PTR[216+r8],r12 ; context->R12
1962 mov QWORD PTR[224+r8],r13 ; context->R13
1963 mov QWORD PTR[232+r8],r14 ; context->R14
1964 mov QWORD PTR[240+r8],r15 ; context->R15
1965 movups XMMWORD PTR[512+r8],xmm0 ; context->Xmm6
1966 movups XMMWORD PTR[528+r8],xmm1 ; context->Xmm7
1968 $L$common_seh_tail::
; rdi/rsi were parked in the caller's home space by the WIN64 prologue.
1969 mov rdi,QWORD PTR[8+rax]
1970 mov rsi,QWORD PTR[16+rax]
1971 mov QWORD PTR[152+r8],rax ; context->Rsp
1972 mov QWORD PTR[168+r8],rsi ; context->Rsi
1973 mov QWORD PTR[176+r8],rdi ; context->Rdi
; Copy the patched CONTEXT into disp->ContextRecord.
1975 mov rdi,QWORD PTR[40+r9] ; disp->ContextRecord
1976 mov rsi,r8
1977 mov ecx,154 ; 154 quadwords
1978 DD 0a548f3fch ; cld; rep movsq
; RtlVirtualUnwind(UNW_FLAG_NHANDLER, ImageBase, ControlPc, FunctionEntry,
;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
1980 mov rsi,r9
1981 xor rcx,rcx ; arg1 = 0
1982 mov rdx,QWORD PTR[8+rsi] ; arg2 = disp->ImageBase
1983 mov r8,QWORD PTR[rsi] ; arg3 = disp->ControlPc
1984 mov r9,QWORD PTR[16+rsi] ; arg4 = disp->FunctionEntry
1985 mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
1986 lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
1987 lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
1988 mov QWORD PTR[32+rsp],r10 ; arg5
1989 mov QWORD PTR[40+rsp],r11 ; arg6
1990 mov QWORD PTR[48+rsp],r12 ; arg7
1991 mov QWORD PTR[56+rsp],rcx ; arg8 = NULL
1992 call QWORD PTR[__imp_RtlVirtualUnwind]
1994 mov eax,1 ; ExceptionContinueSearch
1995 add rsp,64
1996 popfq
1997 pop r15
1998 pop r14
1999 pop r13
2000 pop r12
2001 pop rbp
2002 pop rbx
2003 pop rdi
2004 pop rsi
2005 DB 0F3h,0C3h ;repret
2006 mul_handler ENDP
2008 .text$ ENDS
; .pdata -- Win64 function table.  Each entry is three image-relative
; addresses: function start, function end, and the matching unwind record
; in .xdata.  Entries appear in the same order as the functions are laid
; out in the code segment.
2009 .pdata SEGMENT READONLY ALIGN(4)
2010 ALIGN 4
2011 DD imagerel $L$SEH_begin_bn_mul_mont_gather5
2012 DD imagerel $L$SEH_end_bn_mul_mont_gather5
2013 DD imagerel $L$SEH_info_bn_mul_mont_gather5
2015 DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5
2016 DD imagerel $L$SEH_end_bn_mul4x_mont_gather5
2017 DD imagerel $L$SEH_info_bn_mul4x_mont_gather5
2019 DD imagerel $L$SEH_begin_bn_power5
2020 DD imagerel $L$SEH_end_bn_power5
2021 DD imagerel $L$SEH_info_bn_power5
2023 DD imagerel $L$SEH_begin_bn_from_mont8x
2024 DD imagerel $L$SEH_end_bn_from_mont8x
2025 DD imagerel $L$SEH_info_bn_from_mont8x
2026 DD imagerel $L$SEH_begin_bn_gather5
2027 DD imagerel $L$SEH_end_bn_gather5
2028 DD imagerel $L$SEH_info_bn_gather5
2030 .pdata ENDS
; .xdata -- unwind records referenced from .pdata.
;
; The first four records share one shape:
;   DB 9,0,0,0        version 1 with the exception-handler flag set,
;                     prologue size 0, no unwind codes
;   DD handler        image-relative address of mul_handler
;   DD body,epilogue  handler data: the two labels mul_handler reads as
;                     HandlerData[0] and HandlerData[1]
; The bn_gather5 record instead carries plain unwind codes describing its
; hand-encoded prologue.
2031 .xdata SEGMENT READONLY ALIGN(8)
2032 ALIGN 8
2033 $L$SEH_info_bn_mul_mont_gather5::
2034 DB 9,0,0,0
2035 DD imagerel mul_handler
2036 DD imagerel $L$mul_body,imagerel $L$mul_epilogue
2037 ALIGN 8
2038 $L$SEH_info_bn_mul4x_mont_gather5::
2039 DB 9,0,0,0
2040 DD imagerel mul_handler
2041 DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
2042 ALIGN 8
2043 $L$SEH_info_bn_power5::
2044 DB 9,0,0,0
2045 DD imagerel mul_handler
2046 DD imagerel $L$power5_body,imagerel $L$power5_epilogue
2047 ALIGN 8
2048 $L$SEH_info_bn_from_mont8x::
2049 DB 9,0,0,0
2050 DD imagerel mul_handler
2051 DD imagerel $L$from_body,imagerel $L$from_epilogue
2052 ALIGN 8
2053 $L$SEH_info_bn_gather5::
2054 DB 001h,00dh,005h,000h ; version 1, no flags; prologue 0dh bytes; 5 code slots; no frame register
2055 DB 00dh,078h,001h,000h ; at offset 0dh: save xmm7 at [rsp+10h] (offset slot = 1*16)
2056 DB 008h,068h,000h,000h ; at offset 08h: save xmm6 at [rsp+0]
2057 DB 004h,042h,000h,000h ; at offset 04h: small allocation of 4*8+8 = 28h bytes; last slot is padding
2058 ALIGN 8
2060 .xdata ENDS