Roll src/third_party/WebKit a3b4a2e:7441784 (svn 202551:202552)
[chromium-blink-merge.git] / third_party / boringssl / win-x86_64 / crypto / bn / rsaz-x86_64.asm
blob04d5e3915afb406c19996105d6e315417f4f2ca5
; NASM (Intel syntax) source. `default rel` makes bare memory references
; RIP-relative.
1 default rel
; The size keywords below are defined as empty, so operand forms such as
; XMMWORD[...] used later in this file reduce to plain memory references.
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
; All code is emitted into a 64-byte-aligned .text section.
5 section .text code align=64
; External symbol declaration; nothing in the visible part of this file
; references it.
8 EXTERN OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; rsaz_512_sqr -- repeated 512-bit squaring, each followed by
;                 __rsaz_512_reduce and __rsaz_512_subtract.
; ABI:  Microsoft x64. The prologue copies the Win64 argument registers
;       into rdi/rsi/rdx/rcx/r8, which is the layout the body uses.
; In:   arg1 (rcx -> rdi)       output buffer, 8 qwords (stored by
;                               __rsaz_512_subtract)
;       arg2 (rdx -> rsi)       input operand a[0..7]
;       arg3 (r8  -> rbp)       modulus pointer, read by the two helpers
;       arg4 (r9  -> [128+rsp]) per-word constant that __rsaz_512_reduce
;                               multiplies in (presumably the Montgomery
;                               n0 value -- confirm against the C prototype)
;       arg5 ([40+rsp] -> r8d)  number of squarings. The loop is
;                               test-at-bottom (dec/jnz), so it must be >= 1.
; Frame (after `sub rsp,128+24`):
;       [rsp+0   .. rsp+127]  16-qword double-width product
;       [rsp+128]             arg4
;       [rsp+136]             remaining iteration count (dword)
; Saves/restores rbx, rbp, r12-r15 (pushed) and rdi, rsi (caller's home
; space at [8+rsp]/[16+rsp] relative to the entry rsp).
;-----------------------------------------------------------------------
10 global rsaz_512_sqr
12 ALIGN 32
13 rsaz_512_sqr:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
; rax = rsp at entry; se_handler relies on this while still in the prologue.
16 mov rax,rsp
17 $L$SEH_begin_rsaz_512_sqr:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
; Fifth argument: first stack slot above the return address and the
; 32-byte home space.
22 mov r8,QWORD[40+rsp]
25 push rbx
26 push rbp
27 push r12
28 push r13
29 push r14
30 push r15
; 128 bytes of product scratch + 24 bytes for arg4 / loop counter.
32 sub rsp,128+24
33 $L$sqr_body:
34 mov rbp,rdx
; Preload a[0] and a[1]; the loop tail reloads rdx/rax with the first two
; result words for the next squaring.
35 mov rdx,QWORD[rsi]
36 mov rax,QWORD[8+rsi]
37 mov QWORD[128+rsp],rcx
38 jmp NEAR $L$oop_sqr
40 ALIGN 32
41 $L$oop_sqr:
; Park the remaining-iterations counter; r8 is needed as an accumulator.
42 mov DWORD[((128+8))+rsp],r8d
; ---- Row 0: r8..r15 = a[0] * a[1..7] (cross products) ----
44 mov rbx,rdx
45 mul rdx
46 mov r8,rax
47 mov rax,QWORD[16+rsi]
48 mov r9,rdx
50 mul rbx
51 add r9,rax
52 mov rax,QWORD[24+rsi]
53 mov r10,rdx
54 adc r10,0
56 mul rbx
57 add r10,rax
58 mov rax,QWORD[32+rsi]
59 mov r11,rdx
60 adc r11,0
62 mul rbx
63 add r11,rax
64 mov rax,QWORD[40+rsi]
65 mov r12,rdx
66 adc r12,0
68 mul rbx
69 add r12,rax
70 mov rax,QWORD[48+rsi]
71 mov r13,rdx
72 adc r13,0
74 mul rbx
75 add r13,rax
76 mov rax,QWORD[56+rsi]
77 mov r14,rdx
78 adc r14,0
80 mul rbx
81 add r14,rax
82 mov rax,rbx
83 mov r15,rdx
84 adc r15,0
; Double the two lowest cross-product words. rcx keeps the pre-doubling r9
; so the bit shifted out of it can be injected into the next doubled word.
86 add r8,r8
87 mov rcx,r9
88 adc r9,r9
; Add a[0]^2 and emit result words 0 and 1.
90 mul rax
91 mov QWORD[rsp],rax
92 add r8,rdx
93 adc r9,0
95 mov QWORD[8+rsp],r8
96 shr rcx,63
; ---- Row 1: add a[1] * a[2..7] into r10..r15, new top word in r8 ----
99 mov r8,QWORD[8+rsi]
100 mov rax,QWORD[16+rsi]
101 mul r8
102 add r10,rax
103 mov rax,QWORD[24+rsi]
104 mov rbx,rdx
105 adc rbx,0
107 mul r8
108 add r11,rax
109 mov rax,QWORD[32+rsi]
110 adc rdx,0
111 add r11,rbx
112 mov rbx,rdx
113 adc rbx,0
115 mul r8
116 add r12,rax
117 mov rax,QWORD[40+rsi]
118 adc rdx,0
119 add r12,rbx
120 mov rbx,rdx
121 adc rbx,0
123 mul r8
124 add r13,rax
125 mov rax,QWORD[48+rsi]
126 adc rdx,0
127 add r13,rbx
128 mov rbx,rdx
129 adc rbx,0
131 mul r8
132 add r14,rax
133 mov rax,QWORD[56+rsi]
134 adc rdx,0
135 add r14,rbx
136 mov rbx,rdx
137 adc rbx,0
139 mul r8
140 add r15,rax
141 mov rax,r8
142 adc rdx,0
143 add r15,rbx
144 mov r8,rdx
145 mov rdx,r10
146 adc r8,0
; Double r10 and r11: `add rdx,rdx` only produces CF = top bit of r10 for
; the following adc; lea doubles r10 and adds the bit carried in rcx
; without touching flags. rbx keeps the pre-doubling r11.
148 add rdx,rdx
149 lea r10,[r10*2+rcx]
150 mov rbx,r11
151 adc r11,r11
; Add a[1]^2 and emit result words 2 and 3.
153 mul rax
154 add r9,rax
155 adc r10,rdx
156 adc r11,0
158 mov QWORD[16+rsp],r9
159 mov QWORD[24+rsp],r10
160 shr rbx,63
; ---- Row 2: add a[2] * a[3..7] into r12..r15,r8, new top word in r9 ----
163 mov r9,QWORD[16+rsi]
164 mov rax,QWORD[24+rsi]
165 mul r9
166 add r12,rax
167 mov rax,QWORD[32+rsi]
168 mov rcx,rdx
169 adc rcx,0
171 mul r9
172 add r13,rax
173 mov rax,QWORD[40+rsi]
174 adc rdx,0
175 add r13,rcx
176 mov rcx,rdx
177 adc rcx,0
179 mul r9
180 add r14,rax
181 mov rax,QWORD[48+rsi]
182 adc rdx,0
183 add r14,rcx
184 mov rcx,rdx
185 adc rcx,0
187 mul r9
; r12 is final here: double it (plus the bit in rbx) with lea so the
; flags of the surrounding add/adc chain stay intact. r10 keeps old r12.
188 mov r10,r12
189 lea r12,[r12*2+rbx]
190 add r15,rax
191 mov rax,QWORD[56+rsi]
192 adc rdx,0
193 add r15,rcx
194 mov rcx,rdx
195 adc rcx,0
197 mul r9
; r10 = bit shifted out of the old r12 (shr sits before the add chain
; below because it modifies flags).
198 shr r10,63
199 add r8,rax
200 mov rax,r9
201 adc rdx,0
202 add r8,rcx
203 mov r9,rdx
204 adc r9,0
; Double r13 (plus the bit from r12); rcx keeps the pre-doubling r13.
206 mov rcx,r13
207 lea r13,[r13*2+r10]
; Add a[2]^2 and emit result words 4 and 5.
209 mul rax
210 add r11,rax
211 adc r12,rdx
212 adc r13,0
214 mov QWORD[32+rsp],r11
215 mov QWORD[40+rsp],r12
216 shr rcx,63
; ---- Row 3: add a[3] * a[4..7] into r14,r15,r8,r9, new top word in r10 ----
219 mov r10,QWORD[24+rsi]
220 mov rax,QWORD[32+rsi]
221 mul r10
222 add r14,rax
223 mov rax,QWORD[40+rsi]
224 mov rbx,rdx
225 adc rbx,0
227 mul r10
228 add r15,rax
229 mov rax,QWORD[48+rsi]
230 adc rdx,0
231 add r15,rbx
232 mov rbx,rdx
233 adc rbx,0
235 mul r10
; Double r14 (plus the bit in rcx); r12 keeps the pre-doubling r14.
236 mov r12,r14
237 lea r14,[r14*2+rcx]
238 add r8,rax
239 mov rax,QWORD[56+rsi]
240 adc rdx,0
241 add r8,rbx
242 mov rbx,rdx
243 adc rbx,0
245 mul r10
246 shr r12,63
247 add r9,rax
248 mov rax,r10
249 adc rdx,0
250 add r9,rbx
251 mov r10,rdx
252 adc r10,0
; Double r15 (plus the bit from r14); rbx keeps the pre-doubling r15.
254 mov rbx,r15
255 lea r15,[r15*2+r12]
; Add a[3]^2 and emit result words 6 and 7.
257 mul rax
258 add r13,rax
259 adc r14,rdx
260 adc r15,0
262 mov QWORD[48+rsp],r13
263 mov QWORD[56+rsp],r14
264 shr rbx,63
; ---- Row 4: add a[4] * a[5..7] into r8,r9,r10, new top word in r11 ----
267 mov r11,QWORD[32+rsi]
268 mov rax,QWORD[40+rsi]
269 mul r11
270 add r8,rax
271 mov rax,QWORD[48+rsi]
272 mov rcx,rdx
273 adc rcx,0
275 mul r11
276 add r9,rax
277 mov rax,QWORD[56+rsi]
278 adc rdx,0
; Double r8 (plus the bit in rbx); r12 keeps the pre-doubling r8.
279 mov r12,r8
280 lea r8,[r8*2+rbx]
281 add r9,rcx
282 mov rcx,rdx
283 adc rcx,0
285 mul r11
286 shr r12,63
287 add r10,rax
288 mov rax,r11
289 adc rdx,0
290 add r10,rcx
291 mov r11,rdx
292 adc r11,0
; Double r9 (plus the bit from r8); rcx keeps the pre-doubling r9.
294 mov rcx,r9
295 lea r9,[r9*2+r12]
; Add a[4]^2 and emit result words 8 and 9.
297 mul rax
298 add r15,rax
299 adc r8,rdx
300 adc r9,0
302 mov QWORD[64+rsp],r15
303 mov QWORD[72+rsp],r8
304 shr rcx,63
; ---- Row 5: add a[5] * a[6..7] into r10,r11, new top word in r12 ----
307 mov r12,QWORD[40+rsi]
308 mov rax,QWORD[48+rsi]
309 mul r12
310 add r10,rax
311 mov rax,QWORD[56+rsi]
312 mov rbx,rdx
313 adc rbx,0
315 mul r12
316 add r11,rax
317 mov rax,r12
; Double r10 (plus the bit in rcx); r15 keeps the pre-doubling r10.
318 mov r15,r10
319 lea r10,[r10*2+rcx]
320 adc rdx,0
321 shr r15,63
322 add r11,rbx
323 mov r12,rdx
324 adc r12,0
; Double r11 (plus the bit from r10); rbx keeps the pre-doubling r11,
; whose top bit is consumed by `shl rbx,1` in row 6.
326 mov rbx,r11
327 lea r11,[r11*2+r15]
; Add a[5]^2 and emit result words 10 and 11.
329 mul rax
330 add r9,rax
331 adc r10,rdx
332 adc r11,0
334 mov QWORD[80+rsp],r9
335 mov QWORD[88+rsp],r10
; ---- Row 6: add a[6] * a[7] into r12, top word in r13 ----
338 mov r13,QWORD[48+rsi]
339 mov rax,QWORD[56+rsi]
340 mul r13
341 add r12,rax
342 mov rax,r13
343 mov r13,rdx
344 adc r13,0
; Double r12:r13 through a plain carry chain. `shl rbx,1` moves the top
; bit of the pre-doubling r11 into CF, and r14 collects the final carry.
346 xor r14,r14
347 shl rbx,1
348 adc r12,r12
349 adc r13,r13
350 adc r14,r14
; Add a[6]^2 and emit result words 12 and 13.
352 mul rax
353 add r11,rax
354 adc r12,rdx
355 adc r13,0
357 mov QWORD[96+rsp],r11
358 mov QWORD[104+rsp],r12
; ---- Row 7: add a[7]^2 and emit result words 14 and 15 ----
361 mov rax,QWORD[56+rsi]
362 mul rax
363 add r13,rax
364 adc rdx,0
366 add r14,rdx
368 mov QWORD[112+rsp],r13
369 mov QWORD[120+rsp],r14
; Reduce: low 8 product words go in r8..r15; the helper reads the modulus
; via rbp and the arg4 constant from the frame.
371 mov r8,QWORD[rsp]
372 mov r9,QWORD[8+rsp]
373 mov r10,QWORD[16+rsp]
374 mov r11,QWORD[24+rsp]
375 mov r12,QWORD[32+rsp]
376 mov r13,QWORD[40+rsp]
377 mov r14,QWORD[48+rsp]
378 mov r15,QWORD[56+rsp]
380 call __rsaz_512_reduce
; Add the high 8 product words to the reduced value.
382 add r8,QWORD[64+rsp]
383 adc r9,QWORD[72+rsp]
384 adc r10,QWORD[80+rsp]
385 adc r11,QWORD[88+rsp]
386 adc r12,QWORD[96+rsp]
387 adc r13,QWORD[104+rsp]
388 adc r14,QWORD[112+rsp]
389 adc r15,QWORD[120+rsp]
; rcx = all-ones if the addition carried out of 512 bits, else 0; used as
; the select mask by __rsaz_512_subtract.
390 sbb rcx,rcx
; Stores the result to [rdi], subtracting the modulus when rcx is set.
392 call __rsaz_512_subtract
; Set up the next squaring: rdx/rax = result words 0 and 1, and rsi now
; points at the output buffer so the result is squared again in place.
394 mov rdx,r8
395 mov rax,r9
396 mov r8d,DWORD[((128+8))+rsp]
397 mov rsi,rdi
399 dec r8d
400 jnz NEAR $L$oop_sqr
; rax = rsp at function entry (frame size 128+24 plus six pushes = 48).
402 lea rax,[((128+24+48))+rsp]
403 mov r15,QWORD[((-48))+rax]
404 mov r14,QWORD[((-40))+rax]
405 mov r13,QWORD[((-32))+rax]
406 mov r12,QWORD[((-24))+rax]
407 mov rbp,QWORD[((-16))+rax]
408 mov rbx,QWORD[((-8))+rax]
409 lea rsp,[rax]
410 $L$sqr_epilogue:
411 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
412 mov rsi,QWORD[16+rsp]
413 DB 0F3h,0C3h ;repret
414 $L$SEH_end_rsaz_512_sqr:
;-----------------------------------------------------------------------
; rsaz_512_mul -- 512x512-bit multiply, then __rsaz_512_reduce and
;                 __rsaz_512_subtract.
; ABI:  Microsoft x64; arguments are first copied to rdi/rsi/rdx/rcx/r8.
; In:   arg1 (rcx -> rdi)       output buffer, 8 qwords
;       arg2 (rdx -> rsi)       multiplicand, 8 qwords
;       arg3 (r8  -> rdx, rbp)  multiplier, 8 qwords
;       arg4 (r9  -> rcx)       modulus pointer (parked in xmm1, then rbp)
;       arg5 ([40+rsp] -> r8)   constant stored at [128+rsp], read by
;                               __rsaz_512_reduce (presumably Montgomery
;                               n0 -- confirm against the C prototype)
; Frame: [rsp+0..127] 16-qword product, [rsp+128] arg5.
; Uses xmm0/xmm1 as scratch to carry rdi and the modulus pointer across
; the __rsaz_512_mul call, which overwrites rdi and advances rbp.
;-----------------------------------------------------------------------
415 global rsaz_512_mul
417 ALIGN 32
418 rsaz_512_mul:
419 mov QWORD[8+rsp],rdi ;WIN64 prologue
420 mov QWORD[16+rsp],rsi
; rax = rsp at entry; se_handler relies on this while still in the prologue.
421 mov rax,rsp
422 $L$SEH_begin_rsaz_512_mul:
423 mov rdi,rcx
424 mov rsi,rdx
425 mov rdx,r8
426 mov rcx,r9
; Fifth argument, above the return address and the 32-byte home space.
427 mov r8,QWORD[40+rsp]
430 push rbx
431 push rbp
432 push r12
433 push r13
434 push r14
435 push r15
437 sub rsp,128+24
438 $L$mul_body:
; 66 48 0F 6E C7 = movq xmm0,rdi  (save output pointer)
439 DB 102,72,15,110,199
; 66 48 0F 6E C9 = movq xmm1,rcx  (save modulus pointer)
440 DB 102,72,15,110,201
441 mov QWORD[128+rsp],r8
; __rsaz_512_mul expects rbx = first multiplier word, rbp = multiplier
; pointer, rsi = multiplicand pointer.
442 mov rbx,QWORD[rdx]
443 mov rbp,rdx
444 call __rsaz_512_mul
; 66 48 0F 7E C7 = movq rdi,xmm0  (restore output pointer)
446 DB 102,72,15,126,199
; 66 48 0F 7E CD = movq rbp,xmm1  (rbp = modulus pointer for the helpers)
447 DB 102,72,15,126,205
; Low 8 product words -> r8..r15 for the reduction.
449 mov r8,QWORD[rsp]
450 mov r9,QWORD[8+rsp]
451 mov r10,QWORD[16+rsp]
452 mov r11,QWORD[24+rsp]
453 mov r12,QWORD[32+rsp]
454 mov r13,QWORD[40+rsp]
455 mov r14,QWORD[48+rsp]
456 mov r15,QWORD[56+rsp]
458 call __rsaz_512_reduce
; Add the high 8 product words to the reduced value.
459 add r8,QWORD[64+rsp]
460 adc r9,QWORD[72+rsp]
461 adc r10,QWORD[80+rsp]
462 adc r11,QWORD[88+rsp]
463 adc r12,QWORD[96+rsp]
464 adc r13,QWORD[104+rsp]
465 adc r14,QWORD[112+rsp]
466 adc r15,QWORD[120+rsp]
; rcx = all-ones if the addition carried out, else 0 (subtract mask).
467 sbb rcx,rcx
; Stores the result to [rdi], subtracting the modulus when rcx is set.
469 call __rsaz_512_subtract
; rax = rsp at function entry (frame 128+24 plus 48 bytes of pushes).
471 lea rax,[((128+24+48))+rsp]
472 mov r15,QWORD[((-48))+rax]
473 mov r14,QWORD[((-40))+rax]
474 mov r13,QWORD[((-32))+rax]
475 mov r12,QWORD[((-24))+rax]
476 mov rbp,QWORD[((-16))+rax]
477 mov rbx,QWORD[((-8))+rax]
478 lea rsp,[rax]
479 $L$mul_epilogue:
480 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
481 mov rsi,QWORD[16+rsp]
482 DB 0F3h,0C3h ;repret
483 $L$SEH_end_rsaz_512_mul:
;-----------------------------------------------------------------------
; rsaz_512_mul_gather4 -- multiply by a value gathered from an
;                 interleaved table, then reduce and conditionally
;                 subtract the modulus.
; ABI:  Microsoft x64; arguments are first copied to rdi/rsi/rdx/rcx/r8/r9.
; In:   arg1 (rcx -> rdi)       output buffer, 8 qwords
;       arg2 (rdx -> rsi)       multiplicand, 8 qwords
;       arg3 (r8  -> rdx)       table base
;       arg4 (r9  -> rcx)       modulus pointer (parked in xmm1, then rbp)
;       arg5 ([40+rsp] -> r8)   constant stored at [128+rsp], read by
;                               __rsaz_512_reduce (presumably Montgomery
;                               n0 -- confirm against the C prototype)
;       arg6 ([48+rsp] -> r9d)  table index, zero-extended from 32 bits
; Table layout as read here: multiplier word k of entry idx has its low
; dword at table + k*128 + idx*4 and its high dword 64 bytes after that.
; Frame: [rsp+0..127] 16-qword product, [rsp+128] arg5.
; NOTE(review): the address depends directly on the index, so the memory
; access pattern varies with idx -- confirm whether callers need an
; index-independent (constant-time) gather.
;-----------------------------------------------------------------------
484 global rsaz_512_mul_gather4
486 ALIGN 32
487 rsaz_512_mul_gather4:
488 mov QWORD[8+rsp],rdi ;WIN64 prologue
489 mov QWORD[16+rsp],rsi
; rax = rsp at entry; se_handler relies on this while still in the prologue.
490 mov rax,rsp
491 $L$SEH_begin_rsaz_512_mul_gather4:
492 mov rdi,rcx
493 mov rsi,rdx
494 mov rdx,r8
495 mov rcx,r9
; Fifth and sixth arguments from the caller's stack.
496 mov r8,QWORD[40+rsp]
497 mov r9,QWORD[48+rsp]
500 push rbx
501 push rbp
502 push r12
503 push r13
504 push r14
505 push r15
; Zero-extend the 32-bit index before it is used in address arithmetic.
507 mov r9d,r9d
508 sub rsp,128+24
509 $L$mul_gather4_body:
; Gather multiplier word 0: high dword into eax, low dword into ebx.
510 mov eax,DWORD[64+r9*4+rdx]
; 66 48 0F 6E C7 = movq xmm0,rdi  (save output pointer)
511 DB 102,72,15,110,199
512 mov ebx,DWORD[r9*4+rdx]
; 66 48 0F 6E C9 = movq xmm1,rcx  (save modulus pointer)
513 DB 102,72,15,110,201
514 mov QWORD[128+rsp],r8
; rbx = (high << 32) | low = multiplier word 0.
516 shl rax,32
517 or rbx,rax
518 mov rax,QWORD[rsi]
519 mov rcx,QWORD[8+rsi]
; rbp -> low dword of multiplier word 1 in the table.
520 lea rbp,[128+r9*4+rdx]
; ---- First row: product word 0 to [rsp], r8..r15 = remaining words ----
521 mul rbx
522 mov QWORD[rsp],rax
523 mov rax,rcx
524 mov r8,rdx
; The next multiplier word is assembled in xmm4 (low dword) / xmm5 (high
; dword), interleaved with the multiplies.
526 mul rbx
527 movd xmm4,DWORD[rbp]
528 add r8,rax
529 mov rax,QWORD[16+rsi]
530 mov r9,rdx
531 adc r9,0
533 mul rbx
534 movd xmm5,DWORD[64+rbp]
535 add r9,rax
536 mov rax,QWORD[24+rsi]
537 mov r10,rdx
538 adc r10,0
540 mul rbx
; Shift the high dword up by 4 bytes so it lands in bits 32..63.
541 pslldq xmm5,4
542 add r10,rax
543 mov rax,QWORD[32+rsi]
544 mov r11,rdx
545 adc r11,0
547 mul rbx
548 por xmm4,xmm5
549 add r11,rax
550 mov rax,QWORD[40+rsi]
551 mov r12,rdx
552 adc r12,0
554 mul rbx
555 add r12,rax
556 mov rax,QWORD[48+rsi]
557 mov r13,rdx
558 adc r13,0
560 mul rbx
; Advance to the following multiplier word (lea leaves flags untouched).
561 lea rbp,[128+rbp]
562 add r13,rax
563 mov rax,QWORD[56+rsi]
564 mov r14,rdx
565 adc r14,0
567 mul rbx
; 66 48 0F 7E E3 = movq rbx,xmm4  (rbx = next multiplier word)
568 DB 102,72,15,126,227
569 add r14,rax
570 mov rax,QWORD[rsi]
571 mov r15,rdx
572 adc r15,0
; rdi = store cursor for product words 1..7; seven more rows follow.
574 lea rdi,[8+rsp]
575 mov ecx,7
576 jmp NEAR $L$oop_mul_gather
578 ALIGN 32
; ---- Rows 1..7: accumulate a[0..7]*rbx, emit one product word per row ----
; NOTE(review): every iteration, including the last, loads the following
; multiplier word from [rbp]/[64+rbp]; on the final pass that value is
; never used. Confirm the table allocation tolerates this extra read.
579 $L$oop_mul_gather:
580 mul rbx
581 add r8,rax
582 mov rax,QWORD[8+rsi]
; Lowest accumulator word is complete: store it as the next product word.
583 mov QWORD[rdi],r8
584 mov r8,rdx
585 adc r8,0
587 mul rbx
588 movd xmm4,DWORD[rbp]
589 add r9,rax
590 mov rax,QWORD[16+rsi]
591 adc rdx,0
592 add r8,r9
593 mov r9,rdx
594 adc r9,0
596 mul rbx
597 movd xmm5,DWORD[64+rbp]
598 add r10,rax
599 mov rax,QWORD[24+rsi]
600 adc rdx,0
601 add r9,r10
602 mov r10,rdx
603 adc r10,0
605 mul rbx
606 pslldq xmm5,4
607 add r11,rax
608 mov rax,QWORD[32+rsi]
609 adc rdx,0
610 add r10,r11
611 mov r11,rdx
612 adc r11,0
614 mul rbx
615 por xmm4,xmm5
616 add r12,rax
617 mov rax,QWORD[40+rsi]
618 adc rdx,0
619 add r11,r12
620 mov r12,rdx
621 adc r12,0
623 mul rbx
624 add r13,rax
625 mov rax,QWORD[48+rsi]
626 adc rdx,0
627 add r12,r13
628 mov r13,rdx
629 adc r13,0
631 mul rbx
632 add r14,rax
633 mov rax,QWORD[56+rsi]
634 adc rdx,0
635 add r13,r14
636 mov r14,rdx
637 adc r14,0
639 mul rbx
; 66 48 0F 7E E3 = movq rbx,xmm4  (rbx = next multiplier word)
640 DB 102,72,15,126,227
641 add r15,rax
642 mov rax,QWORD[rsi]
643 adc rdx,0
644 add r14,r15
645 mov r15,rdx
646 adc r15,0
648 lea rbp,[128+rbp]
649 lea rdi,[8+rdi]
651 dec ecx
652 jnz NEAR $L$oop_mul_gather
; Flush the remaining 8 accumulator words as product words 8..15.
654 mov QWORD[rdi],r8
655 mov QWORD[8+rdi],r9
656 mov QWORD[16+rdi],r10
657 mov QWORD[24+rdi],r11
658 mov QWORD[32+rdi],r12
659 mov QWORD[40+rdi],r13
660 mov QWORD[48+rdi],r14
661 mov QWORD[56+rdi],r15
; 66 48 0F 7E C7 = movq rdi,xmm0  (restore output pointer)
663 DB 102,72,15,126,199
; 66 48 0F 7E CD = movq rbp,xmm1  (rbp = modulus pointer for the helpers)
664 DB 102,72,15,126,205
; Low 8 product words -> r8..r15 for the reduction.
666 mov r8,QWORD[rsp]
667 mov r9,QWORD[8+rsp]
668 mov r10,QWORD[16+rsp]
669 mov r11,QWORD[24+rsp]
670 mov r12,QWORD[32+rsp]
671 mov r13,QWORD[40+rsp]
672 mov r14,QWORD[48+rsp]
673 mov r15,QWORD[56+rsp]
675 call __rsaz_512_reduce
; Add the high 8 product words to the reduced value.
676 add r8,QWORD[64+rsp]
677 adc r9,QWORD[72+rsp]
678 adc r10,QWORD[80+rsp]
679 adc r11,QWORD[88+rsp]
680 adc r12,QWORD[96+rsp]
681 adc r13,QWORD[104+rsp]
682 adc r14,QWORD[112+rsp]
683 adc r15,QWORD[120+rsp]
; rcx = all-ones if the addition carried out, else 0 (subtract mask).
684 sbb rcx,rcx
; Stores the result to [rdi], subtracting the modulus when rcx is set.
686 call __rsaz_512_subtract
; rax = rsp at function entry (frame 128+24 plus 48 bytes of pushes).
688 lea rax,[((128+24+48))+rsp]
689 mov r15,QWORD[((-48))+rax]
690 mov r14,QWORD[((-40))+rax]
691 mov r13,QWORD[((-32))+rax]
692 mov r12,QWORD[((-24))+rax]
693 mov rbp,QWORD[((-16))+rax]
694 mov rbx,QWORD[((-8))+rax]
695 lea rsp,[rax]
696 $L$mul_gather4_epilogue:
697 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
698 mov rsi,QWORD[16+rsp]
699 DB 0F3h,0C3h ;repret
700 $L$SEH_end_rsaz_512_mul_gather4:
;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- multiply the value at arg1 by the value at
;                 arg2, reduce, conditionally subtract the modulus, write
;                 the result back to arg1 and also scatter it into an
;                 interleaved table.
; ABI:  Microsoft x64; arguments are first copied to rdi/rsi/rdx/rcx/r8/r9.
; In:   arg1 (rcx -> rdi)       in/out buffer, 8 qwords (used as the
;                               multiplier and as the result destination)
;       arg2 (rdx -> rsi)       multiplicand, 8 qwords
;       arg3 (r8  -> rdx)       modulus pointer (parked in xmm1, then rbp)
;       arg4 (r9  -> rcx)       constant stored at [128+rsp], read by
;                               __rsaz_512_reduce (presumably Montgomery
;                               n0 -- confirm against the C prototype)
;       arg5 ([40+rsp] -> r8)   table base
;       arg6 ([48+rsp] -> r9d)  table index, zero-extended from 32 bits
; Table layout as written here: result word k has its low dword at
; table + idx*4 + k*128 and its high dword 64 bytes after that.
; Frame: [rsp+0..127] 16-qword product, [rsp+128] arg4.
;-----------------------------------------------------------------------
701 global rsaz_512_mul_scatter4
703 ALIGN 32
704 rsaz_512_mul_scatter4:
705 mov QWORD[8+rsp],rdi ;WIN64 prologue
706 mov QWORD[16+rsp],rsi
; rax = rsp at entry; se_handler relies on this while still in the prologue.
707 mov rax,rsp
708 $L$SEH_begin_rsaz_512_mul_scatter4:
709 mov rdi,rcx
710 mov rsi,rdx
711 mov rdx,r8
712 mov rcx,r9
; Fifth and sixth arguments from the caller's stack.
713 mov r8,QWORD[40+rsp]
714 mov r9,QWORD[48+rsp]
717 push rbx
718 push rbp
719 push r12
720 push r13
721 push r14
722 push r15
; Zero-extend the 32-bit index before it is used in address arithmetic.
724 mov r9d,r9d
725 sub rsp,128+24
726 $L$mul_scatter4_body:
; r8 = address of this index's column in the table.
727 lea r8,[r9*4+r8]
; 66 48 0F 6E C7 = movq xmm0,rdi  (save in/out pointer)
728 DB 102,72,15,110,199
; 66 48 0F 6E CA = movq xmm1,rdx  (save modulus pointer)
729 DB 102,72,15,110,202
; 66 49 0F 6E D0 = movq xmm2,r8   (save table column address)
730 DB 102,73,15,110,208
731 mov QWORD[128+rsp],rcx
; __rsaz_512_mul: rbp = multiplier pointer, rbx = its first word,
; rsi = multiplicand pointer.
733 mov rbp,rdi
734 mov rbx,QWORD[rdi]
735 call __rsaz_512_mul
; 66 48 0F 7E C7 = movq rdi,xmm0  (restore in/out pointer)
737 DB 102,72,15,126,199
; 66 48 0F 7E CD = movq rbp,xmm1  (rbp = modulus pointer for the helpers)
738 DB 102,72,15,126,205
; Low 8 product words -> r8..r15 for the reduction.
740 mov r8,QWORD[rsp]
741 mov r9,QWORD[8+rsp]
742 mov r10,QWORD[16+rsp]
743 mov r11,QWORD[24+rsp]
744 mov r12,QWORD[32+rsp]
745 mov r13,QWORD[40+rsp]
746 mov r14,QWORD[48+rsp]
747 mov r15,QWORD[56+rsp]
749 call __rsaz_512_reduce
; Add the high 8 product words to the reduced value.
750 add r8,QWORD[64+rsp]
751 adc r9,QWORD[72+rsp]
752 adc r10,QWORD[80+rsp]
753 adc r11,QWORD[88+rsp]
754 adc r12,QWORD[96+rsp]
755 adc r13,QWORD[104+rsp]
756 adc r14,QWORD[112+rsp]
757 adc r15,QWORD[120+rsp]
; 66 48 0F 7E D6 = movq rsi,xmm2  (rsi = table column address). This
; does not modify flags, so the carry from the adc chain is still live.
758 DB 102,72,15,126,214
; rcx = all-ones if the addition carried out, else 0 (subtract mask).
759 sbb rcx,rcx
; Stores the result to [rdi] and leaves it in r8..r15.
761 call __rsaz_512_subtract
; Scatter: low dwords at k*128, then (after shifting) high dwords at
; k*128+64.
763 mov DWORD[rsi],r8d
764 shr r8,32
765 mov DWORD[128+rsi],r9d
766 shr r9,32
767 mov DWORD[256+rsi],r10d
768 shr r10,32
769 mov DWORD[384+rsi],r11d
770 shr r11,32
771 mov DWORD[512+rsi],r12d
772 shr r12,32
773 mov DWORD[640+rsi],r13d
774 shr r13,32
775 mov DWORD[768+rsi],r14d
776 shr r14,32
777 mov DWORD[896+rsi],r15d
778 shr r15,32
779 mov DWORD[64+rsi],r8d
780 mov DWORD[192+rsi],r9d
781 mov DWORD[320+rsi],r10d
782 mov DWORD[448+rsi],r11d
783 mov DWORD[576+rsi],r12d
784 mov DWORD[704+rsi],r13d
785 mov DWORD[832+rsi],r14d
786 mov DWORD[960+rsi],r15d
; rax = rsp at function entry (frame 128+24 plus 48 bytes of pushes).
788 lea rax,[((128+24+48))+rsp]
789 mov r15,QWORD[((-48))+rax]
790 mov r14,QWORD[((-40))+rax]
791 mov r13,QWORD[((-32))+rax]
792 mov r12,QWORD[((-24))+rax]
793 mov rbp,QWORD[((-16))+rax]
794 mov rbx,QWORD[((-8))+rax]
795 lea rsp,[rax]
796 $L$mul_scatter4_epilogue:
797 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
798 mov rsi,QWORD[16+rsp]
799 DB 0F3h,0C3h ;repret
800 $L$SEH_end_rsaz_512_mul_scatter4:
;-----------------------------------------------------------------------
; rsaz_512_mul_by_one -- run __rsaz_512_reduce on an 8-qword input and
;                 store the result. No multiply and no final subtract.
;                 (Presumably converts a value out of Montgomery form --
;                 confirm against the C caller.)
; ABI:  Microsoft x64; arguments are first copied to rdi/rsi/rdx/rcx.
; In:   arg1 (rcx -> rdi)       output buffer, 8 qwords
;       arg2 (rdx -> rsi)       input, 8 qwords
;       arg3 (r8  -> rdx, rbp)  modulus pointer
;       arg4 (r9  -> rcx)       constant stored at [128+rsp], read by
;                               __rsaz_512_reduce
; Frame: [rsp+0..127] scratch (first 112 bytes zeroed), [rsp+128] arg4.
; Clobbers xmm0.
;-----------------------------------------------------------------------
801 global rsaz_512_mul_by_one
803 ALIGN 32
804 rsaz_512_mul_by_one:
805 mov QWORD[8+rsp],rdi ;WIN64 prologue
806 mov QWORD[16+rsp],rsi
; rax = rsp at entry; se_handler relies on this while still in the prologue.
807 mov rax,rsp
808 $L$SEH_begin_rsaz_512_mul_by_one:
809 mov rdi,rcx
810 mov rsi,rdx
811 mov rdx,r8
812 mov rcx,r9
815 push rbx
816 push rbp
817 push r12
818 push r13
819 push r14
820 push r15
822 sub rsp,128+24
823 $L$mul_by_one_body:
824 mov rbp,rdx
825 mov QWORD[128+rsp],rcx
; Load the 8 input words into r8..r15, the registers the reduction
; operates on.
827 mov r8,QWORD[rsi]
828 pxor xmm0,xmm0
829 mov r9,QWORD[8+rsi]
830 mov r10,QWORD[16+rsi]
831 mov r11,QWORD[24+rsi]
832 mov r12,QWORD[32+rsi]
833 mov r13,QWORD[40+rsi]
834 mov r14,QWORD[48+rsi]
835 mov r15,QWORD[56+rsi]
; Zero bytes 0..111 of the scratch area. movdqa needs 16-byte alignment:
; with the ABI's entry alignment (rsp = 8 mod 16), 48 bytes of pushes and
; the 152-byte frame leave rsp 16-byte aligned.
837 movdqa XMMWORD[rsp],xmm0
838 movdqa XMMWORD[16+rsp],xmm0
839 movdqa XMMWORD[32+rsp],xmm0
840 movdqa XMMWORD[48+rsp],xmm0
841 movdqa XMMWORD[64+rsp],xmm0
842 movdqa XMMWORD[80+rsp],xmm0
843 movdqa XMMWORD[96+rsp],xmm0
844 call __rsaz_512_reduce
; Store the reduced value straight to the output buffer.
845 mov QWORD[rdi],r8
846 mov QWORD[8+rdi],r9
847 mov QWORD[16+rdi],r10
848 mov QWORD[24+rdi],r11
849 mov QWORD[32+rdi],r12
850 mov QWORD[40+rdi],r13
851 mov QWORD[48+rdi],r14
852 mov QWORD[56+rdi],r15
; rax = rsp at function entry (frame 128+24 plus 48 bytes of pushes).
854 lea rax,[((128+24+48))+rsp]
855 mov r15,QWORD[((-48))+rax]
856 mov r14,QWORD[((-40))+rax]
857 mov r13,QWORD[((-32))+rax]
858 mov r12,QWORD[((-24))+rax]
859 mov rbp,QWORD[((-16))+rax]
860 mov rbx,QWORD[((-8))+rax]
861 lea rsp,[rax]
862 $L$mul_by_one_epilogue:
863 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
864 mov rsi,QWORD[16+rsp]
865 DB 0F3h,0C3h ;repret
866 $L$SEH_end_rsaz_512_mul_by_one:
;-----------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper (not exported, custom register
;                 convention). Eight word-by-word reduction steps on the
;                 value in r8..r15.
; In:   r8..r15         8-qword value, r8 least significant
;       rbp             pointer to the 8-qword modulus
;       [128+8+rsp]     per-word constant. This is the caller's [128+rsp]:
;                       the extra 8 accounts for the return address
;                       pushed by `call`.
; Out:  r8..r15         8-qword result
; Clobbers: rax, rbx, rcx, rdx, rsi, flags.
; Each step computes rbx = r8 * constant, adds rbx * modulus[0..7] to the
; accumulator and drops the lowest word, shifting the window down by one
; qword.
;-----------------------------------------------------------------------
868 ALIGN 32
869 __rsaz_512_reduce:
; rbx = multiplier for the first step (low 64 bits of r8 * constant).
870 mov rbx,r8
871 imul rbx,QWORD[((128+8))+rsp]
872 mov rax,QWORD[rbp]
873 mov ecx,8
874 jmp NEAR $L$reduction_loop
876 ALIGN 32
877 $L$reduction_loop:
878 mul rbx
879 mov rax,QWORD[8+rbp]
; The low half of rbx*modulus[0] is discarded (rax was just overwritten).
; `neg r8` is used only for its carry flag: CF = (r8 != 0). Presumably
; the constant is chosen so that low half + r8 == 0 mod 2^64, making that
; CF the carry out of the dropped word -- confirm against the caller's
; choice of constant.
880 neg r8
881 mov r8,rdx
882 adc r8,0
884 mul rbx
885 add r9,rax
886 mov rax,QWORD[16+rbp]
887 adc rdx,0
; r8 now holds the new lowest word of the shifted accumulator.
888 add r8,r9
889 mov r9,rdx
890 adc r9,0
892 mul rbx
893 add r10,rax
894 mov rax,QWORD[24+rbp]
895 adc rdx,0
896 add r9,r10
897 mov r10,rdx
898 adc r10,0
900 mul rbx
901 add r11,rax
902 mov rax,QWORD[32+rbp]
903 adc rdx,0
904 add r10,r11
; Start computing the next step's multiplier early; mov does not disturb
; the carry consumed by the adc below.
905 mov rsi,QWORD[((128+8))+rsp]
908 adc rdx,0
909 mov r11,rdx
911 mul rbx
912 add r12,rax
913 mov rax,QWORD[40+rbp]
914 adc rdx,0
; rsi = new r8 * constant = multiplier for the next step (imul changes
; flags, so it sits between two completed carry chains).
915 imul rsi,r8
916 add r11,r12
917 mov r12,rdx
918 adc r12,0
920 mul rbx
921 add r13,rax
922 mov rax,QWORD[48+rbp]
923 adc rdx,0
924 add r12,r13
925 mov r13,rdx
926 adc r13,0
928 mul rbx
929 add r14,rax
930 mov rax,QWORD[56+rbp]
931 adc rdx,0
932 add r13,r14
933 mov r14,rdx
934 adc r14,0
936 mul rbx
; Last use of the current multiplier is done; switch to the next one.
937 mov rbx,rsi
938 add r15,rax
; Reload modulus[0] for the next step.
939 mov rax,QWORD[rbp]
940 adc rdx,0
941 add r14,r15
942 mov r15,rdx
943 adc r15,0
945 dec ecx
946 jne NEAR $L$reduction_loop
948 DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper (not exported, custom register
;                 convention). Stores r8..r15 to [rdi], then adds the
;                 masked negation of the modulus and stores again.
; In:   r8..r15   8-qword value, r8 least significant
;       rdi       output buffer, 8 qwords
;       rbp       pointer to the 8-qword modulus
;       rcx       mask: callers produce it with `sbb rcx,rcx`, so it is 0
;                 (leave the value unchanged) or all-ones (subtract the
;                 modulus)
; Out:  [rdi] and r8..r15 = value - (mask ? modulus : 0), modulo 2^512
; Clobbers: r8..r15, flags. No branch depends on the mask.
;-----------------------------------------------------------------------
951 ALIGN 32
952 __rsaz_512_subtract:
; First store the unmodified value; it is added back from memory below.
953 mov QWORD[rdi],r8
954 mov QWORD[8+rdi],r9
955 mov QWORD[16+rdi],r10
956 mov QWORD[24+rdi],r11
957 mov QWORD[32+rdi],r12
958 mov QWORD[40+rdi],r13
959 mov QWORD[48+rdi],r14
960 mov QWORD[56+rdi],r15
; Build (-modulus) & mask word by word: `neg` on word 0 and `not` on
; words 1..7. That equals the multi-word two's complement only when
; modulus[0] != 0 (presumably guaranteed because the modulus is odd --
; confirm against callers).
962 mov r8,QWORD[rbp]
963 mov r9,QWORD[8+rbp]
964 neg r8
965 not r9
966 and r8,rcx
967 mov r10,QWORD[16+rbp]
968 and r9,rcx
969 not r10
970 mov r11,QWORD[24+rbp]
971 and r10,rcx
972 not r11
973 mov r12,QWORD[32+rbp]
974 and r11,rcx
975 not r12
976 mov r13,QWORD[40+rbp]
977 and r12,rcx
978 not r13
979 mov r14,QWORD[48+rbp]
980 and r13,rcx
981 not r14
982 mov r15,QWORD[56+rbp]
983 and r14,rcx
984 not r15
985 and r15,rcx
; value + ((-modulus) & mask), carry propagated across all 8 words.
987 add r8,QWORD[rdi]
988 adc r9,QWORD[8+rdi]
989 adc r10,QWORD[16+rdi]
990 adc r11,QWORD[24+rdi]
991 adc r12,QWORD[32+rdi]
992 adc r13,QWORD[40+rdi]
993 adc r14,QWORD[48+rdi]
994 adc r15,QWORD[56+rdi]
996 mov QWORD[rdi],r8
997 mov QWORD[8+rdi],r9
998 mov QWORD[16+rdi],r10
999 mov QWORD[24+rdi],r11
1000 mov QWORD[32+rdi],r12
1001 mov QWORD[40+rdi],r13
1002 mov QWORD[48+rdi],r14
1003 mov QWORD[56+rdi],r15
1005 DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __rsaz_512_mul -- internal helper (not exported, custom register
;                 convention). Schoolbook 8x8-qword multiply.
; In:   rsi       multiplicand pointer, 8 qwords
;       rbp       multiplier pointer, 8 qwords
;       rbx       first multiplier word (callers preload it from [rbp])
; Out:  16-qword product written starting at [8+rsp] as seen here, i.e.
;       at the caller's [rsp] (the 8 skips the return address).
; Clobbers: rax, rbx, rcx, rdx, rdi, rbp (advanced by 64), r8..r15, flags.
;           Callers that need rdi park it in xmm0 around the call.
;-----------------------------------------------------------------------
1008 ALIGN 32
1009 __rsaz_512_mul:
; rdi = store cursor = start of the caller's product buffer.
1010 lea rdi,[8+rsp]
; ---- First row: product word 0 stored, r8..r15 = remaining words ----
1012 mov rax,QWORD[rsi]
1013 mul rbx
1014 mov QWORD[rdi],rax
1015 mov rax,QWORD[8+rsi]
1016 mov r8,rdx
1018 mul rbx
1019 add r8,rax
1020 mov rax,QWORD[16+rsi]
1021 mov r9,rdx
1022 adc r9,0
1024 mul rbx
1025 add r9,rax
1026 mov rax,QWORD[24+rsi]
1027 mov r10,rdx
1028 adc r10,0
1030 mul rbx
1031 add r10,rax
1032 mov rax,QWORD[32+rsi]
1033 mov r11,rdx
1034 adc r11,0
1036 mul rbx
1037 add r11,rax
1038 mov rax,QWORD[40+rsi]
1039 mov r12,rdx
1040 adc r12,0
1042 mul rbx
1043 add r12,rax
1044 mov rax,QWORD[48+rsi]
1045 mov r13,rdx
1046 adc r13,0
1048 mul rbx
1049 add r13,rax
1050 mov rax,QWORD[56+rsi]
1051 mov r14,rdx
1052 adc r14,0
1054 mul rbx
1055 add r14,rax
; Preload multiplicand word 0 for the next row.
1056 mov rax,QWORD[rsi]
1057 mov r15,rdx
1058 adc r15,0
; Advance to multiplier word 1 and product word 1 (lea keeps flags).
1060 lea rbp,[8+rbp]
1061 lea rdi,[8+rdi]
1063 mov ecx,7
1064 jmp NEAR $L$oop_mul
1066 ALIGN 32
; ---- Rows 1..7: accumulate a[0..7]*b[i], emit one product word per row ----
1067 $L$oop_mul:
1068 mov rbx,QWORD[rbp]
1069 mul rbx
1070 add r8,rax
1071 mov rax,QWORD[8+rsi]
; Lowest accumulator word is complete: store it as the next product word.
1072 mov QWORD[rdi],r8
1073 mov r8,rdx
1074 adc r8,0
1076 mul rbx
1077 add r9,rax
1078 mov rax,QWORD[16+rsi]
1079 adc rdx,0
1080 add r8,r9
1081 mov r9,rdx
1082 adc r9,0
1084 mul rbx
1085 add r10,rax
1086 mov rax,QWORD[24+rsi]
1087 adc rdx,0
1088 add r9,r10
1089 mov r10,rdx
1090 adc r10,0
1092 mul rbx
1093 add r11,rax
1094 mov rax,QWORD[32+rsi]
1095 adc rdx,0
1096 add r10,r11
1097 mov r11,rdx
1098 adc r11,0
1100 mul rbx
1101 add r12,rax
1102 mov rax,QWORD[40+rsi]
1103 adc rdx,0
1104 add r11,r12
1105 mov r12,rdx
1106 adc r12,0
1108 mul rbx
1109 add r13,rax
1110 mov rax,QWORD[48+rsi]
1111 adc rdx,0
1112 add r12,r13
1113 mov r13,rdx
1114 adc r13,0
1116 mul rbx
1117 add r14,rax
1118 mov rax,QWORD[56+rsi]
1119 adc rdx,0
1120 add r13,r14
1121 mov r14,rdx
; Advance the multiplier pointer; lea leaves the pending carry intact.
1122 lea rbp,[8+rbp]
1123 adc r14,0
1125 mul rbx
1126 add r15,rax
1127 mov rax,QWORD[rsi]
1128 adc rdx,0
1129 add r14,r15
1130 mov r15,rdx
1131 adc r15,0
1133 lea rdi,[8+rdi]
1135 dec ecx
1136 jnz NEAR $L$oop_mul
; Flush the remaining 8 accumulator words as product words 8..15.
1138 mov QWORD[rdi],r8
1139 mov QWORD[8+rdi],r9
1140 mov QWORD[16+rdi],r10
1141 mov QWORD[24+rdi],r11
1142 mov QWORD[32+rdi],r12
1143 mov QWORD[40+rdi],r13
1144 mov QWORD[48+rdi],r14
1145 mov QWORD[56+rdi],r15
1147 DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; rsaz_512_scatter4 -- copy an 8-qword value into column `idx` of an
;                 interleaved table.
; ABI:  Microsoft x64, leaf function (no prologue, no stack use).
; In:   rcx = table base
;       rdx = source, 8 qwords
;       r8  = index; each index occupies one dword-wide column
; Layout written: source word k has its low dword at
; table + idx*4 + k*128 and its high dword 64 bytes after that.
; Clobbers: rax, rcx, rdx, r9, flags.
; NOTE(review): r8 is used at full 64-bit width, whereas the mul_*4 entry
; points zero-extend their index with `mov r9d,r9d`. If the C prototype
; declares a 32-bit index, the upper half of r8 is not guaranteed by the
; ABI -- confirm.
;-----------------------------------------------------------------------
1149 global rsaz_512_scatter4
1151 ALIGN 16
1152 rsaz_512_scatter4:
; rcx = address of the selected column.
1153 lea rcx,[r8*4+rcx]
1154 mov r9d,8
1155 jmp NEAR $L$oop_scatter
1156 ALIGN 16
1157 $L$oop_scatter:
1158 mov rax,QWORD[rdx]
1159 lea rdx,[8+rdx]
; Low dword, then high dword 64 bytes further on.
1160 mov DWORD[rcx],eax
1161 shr rax,32
1162 mov DWORD[64+rcx],eax
; Next word of this column is 128 bytes on.
1163 lea rcx,[128+rcx]
1164 dec r9d
1165 jnz NEAR $L$oop_scatter
1166 DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; rsaz_512_gather4 -- read column `idx` of an interleaved table back into
;                 a contiguous 8-qword value (inverse of the layout
;                 written by rsaz_512_scatter4).
; ABI:  Microsoft x64, leaf function (no prologue, no stack use).
; In:   rcx = destination, 8 qwords
;       rdx = table base
;       r8  = index
; Clobbers: rax, rcx, rdx, r8, r9, flags.
; NOTE(review): r8 is used at full 64-bit width without zero-extension;
; see whether the C prototype passes a 32-bit index. The load address
; also depends directly on the index -- confirm whether callers need an
; index-independent access pattern.
;-----------------------------------------------------------------------
1169 global rsaz_512_gather4
1171 ALIGN 16
1172 rsaz_512_gather4:
; rdx = address of the selected column.
1173 lea rdx,[r8*4+rdx]
1174 mov r9d,8
1175 jmp NEAR $L$oop_gather
1176 ALIGN 16
1177 $L$oop_gather:
; 32-bit loads zero-extend, so rax/r8 hold the low and high dwords.
1178 mov eax,DWORD[rdx]
1179 mov r8d,DWORD[64+rdx]
1180 lea rdx,[128+rdx]
; rax = (high << 32) | low.
1181 shl r8,32
1182 or rax,r8
1183 mov QWORD[rcx],rax
1184 lea rcx,[8+rcx]
1185 dec r9d
1186 jnz NEAR $L$oop_gather
1187 DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by the
;               five rsaz_512_* entry points listed in .xdata.
; ABI:  Microsoft x64 handler signature:
;       rcx = ExceptionRecord, rdx = EstablisherFrame,
;       r8  = ContextRecord (CONTEXT*), r9 = DispatcherContext.
; The offsets used below follow the Win64 CONTEXT and DISPATCHER_CONTEXT
; layouts (see the Microsoft x64 exception-handling documentation).
; It rebuilds the caller's register state by hand, because the functions
; carry no unwind codes, then lets RtlVirtualUnwind step one frame.
; Returns eax = 1 (ExceptionContinueSearch).
;-----------------------------------------------------------------------
1189 EXTERN __imp_RtlVirtualUnwind
1191 ALIGN 16
1192 se_handler:
1193 push rsi
1194 push rdi
1195 push rbx
1196 push rbp
1197 push r12
1198 push r13
1199 push r14
1200 push r15
1201 pushfq
; 32 bytes of home space + 4 stack arguments for RtlVirtualUnwind.
1202 sub rsp,64
; rax = context->Rax, rbx = context->Rip.
1204 mov rax,QWORD[120+r8]
1205 mov rbx,QWORD[248+r8]
; rsi = disp->ImageBase, r11 = disp->HandlerData (the two RVAs that
; follow the handler RVA in .xdata: body label, epilogue label).
1207 mov rsi,QWORD[8+r9]
1208 mov r11,QWORD[56+r9]
; Rip below the body label: still in the prologue, where rax holds the
; entry rsp (`mov rax,rsp`) and nothing has been pushed yet.
1210 mov r10d,DWORD[r11]
1211 lea r10,[r10*1+rsi]
1212 cmp rbx,r10
1213 jb NEAR $L$common_seh_tail
; rax = context->Rsp.
1215 mov rax,QWORD[152+r8]
; Rip at or past the epilogue label: rsp and the pushed registers have
; already been restored.
1217 mov r10d,DWORD[4+r11]
1218 lea r10,[r10*1+rsi]
1219 cmp rbx,r10
1220 jae NEAR $L$common_seh_tail
; In the body: entry rsp = Rsp + frame (128+24) + six pushes (48).
1222 lea rax,[((128+24+48))+rax]
; Recover the pushed callee-saved registers and write them into the
; context (Rbx, Rbp, R12..R15).
1224 mov rbx,QWORD[((-8))+rax]
1225 mov rbp,QWORD[((-16))+rax]
1226 mov r12,QWORD[((-24))+rax]
1227 mov r13,QWORD[((-32))+rax]
1228 mov r14,QWORD[((-40))+rax]
1229 mov r15,QWORD[((-48))+rax]
1230 mov QWORD[144+r8],rbx
1231 mov QWORD[160+r8],rbp
1232 mov QWORD[216+r8],r12
1233 mov QWORD[224+r8],r13
1234 mov QWORD[232+r8],r14
1235 mov QWORD[240+r8],r15
1237 $L$common_seh_tail:
; rax = rsp at function entry: reload rdi/rsi saved by the WIN64 prologue
; and publish Rsp, Rsi, Rdi in the context.
1238 mov rdi,QWORD[8+rax]
1239 mov rsi,QWORD[16+rax]
1240 mov QWORD[152+r8],rax
1241 mov QWORD[168+r8],rsi
1242 mov QWORD[176+r8],rdi
; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
1244 mov rdi,QWORD[40+r9]
1245 mov rsi,r8
1246 mov ecx,154
; Bytes FC F3 48 A5 = cld ; rep movsq
1247 DD 0xa548f3fc
; RtlVirtualUnwind(HandlerType = 0 (rcx), ImageBase (rdx), ControlPc (r8),
;   FunctionEntry (r9), ContextRecord, &HandlerData, &EstablisherFrame,
;   ContextPointers = NULL), all taken from the dispatcher context.
1249 mov rsi,r9
1250 xor rcx,rcx
1251 mov rdx,QWORD[8+rsi]
1252 mov r8,QWORD[rsi]
1253 mov r9,QWORD[16+rsi]
1254 mov r10,QWORD[40+rsi]
1255 lea r11,[56+rsi]
1256 lea r12,[24+rsi]
1257 mov QWORD[32+rsp],r10
1258 mov QWORD[40+rsp],r11
1259 mov QWORD[48+rsp],r12
1260 mov QWORD[56+rsp],rcx
1261 call QWORD[__imp_RtlVirtualUnwind]
; ExceptionContinueSearch.
1263 mov eax,1
1264 add rsp,64
1265 popfq
1266 pop r15
1267 pop r14
1268 pop r13
1269 pop r12
1270 pop rbp
1271 pop rbx
1272 pop rdi
1273 pop rsi
1274 DB 0F3h,0C3h ;repret
; .pdata: one function-table entry per exported rsaz_512_* routine that
; sets up a frame. Each entry is three image-relative addresses:
; function begin, function end, and its unwind info in .xdata.
; rsaz_512_scatter4 / rsaz_512_gather4 and the __rsaz_512_* helpers have
; no entry here.
1277 section .pdata rdata align=4
1278 ALIGN 4
1279 DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase
1280 DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase
1281 DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase
1283 DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase
1284 DD $L$SEH_end_rsaz_512_mul wrt ..imagebase
1285 DD $L$SEH_info_rsaz_512_mul wrt ..imagebase
1287 DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase
1288 DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase
1289 DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase
1291 DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase
1292 DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase
1293 DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase
1295 DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase
1296 DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
1297 DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase
; .xdata: unwind info records referenced from .pdata. Each record is:
;   DB 9,0,0,0   header bytes: 9 = version 1 with the exception-handler
;                flag (per the Win64 UNWIND_INFO layout), prologue size 0,
;                no unwind codes, no frame register
;   DD handler   image-relative address of se_handler
;   DD body,epilogue  handler data: the two labels se_handler compares
;                the faulting Rip against (read via DWORD[r11] and
;                DWORD[4+r11])
1299 section .xdata rdata align=8
1300 ALIGN 8
1301 $L$SEH_info_rsaz_512_sqr:
1302 DB 9,0,0,0
1303 DD se_handler wrt ..imagebase
1304 DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
1305 $L$SEH_info_rsaz_512_mul:
1306 DB 9,0,0,0
1307 DD se_handler wrt ..imagebase
1308 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
1309 $L$SEH_info_rsaz_512_mul_gather4:
1310 DB 9,0,0,0
1311 DD se_handler wrt ..imagebase
1312 DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase
1313 $L$SEH_info_rsaz_512_mul_scatter4:
1314 DB 9,0,0,0
1315 DD se_handler wrt ..imagebase
1316 DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase
1317 $L$SEH_info_rsaz_512_mul_by_one:
1318 DB 9,0,0,0
1319 DD se_handler wrt ..imagebase
1320 DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase