1 #if defined(__x86_64__)
4 .extern OPENSSL_ia32cap_P
5 .hidden OPENSSL_ia32cap_P
# bn_mul_mont_gather5 — Montgomery multiplication where the multiplier word
# is fetched from a power table via SSE2 masked loads (constant-time gather,
# no secret-dependent addressing).  Perlasm-generated, System V AMD64 ABI.
# NOTE(review): the integer fused at the start of each line looks like an
# original source line number from a lossy extraction, and most of the body
# is missing between visible lines — confirm every per-line note below
# against the full generated x86_64-mont5.S.
7 .globl bn_mul_mont_gather5
8 .hidden bn_mul_mont_gather5
9 .type bn_mul_mont_gather5,@function
# Carve the word-sized scratch frame out of the stack (r11 presumably holds
# a negative word count here — TODO confirm).
29 leaq (%rsp,%r11,8),%rsp
# Save the caller's rsp (in rax) just past the n-word scratch area so the
# epilogue can restore it — presumably; verify against the full listing.
32 movq %rax,8(%rsp,%r9,8)
# Load the four 64-bit selection masks for the constant-time gather.
39 leaq .Lmagic_masks(%rip),%rax              # RIP-relative: PIC-safe
41 leaq 96(%r12,%r11,8),%r12
42 movq 0(%rax,%r10,8),%xmm4
43 movq 8(%rax,%r10,8),%xmm5
44 movq 16(%rax,%r10,8),%xmm6
45 movq 24(%rax,%r10,8),%xmm7
# Hand-encoded "movq %xmm0,%rbx" (66 48 0F 7E C3): perlasm emits raw bytes
# so assemblers without SSE2 mnemonic support still build the file.
60 .byte 102,72,15,126,195
# Inner multiply-accumulate: walk words of a (%rsi) and n (%rcx) indexed by
# r15, accumulating the running product into the stack scratch area.
103 movq (%rsi,%r15,8),%rax
108 movq %r13,-16(%rsp,%r15,8)
114 movq (%rcx,%r15,8),%rax
123 .byte 102,72,15,126,195                   # movq %xmm0,%rbx (encoded)
130 movq %r13,-16(%rsp,%r15,8)
# Store the top word and the final carry of the running result.
137 movq %r13,-8(%rsp,%r9,8)
138 movq %rdx,(%rsp,%r9,8)
# Outer-loop body repeats the same accumulate, now also reading the prior
# pass's result back from the scratch area (r10).
184 movq (%rsi,%r15,8),%rax
187 movq (%rsp,%r15,8),%r10
189 movq %r13,-16(%rsp,%r15,8)
195 movq (%rcx,%r15,8),%rax
206 .byte 102,72,15,126,195                   # movq %xmm0,%rbx (encoded)
212 movq (%rsp,%r15,8),%r10
214 movq %r13,-16(%rsp,%r15,8)
222 movq %r13,-8(%rsp,%r9,8)
223 movq %rdx,(%rsp,%r9,8)
# Conditional final subtraction of the modulus: sbbq propagates the borrow
# across all words (reduction of the result into [0, n)).
235 .Lsub: sbbq (%rcx,%r14,8),%rax
236 movq %rax,(%rdi,%r14,8)
237 movq 8(%rsi,%r14,8),%rax
# Copy the selected result out to rdi and wipe the stack scratch slots
# (NOTE(review): storing %r14 back into the scratch slot looks like the
# wipe of secret intermediates — confirm).
247 movq (%rsp,%r14,8),%rsi
248 movq (%rdi,%r14,8),%rcx
252 movq %r14,(%rsp,%r14,8)
253 movq %rsi,(%rdi,%r14,8)
# Reload the saved stack pointer from the prologue's stash slot.
258 movq 8(%rsp,%r9,8),%rsi
269 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# bn_mul4x_mont_gather5 — 4-way unrolled variant of bn_mul_mont_gather5;
# this visible fragment is only the frame setup (body lives in
# mul4x_internal below).  NOTE(review): excerpt is heavily sampled —
# confirm against the full generated file.
270 .type bn_mul4x_mont_gather5,@function
272 bn_mul4x_mont_gather5:
# Frame sizing: r11 = tentative new rsp for a 2*r9-word scratch area.
295 leaq -64(%rsp,%r9,2),%r11
301 leaq -64(%rsp,%r9,2),%rsp
# The 4096-64 constant suggests the frame is grown a page at a time so the
# guard page is touched and not skipped — presumably a stack probe; confirm.
306 leaq 4096-64(,%r9,2),%r10
307 leaq -64(%rsp,%r9,2),%rsp
332 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# mul4x_internal — file-local core of the 4x-unrolled Montgomery multiply
# (no .globl: internal linkage only).  Same constant-time gather pattern as
# bn_mul_mont_gather5.  NOTE(review): interior heavily sampled; per-line
# notes below are best-effort — verify against the full listing.
334 .type mul4x_internal,@function
339 leaq 256(%rdx,%r9,1),%r13
# Load the four 64-bit gather-selection masks.
345 leaq .Lmagic_masks(%rip),%rax             # RIP-relative: PIC-safe
347 leaq 96(%rdx,%r11,8),%r12
348 movq 0(%rax,%r10,8),%xmm4
349 movq 8(%rax,%r10,8),%xmm5
351 movq 16(%rax,%r10,8),%xmm6
352 movq 24(%rax,%r10,8),%xmm7
# Encoded movq %xmm0,%rbx (66 48 0F 7E C3) — gathered multiplier word.
377 .byte 102,72,15,126,195
# rsi is advanced past the end of a[] so the (negative) counters r9/r15 can
# index backwards from it — presumably; confirm.
384 leaq (%rsi,%r9,1),%rsi
404 leaq 64+8(%rsp,%r11,8),%r14
414 movq 8(%rsi,%r9,1),%rax
426 movq 16(%rsi,%r9,1),%rax
# 4x-unrolled multiply-accumulate: a[j-1], a[j], a[j+1], a[j+2] per pass.
447 movq -8(%rsi,%r15,1),%rax
462 movq (%rsi,%r15,1),%rax
477 movq 8(%rsi,%r15,1),%rax
492 movq 16(%rsi,%r15,1),%rax
527 movq (%rsi,%r9,1),%rax
534 .byte 102,72,15,126,195                   # movq %xmm0,%rbx (encoded)
535 leaq (%rcx,%r9,2),%rcx
546 movq (%r14,%r9,1),%r10
569 leaq (%r14,%r9,1),%r14
570 movq 8(%rsi,%r9,1),%rax
# bn_power5 — raises the input to the 32nd power mod n in Montgomery form:
# five back-to-back squarings (a^(2^5)), used as the 5-bit-window step of
# constant-time modular exponentiation.  NOTE(review): excerpt is sampled;
# confirm details against the full listing.
735 .type bn_power5,@function
# Frame setup identical in shape to bn_mul4x_mont_gather5 (page-at-a-time
# growth — presumably a stack probe; confirm).
757 leaq -64(%rsp,%r9,2),%r11
763 leaq -64(%rsp,%r9,2),%rsp
768 leaq 4096-64(,%r9,2),%r10
769 leaq -64(%rsp,%r9,2),%rsp
# Stash pointer arguments in xmm registers so they survive the internal
# calls: bytes 66 48/4C 0F 6E xx encode movq GPR->XMM.
791 .byte 102,72,15,110,207                   # movq %rdi,%xmm1
792 .byte 102,72,15,110,209                   # movq %rcx,%xmm2
793 .byte 102,73,15,110,218                   # movq %r10,%xmm3
794 .byte 102,72,15,110,226                   # movq %rdx,%xmm4
# Five consecutive modular squarings => a^(2^5) = a^32.
796 call __bn_sqr8x_internal
797 call __bn_sqr8x_internal
798 call __bn_sqr8x_internal
799 call __bn_sqr8x_internal
800 call __bn_sqr8x_internal
# Recover the stashed pointers (66 48 0F 7E xx encodes movq XMM->GPR).
802 .byte 102,72,15,126,209                   # movq %xmm2,%rcx
803 .byte 102,72,15,126,226                   # movq %xmm4,%rdx
821 .size bn_power5,.-bn_power5
# bn_sqr8x_internal — 8x-unrolled big-number squaring core with fused
# Montgomery reduction (.L8x_reduction_loop).  Squaring strategy: compute
# cross-products a[i]*a[j] (i<j), double them via the shift-and-add loop,
# then add the diagonal squares a[i]^2.  NOTE(review): this excerpt is
# heavily sampled — most instructions are missing between visible lines;
# treat every per-line note as provisional.
823 .globl bn_sqr8x_internal
824 .hidden bn_sqr8x_internal
825 .hidden bn_sqr8x_internal
826 .type bn_sqr8x_internal,@function
# rsi advanced past a[] so negative counters (rbp/rcx) index backwards.
904 leaq (%rsi,%r9,1),%rsi
909 movq -32(%rsi,%rbp,1),%r14
# rdi = destination inside the 2n-word scratch area at 48+8(%rsp).
910 leaq 48+8(%rsp,%r9,2),%rdi
911 movq -24(%rsi,%rbp,1),%rax
912 leaq -32(%rdi,%rbp,1),%rdi
913 movq -16(%rsi,%rbp,1),%rbx
920 movq %r10,-24(%rdi,%rbp,1)
926 movq %r11,-16(%rdi,%rbp,1)
930 movq -8(%rsi,%rbp,1),%rbx
944 movq %r10,-8(%rdi,%rcx,1)
# 4x-unrolled cross-product pass over a[] (rcx-indexed).
949 movq (%rsi,%rcx,1),%rbx
959 movq 8(%rsi,%rcx,1),%rbx
969 movq %r11,(%rdi,%rcx,1)
976 movq 16(%rsi,%rcx,1),%rbx
985 movq %r10,8(%rdi,%rcx,1)
992 movq 24(%rsi,%rcx,1),%rbx
1002 movq %r11,16(%rdi,%rcx,1)
1014 movq %r10,-8(%rdi,%rcx,1)
# Second cross-product sweep: same pattern, but accumulates into partial
# results already in the scratch area (the addq ...(%rdi,...) lines).
1033 movq -32(%rsi,%rbp,1),%r14
1034 leaq 48+8(%rsp,%r9,2),%rdi
1035 movq -24(%rsi,%rbp,1),%rax
1036 leaq -32(%rdi,%rbp,1),%rdi
1037 movq -16(%rsi,%rbp,1),%rbx
1041 movq -24(%rdi,%rbp,1),%r10
1045 movq %r10,-24(%rdi,%rbp,1)
1052 addq -16(%rdi,%rbp,1),%r11
1055 movq %r11,-16(%rdi,%rbp,1)
1059 movq -8(%rsi,%rbp,1),%rbx
1064 addq -8(%rdi,%rbp,1),%r12
1075 movq %r10,-8(%rdi,%rbp,1)
1082 movq (%rsi,%rcx,1),%rbx
1088 addq (%rdi,%rcx,1),%r13
1095 movq 8(%rsi,%rcx,1),%rbx
1103 movq %r11,(%rdi,%rcx,1)
1107 addq 8(%rdi,%rcx,1),%r12
1118 movq %r10,-8(%rdi,%rcx,1)
1139 leaq 48+8(%rsp,%r9,2),%rdi
1141 leaq -32(%rdi,%rbp,1),%rdi
1200 movq -16(%rsi,%rbp,1),%rax
1201 leaq 48+8(%rsp),%rdi
# Doubling of cross products: leaq (x,y,2) computes x + 2*y, i.e. a
# shift-left-by-one with the incoming carry word folded in, flag-free.
1205 leaq (%r14,%r10,2),%r12
1207 leaq (%rcx,%r11,2),%r13
1216 movq -8(%rsi,%rbp,1),%rax
1220 leaq (%r14,%r10,2),%rbx
1224 leaq (%rcx,%r11,2),%r8
1233 movq 0(%rsi,%rbp,1),%rax
1240 jmp .Lsqr4x_shift_n_add
# Main shift-and-add loop: doubles four words per iteration while adding
# the diagonal squares (a[i]^2 comes from mul of rax by itself upstream —
# presumably; the mul instructions are not visible in this excerpt).
1243 .Lsqr4x_shift_n_add:
1244 leaq (%r14,%r10,2),%r12
1246 leaq (%rcx,%r11,2),%r13
1255 movq -8(%rsi,%rbp,1),%rax
1259 leaq (%r14,%r10,2),%rbx
1263 leaq (%rcx,%r11,2),%r8
1272 movq 0(%rsi,%rbp,1),%rax
1276 leaq (%r14,%r10,2),%r12
1280 leaq (%rcx,%r11,2),%r13
1289 movq 8(%rsi,%rbp,1),%rax
1293 leaq (%r14,%r10,2),%rbx
1297 leaq (%rcx,%r11,2),%r8
1306 movq 16(%rsi,%rbp,1),%rax
1313 jnz .Lsqr4x_shift_n_add
# Loop tail: final doubled limbs outside the counted loop.
1315 leaq (%r14,%r10,2),%r12
1318 leaq (%rcx,%r11,2),%r13
1331 leaq (%r14,%r10,2),%rbx
1335 leaq (%rcx,%r11,2),%r8
1344 .byte 102,72,15,126,213                  # movq %xmm2,%rbp (encoded)
# Montgomery reduction over the 2n-word square, 8 words per pass.
1347 leaq (%rbp,%r9,2),%rcx
1348 leaq 48+8(%rsp,%r9,2),%rdx
1350 leaq 48+8(%rsp,%r9,1),%rdi
1353 jmp .L8x_reduction_loop
1356 .L8x_reduction_loop:
1357 leaq (%rdi,%r9,1),%rdi
# 32+8(%rsp) appears to hold n0 (-n^-1 mod 2^64); rbx*n0 gives the word
# that zeroes the low limb — standard Montgomery step; TODO confirm slot.
1372 imulq 32+8(%rsp),%rbx
1390 movq %rbx,48-8+8(%rsp,%rcx,8)
1399 movq 32+8(%rsp),%rsi
1465 movq 48+56+8(%rsp),%rbx
1529 movq 48-16+8(%rsp,%rcx,8),%rbx
1545 movq 48+56+8(%rsp),%rbx
1580 .byte 102,72,15,126,213                  # movq %xmm2,%rbp (encoded)
1584 .byte 102,73,15,126,217                  # movq %xmm3,%r9 (encoded)
1594 jb .L8x_reduction_loop
# Post-reduction: recover stashed pointers and finish.
1597 leaq (%rdi,%r9,1),%rbx
1601 .byte 102,72,15,126,207                  # movq %xmm1,%rdi (encoded)
1603 .byte 102,72,15,126,206                  # movq %xmm1,%rsi (encoded)
1604 leaq (%rbp,%rax,8),%rbp
1632 .size bn_sqr8x_internal,.-bn_sqr8x_internal
# bn_from_montgomery — exported entry point; its entire body is absent from
# this excerpt (only the linkage directives survived the extraction), so no
# behavior can be documented here.  NOTE(review): consult the full
# generated x86_64-mont5.S; the real conversion work appears to live in
# bn_from_mont8x below.
1633 .globl bn_from_montgomery
1634 .hidden bn_from_montgomery
1635 .type bn_from_montgomery,@function
1642 .size bn_from_montgomery,.-bn_from_montgomery
# bn_from_mont8x — converts a value out of Montgomery form via one
# Montgomery reduction (the visible "call sqr8x_reduction"), then wipes its
# stack scratch.  NOTE(review): excerpt is sampled; confirm details.
1644 .type bn_from_mont8x,@function
# Frame sizing with the same page-at-a-time growth pattern as the other
# entry points (presumably a stack probe; confirm).
1668 leaq -64(%rsp,%r9,2),%r11
1674 leaq -64(%rsp,%r9,2),%rsp
1679 leaq 4096-64(,%r9,2),%r10
1680 leaq -64(%rsp,%r9,2),%rsp
# Interleaved copy-in: unaligned loads of the input (movdqu) paired with
# aligned stores of %xmm0 into the upper half of the scratch area —
# %xmm0 is presumably zero here (zero-extend to 2n words); confirm.
1710 movdqu 16(%rsi),%xmm2
1711 movdqu 32(%rsi),%xmm3
1712 movdqa %xmm0,(%rax,%r9,1)
1713 movdqu 48(%rsi),%xmm4
1714 movdqa %xmm0,16(%rax,%r9,1)
# Encoded "leaq 64(%rsi),%rsi" (48 8D B6 40 00 00 00) — advance input ptr.
1715 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
1717 movdqa %xmm0,32(%rax,%r9,1)
1718 movdqa %xmm2,16(%rax)
1719 movdqa %xmm0,48(%rax,%r9,1)
1720 movdqa %xmm3,32(%rax)
1721 movdqa %xmm4,48(%rax)
# Stash pointer args in xmm (movq GPR->XMM encodings) across the call.
1726 .byte 102,72,15,110,207                  # movq %rdi,%xmm1
1727 .byte 102,72,15,110,209                  # movq %rcx,%xmm2
1730 .byte 102,73,15,110,218                  # movq %r10,%xmm3
# One Montgomery reduction of the widened value = conversion from
# Montgomery form.
1731 call sqr8x_reduction
1736 jmp .Lfrom_mont_zero
# Scrub loop: overwrite the scratch area 64 bytes per pass (%xmm0 is
# presumably zero — confirm) so no secret intermediates remain on stack.
1740 movdqa %xmm0,0(%rax)
1741 movdqa %xmm0,16(%rax)
1742 movdqa %xmm0,32(%rax)
1743 movdqa %xmm0,48(%rax)
1746 jnz .Lfrom_mont_zero
1758 .size bn_from_mont8x,.-bn_from_mont8x
# bn_scatter5 — stores a big number into one slot of the power table that
# bn_gather5 later reads back constant-time.  Visible: early-out when the
# (word-count?) register is zero, and slot addressing rdx += rcx*8.
# NOTE(review): body mostly absent from this excerpt — confirm.
1761 .type bn_scatter5,@function
1765 jz .Lscatter_epilogue                    # nothing to store: bail out
1766 leaq (%rdx,%rcx,8),%rdx                  # rdx = table + index*8
1776 .size bn_scatter5,.-bn_scatter5
# bn_gather5 — constant-time table lookup: reads ALL table lines with fixed
# addresses (-128(%rdx), -64(%rdx), ...) and selects the wanted entry with
# the .Lmagic_masks AND-masks, so no load address depends on the secret
# index.  NOTE(review): excerpt is sampled — confirm against full listing.
1780 .type bn_gather5,@function
1787 leaq .Lmagic_masks(%rip),%rax            # RIP-relative: PIC-safe
1789 leaq 128(%rdx,%r11,8),%rdx
# Four 64-bit selection masks (exactly one is all-ones — presumably).
1790 movq 0(%rax,%rcx,8),%xmm4
1791 movq 8(%rax,%rcx,8),%xmm5
1792 movq 16(%rax,%rcx,8),%xmm6
1793 movq 24(%rax,%rcx,8),%xmm7
# Unconditional loads at fixed offsets from the biased table pointer.
1797 movq -128(%rdx),%xmm0
1798 movq -64(%rdx),%xmm1
# Win64 SEH end marker — perlasm keeps it even in the ELF flavor.
1816 .LSEH_end_bn_gather5:
1817 .size bn_gather5,.-bn_gather5
# Read-only data tail.  The .long rows look like part of the .Lmagic_masks
# selection-mask table (one -1,-1 = all-ones quadword per row selects one
# table lane; 0 rows select nothing) — NOTE(review): the table head is not
# visible in this excerpt, confirm row layout against the full file.
1820 .long 0,0, 0,0, 0,0, -1,-1
1821 .long 0,0, 0,0, 0,0, 0,0
# ASCII banner; the bytes decode to: "Montgomery Multiplication with
# scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>" + NUL.
1822 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0