; Output-format preamble: selects the code section for the object format in use.
; NOTE(review): the leading integers on these lines and the mid-statement line
; breaks look like listing/extraction artifacts. The embedded numbers skip, so
; some original lines (for example the closing %endif directives) are not
; visible in this view.
1 %ifidn __OUTPUT_FORMAT__
,obj
; obj output: 32-bit segment named "code", class "code", aligned to 64 bytes.
2 section code use32 class
=code
align=64
3 %elifidn __OUTPUT_FORMAT__
,win32
; win32 output: when assembled by yasm, reject versions below 01010000h.
4 %ifdef __YASM_VERSION_ID__
5 %if __YASM_VERSION_ID__
< 01010000h
6 %error yasm version
1.1.0 or later needed.
; NOTE(review): ".00" below is presumably the tail of the SafeSEH feature
; symbol described at the linked page; the prefix looks lost -- confirm
; against the upstream file.
8 ; Yasm automatically includes .00 and complains about redefining it.
9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
; win32 output: code goes in .text, aligned to 64 bytes.
13 section .text code
align=64
; The extern declaration is left commented out; the symbol is declared with
; "common" at the end of this file instead.
17 ;extern _OPENSSL_ia32cap_P
; _bn_mul_add_words (exported symbol).
; Visible structure: the entry takes the address of _OPENSSL_ia32cap_P and
; branches with jnc to L$000maw_non_sse2; the fall-through code works in MMX
; registers (labels L$00xmaw_sse2_*), the other code in integer registers.
; NOTE(review): the embedded listing numbers skip, so many instructions are not
; visible here -- including whatever sets CF before the jnc and, presumably,
; the multiply/carry arithmetic. Comments describe only the visible lines.
18 global _bn_mul_add_words
21 L$
_bn_mul_add_words_begin:
22 lea eax,[_OPENSSL_ia32cap_P
]
24 jnc NEAR L
$000maw_non_sse2
; MMX-register path: stack slots [12+esp] and [16+esp] go to ecx and mm0.
27 mov ecx,DWORD [12+esp]
28 movd mm0
,DWORD [16+esp]
30 jmp NEAR L
$001maw_sse2_entry
; Unrolled body: the visible lines load dwords at offsets 4..28 from edx and
; from eax into MMX registers and store mm1 to offsets 4..28 of eax.
32 L
$002maw_sse2_unrolled
:
37 movd mm4
,DWORD [4+edx]
39 movd mm6
,DWORD [8+edx]
41 movd mm7
,DWORD [12+edx]
44 movd mm3
,DWORD [4+eax]
46 movd mm5
,DWORD [8+eax]
48 movd mm4
,DWORD [12+eax]
51 movd mm2
,DWORD [16+edx]
54 movd mm4
,DWORD [20+edx]
57 movd mm6
,DWORD [24+edx]
59 movd
DWORD [4+eax],mm1
61 movd mm3
,DWORD [28+edx]
65 movd mm5
,DWORD [16+eax]
67 movd
DWORD [8+eax],mm1
70 movd mm5
,DWORD [20+eax]
72 movd
DWORD [12+eax],mm1
75 movd mm5
,DWORD [24+eax]
77 movd
DWORD [16+eax],mm1
80 movd mm5
,DWORD [28+eax]
82 movd
DWORD [20+eax],mm1
85 movd
DWORD [24+eax],mm1
88 movd
DWORD [28+eax],mm1
; Leave via L$003maw_sse2_exit on ZF, otherwise repeat the unrolled body on
; the next test (the instructions that set the flags are not visible).
92 jz NEAR L
$003maw_sse2_exit
95 jnz NEAR L
$002maw_sse2_unrolled
108 jnz NEAR L
$004maw_sse2_loop
; Integer-register code: loads edi, ecx, ebx and ebp from stack slots 20..32.
; NOTE(review): offsets starting at 20 are consistent with four registers
; having been pushed first; the pushes are not visible -- confirm.
121 mov edi,DWORD [20+esp]
122 mov ecx,DWORD [28+esp]
123 mov ebx,DWORD [24+esp]
125 mov ebp,DWORD [32+esp]
127 jz NEAR L
$005maw_finish
; Visible pattern per offset k = 4..28: eax = [k+ebx]; eax += [k+edi];
; [k+edi] = eax. Intervening instructions are missing from this view.
140 mov eax,DWORD [4+ebx]
144 add eax,DWORD [4+edi]
146 mov DWORD [4+edi],eax
149 mov eax,DWORD [8+ebx]
153 add eax,DWORD [8+edi]
155 mov DWORD [8+edi],eax
158 mov eax,DWORD [12+ebx]
162 add eax,DWORD [12+edi]
164 mov DWORD [12+edi],eax
167 mov eax,DWORD [16+ebx]
171 add eax,DWORD [16+edi]
173 mov DWORD [16+edi],eax
176 mov eax,DWORD [20+ebx]
180 add eax,DWORD [20+edi]
182 mov DWORD [20+edi],eax
185 mov eax,DWORD [24+ebx]
189 add eax,DWORD [24+edi]
191 mov DWORD [24+edi],eax
194 mov eax,DWORD [28+ebx]
198 add eax,DWORD [28+edi]
200 mov DWORD [28+edi],eax
206 jnz NEAR L
$006maw_loop
; Reload ecx from [32+esp]; go to L$007maw_finish2 on NZ, else to L$008maw_end.
208 mov ecx,DWORD [32+esp]
210 jnz NEAR L
$007maw_finish2
211 jmp NEAR L
$008maw_end
; Tail code: the same visible load/add/store pattern for offsets 4..24.
225 mov eax,DWORD [4+ebx]
229 add eax,DWORD [4+edi]
232 mov DWORD [4+edi],eax
236 mov eax,DWORD [8+ebx]
240 add eax,DWORD [8+edi]
243 mov DWORD [8+edi],eax
247 mov eax,DWORD [12+ebx]
251 add eax,DWORD [12+edi]
254 mov DWORD [12+edi],eax
258 mov eax,DWORD [16+ebx]
262 add eax,DWORD [16+edi]
265 mov DWORD [16+edi],eax
269 mov eax,DWORD [20+ebx]
273 add eax,DWORD [20+edi]
276 mov DWORD [20+edi],eax
280 mov eax,DWORD [24+ebx]
284 add eax,DWORD [24+edi]
286 mov DWORD [24+edi],eax
; _bn_mul_words entry (label L$_bn_mul_words_begin).
; Visible structure: takes the address of _OPENSSL_ia32cap_P, branches with jnc
; to L$009mw_non_sse2, otherwise continues into code using mm0 and the loop
; label L$010mw_sse2_loop.
; NOTE(review): the embedded listing numbers skip; the instruction that sets CF
; and, presumably, the multiply/carry arithmetic are not visible here.
299 L$
_bn_mul_words_begin:
300 lea eax,[_OPENSSL_ia32cap_P
]
302 jnc NEAR L
$009mw_non_sse2
; MMX-register path: stack slots 4, 8, 12, 16 go to eax, edx, ecx, mm0.
303 mov eax,DWORD [4+esp]
304 mov edx,DWORD [8+esp]
305 mov ecx,DWORD [12+esp]
306 movd mm0
,DWORD [16+esp]
318 jnz NEAR L
$010mw_sse2_loop
; Integer-register code: stack slots 20, 24, 28, 32 go to edi, ebx, ebp, ecx.
; NOTE(review): offsets starting at 20 are consistent with four pushed
; registers; the pushes are not visible -- confirm.
330 mov edi,DWORD [20+esp]
331 mov ebx,DWORD [24+esp]
332 mov ebp,DWORD [28+esp]
333 mov ecx,DWORD [32+esp]
335 jz NEAR L
$011mw_finish
; Visible pattern per offset k = 4..28: eax = [k+ebx]; [k+edi] = eax.
; Instructions between each load and store are missing from this view.
345 mov eax,DWORD [4+ebx]
349 mov DWORD [4+edi],eax
352 mov eax,DWORD [8+ebx]
356 mov DWORD [8+edi],eax
359 mov eax,DWORD [12+ebx]
363 mov DWORD [12+edi],eax
366 mov eax,DWORD [16+ebx]
370 mov DWORD [16+edi],eax
373 mov eax,DWORD [20+ebx]
377 mov DWORD [20+edi],eax
380 mov eax,DWORD [24+ebx]
384 mov DWORD [24+edi],eax
387 mov eax,DWORD [28+ebx]
391 mov DWORD [28+edi],eax
; Exit to L$011mw_finish on ZF, otherwise jump back to L$012mw_loop.
397 jz NEAR L
$011mw_finish
398 jmp NEAR L
$012mw_loop
; Reload ebp from [28+esp]; continue at L$013mw_finish2 on NZ.
400 mov ebp,DWORD [28+esp]
402 jnz NEAR L
$013mw_finish2
; Tail code: the same visible load/store pattern for offsets 4..24.
415 mov eax,DWORD [4+ebx]
419 mov DWORD [4+edi],eax
424 mov eax,DWORD [8+ebx]
428 mov DWORD [8+edi],eax
433 mov eax,DWORD [12+ebx]
437 mov DWORD [12+edi],eax
442 mov eax,DWORD [16+ebx]
446 mov DWORD [16+edi],eax
451 mov eax,DWORD [20+ebx]
455 mov DWORD [20+edi],eax
460 mov eax,DWORD [24+ebx]
464 mov DWORD [24+edi],eax
; _bn_sqr_words entry (label L$_bn_sqr_words_begin).
; Visible structure: takes the address of _OPENSSL_ia32cap_P, branches with jnc
; to L$015sqr_non_sse2, otherwise continues into code that loops at
; L$016sqr_sse2_loop.
; NOTE(review): the embedded listing numbers skip; the instruction that sets CF
; and, presumably, the squaring arithmetic are not visible here.
476 L$
_bn_sqr_words_begin:
477 lea eax,[_OPENSSL_ia32cap_P
]
479 jnc NEAR L
$015sqr_non_sse2
; Stack slots 4, 8, 12 go to eax, edx, ecx.
480 mov eax,DWORD [4+esp]
481 mov edx,DWORD [8+esp]
482 mov ecx,DWORD [12+esp]
491 jnz NEAR L
$016sqr_sse2_loop
; Integer-register code: stack slots 20, 24, 28 go to esi, edi, ebx.
; NOTE(review): offsets starting at 20 are consistent with four pushed
; registers; the pushes are not visible -- confirm.
501 mov esi,DWORD [20+esp]
502 mov edi,DWORD [24+esp]
503 mov ebx,DWORD [28+esp]
505 jz NEAR L
$017sw_finish
; Visible pattern: each dword read from offset k of edi is followed by stores
; of eax to offset 2k of esi and of edx to offset 2k+4 of esi, so two output
; dwords are written per input dword.
511 mov DWORD [4+esi],edx
513 mov eax,DWORD [4+edi]
515 mov DWORD [8+esi],eax
516 mov DWORD [12+esi],edx
518 mov eax,DWORD [8+edi]
520 mov DWORD [16+esi],eax
521 mov DWORD [20+esi],edx
523 mov eax,DWORD [12+edi]
525 mov DWORD [24+esi],eax
526 mov DWORD [28+esi],edx
528 mov eax,DWORD [16+edi]
530 mov DWORD [32+esi],eax
531 mov DWORD [36+esi],edx
533 mov eax,DWORD [20+edi]
535 mov DWORD [40+esi],eax
536 mov DWORD [44+esi],edx
538 mov eax,DWORD [24+edi]
540 mov DWORD [48+esi],eax
541 mov DWORD [52+esi],edx
543 mov eax,DWORD [28+edi]
545 mov DWORD [56+esi],eax
546 mov DWORD [60+esi],edx
551 jnz NEAR L
$018sw_loop
; Reload ebx from [28+esp]; the tail repeats the same store pattern for
; source offsets 4..24.
553 mov ebx,DWORD [28+esp]
561 mov DWORD [4+esi],edx
564 mov eax,DWORD [4+edi]
566 mov DWORD [8+esi],eax
568 mov DWORD [12+esi],edx
571 mov eax,DWORD [8+edi]
573 mov DWORD [16+esi],eax
575 mov DWORD [20+esi],edx
578 mov eax,DWORD [12+edi]
580 mov DWORD [24+esi],eax
582 mov DWORD [28+esi],edx
585 mov eax,DWORD [16+edi]
587 mov DWORD [32+esi],eax
589 mov DWORD [36+esi],edx
592 mov eax,DWORD [20+edi]
594 mov DWORD [40+esi],eax
596 mov DWORD [44+esi],edx
599 mov eax,DWORD [24+edi]
601 mov DWORD [48+esi],eax
602 mov DWORD [52+esi],edx
; _bn_div_words entry (label L$_bn_div_words_begin).
; Visible lines load the three stack arguments: [4+esp] -> edx, [8+esp] -> eax,
; [12+esp] -> ecx.
; NOTE(review): the rest of the routine (presumably the divide and the return)
; is not visible in this view.
612 L$
_bn_div_words_begin:
613 mov edx,DWORD [4+esp]
614 mov eax,DWORD [8+esp]
615 mov ecx,DWORD [12+esp]
; _bn_add_words entry (label L$_bn_add_words_begin).
; Visible structure: four stack slots are loaded into ebx, esi, edi, ebp, then
; an unrolled body handles offsets 4..28 with a loop back to L$021aw_loop and a
; tail for offsets 4..24.
; NOTE(review): the embedded listing numbers skip; the instructions between each
; pair of loads and the store (presumably the add/carry steps) are not visible.
621 L$
_bn_add_words_begin:
; NOTE(review): offsets starting at 20 are consistent with four pushed
; registers; the pushes are not visible -- confirm.
627 mov ebx,DWORD [20+esp]
628 mov esi,DWORD [24+esp]
629 mov edi,DWORD [28+esp]
630 mov ebp,DWORD [32+esp]
633 jz NEAR L
$020aw_finish
; Visible pattern per offset k = 4..28: ecx = [k+esi]; edx = [k+edi];
; (not visible) ...; [k+ebx] = ecx.
645 mov ecx,DWORD [4+esi]
646 mov edx,DWORD [4+edi]
652 mov DWORD [4+ebx],ecx
654 mov ecx,DWORD [8+esi]
655 mov edx,DWORD [8+edi]
661 mov DWORD [8+ebx],ecx
663 mov ecx,DWORD [12+esi]
664 mov edx,DWORD [12+edi]
670 mov DWORD [12+ebx],ecx
672 mov ecx,DWORD [16+esi]
673 mov edx,DWORD [16+edi]
679 mov DWORD [16+ebx],ecx
681 mov ecx,DWORD [20+esi]
682 mov edx,DWORD [20+edi]
688 mov DWORD [20+ebx],ecx
690 mov ecx,DWORD [24+esi]
691 mov edx,DWORD [24+edi]
697 mov DWORD [24+ebx],ecx
699 mov ecx,DWORD [28+esi]
700 mov edx,DWORD [28+edi]
706 mov DWORD [28+ebx],ecx
712 jnz NEAR L
$021aw_loop
; Reload ebp from [32+esp]; the tail repeats the pattern for offsets 4..24.
714 mov ebp,DWORD [32+esp]
729 mov ecx,DWORD [4+esi]
730 mov edx,DWORD [4+edi]
737 mov DWORD [4+ebx],ecx
740 mov ecx,DWORD [8+esi]
741 mov edx,DWORD [8+edi]
748 mov DWORD [8+ebx],ecx
751 mov ecx,DWORD [12+esi]
752 mov edx,DWORD [12+edi]
759 mov DWORD [12+ebx],ecx
762 mov ecx,DWORD [16+esi]
763 mov edx,DWORD [16+edi]
770 mov DWORD [16+ebx],ecx
773 mov ecx,DWORD [20+esi]
774 mov edx,DWORD [20+edi]
781 mov DWORD [20+ebx],ecx
784 mov ecx,DWORD [24+esi]
785 mov edx,DWORD [24+edi]
791 mov DWORD [24+ebx],ecx
; _bn_sub_words entry (label L$_bn_sub_words_begin).
; Visible structure matches the preceding add routine: four stack slots are
; loaded into ebx, esi, edi, ebp, then an unrolled body handles offsets 4..28
; with a loop back to L$024aw_loop and a tail for offsets 4..24.
; NOTE(review): the embedded listing numbers skip; the instructions between each
; pair of loads and the store (presumably the subtract/borrow steps) are not
; visible.
801 L$
_bn_sub_words_begin:
; NOTE(review): offsets starting at 20 are consistent with four pushed
; registers; the pushes are not visible -- confirm.
807 mov ebx,DWORD [20+esp]
808 mov esi,DWORD [24+esp]
809 mov edi,DWORD [28+esp]
810 mov ebp,DWORD [32+esp]
813 jz NEAR L
$023aw_finish
; Visible pattern per offset k = 4..28: ecx = [k+esi]; edx = [k+edi];
; (not visible) ...; [k+ebx] = ecx.
825 mov ecx,DWORD [4+esi]
826 mov edx,DWORD [4+edi]
832 mov DWORD [4+ebx],ecx
834 mov ecx,DWORD [8+esi]
835 mov edx,DWORD [8+edi]
841 mov DWORD [8+ebx],ecx
843 mov ecx,DWORD [12+esi]
844 mov edx,DWORD [12+edi]
850 mov DWORD [12+ebx],ecx
852 mov ecx,DWORD [16+esi]
853 mov edx,DWORD [16+edi]
859 mov DWORD [16+ebx],ecx
861 mov ecx,DWORD [20+esi]
862 mov edx,DWORD [20+edi]
868 mov DWORD [20+ebx],ecx
870 mov ecx,DWORD [24+esi]
871 mov edx,DWORD [24+edi]
877 mov DWORD [24+ebx],ecx
879 mov ecx,DWORD [28+esi]
880 mov edx,DWORD [28+edi]
886 mov DWORD [28+ebx],ecx
892 jnz NEAR L
$024aw_loop
; Reload ebp from [32+esp]; the tail repeats the pattern for offsets 4..24.
894 mov ebp,DWORD [32+esp]
909 mov ecx,DWORD [4+esi]
910 mov edx,DWORD [4+edi]
917 mov DWORD [4+ebx],ecx
920 mov ecx,DWORD [8+esi]
921 mov edx,DWORD [8+edi]
928 mov DWORD [8+ebx],ecx
931 mov ecx,DWORD [12+esi]
932 mov edx,DWORD [12+edi]
939 mov DWORD [12+ebx],ecx
942 mov ecx,DWORD [16+esi]
943 mov edx,DWORD [16+edi]
950 mov DWORD [16+ebx],ecx
953 mov ecx,DWORD [20+esi]
954 mov edx,DWORD [20+edi]
961 mov DWORD [20+ebx],ecx
964 mov ecx,DWORD [24+esi]
965 mov edx,DWORD [24+edi]
971 mov DWORD [24+ebx],ecx
; _bn_sub_part_words (exported symbol).
; Visible structure:
;   1. A common-length phase shaped like the sub routine above (loads from esi
;      and edi, stores ecx to ebx, loop label L$027aw_loop).
;   2. A test of the stack value at [36+esp]: jge goes to L$030pw_pos,
;      otherwise the "pw_neg" code runs.
;   3. "pw_pos" code that copies dwords from esi to ebx, leaving through the
;      L$0xxpw_nc* labels when CF is clear, plus a plain copy loop
;      (L$050pw_nc_loop) that ends at L$051pw_nc_end.
; NOTE(review): the embedded listing numbers skip (e.g. 1074 -> 1174), so most
; labels and the arithmetic between loads and stores are not visible here.
978 global _bn_sub_part_words
981 L$
_bn_sub_part_words_begin:
; NOTE(review): offsets starting at 20 are consistent with four pushed
; registers; the pushes are not visible -- confirm.
987 mov ebx,DWORD [20+esp]
988 mov esi,DWORD [24+esp]
989 mov edi,DWORD [28+esp]
990 mov ebp,DWORD [32+esp]
993 jz NEAR L
$026aw_finish
; Visible pattern per offset k = 4..28: ecx = [k+esi]; edx = [k+edi];
; (not visible) ...; [k+ebx] = ecx.
1005 mov ecx,DWORD [4+esi]
1006 mov edx,DWORD [4+edi]
1012 mov DWORD [4+ebx],ecx
1014 mov ecx,DWORD [8+esi]
1015 mov edx,DWORD [8+edi]
1021 mov DWORD [8+ebx],ecx
1023 mov ecx,DWORD [12+esi]
1024 mov edx,DWORD [12+edi]
1030 mov DWORD [12+ebx],ecx
1032 mov ecx,DWORD [16+esi]
1033 mov edx,DWORD [16+edi]
1039 mov DWORD [16+ebx],ecx
1041 mov ecx,DWORD [20+esi]
1042 mov edx,DWORD [20+edi]
1048 mov DWORD [20+ebx],ecx
1050 mov ecx,DWORD [24+esi]
1051 mov edx,DWORD [24+edi]
1057 mov DWORD [24+ebx],ecx
1059 mov ecx,DWORD [28+esi]
1060 mov edx,DWORD [28+edi]
1066 mov DWORD [28+ebx],ecx
1072 jnz NEAR L
$027aw_loop
1074 mov ebp,DWORD [32+esp]
; Signed test of the stack value at [36+esp] against 0: jge (value >= 0) goes
; to L$030pw_pos; a negative value falls through to the pw_neg code.
1174 cmp DWORD [36+esp],0
1176 mov ebp,DWORD [36+esp]
1179 jge NEAR L
$030pw_pos
1185 jz NEAR L
$031pw_neg_finish
; pw_neg body, offsets 4..28: edx = [k+edi]; (not visible) ...; [k+ebx] = ecx.
; No loads from esi are visible in this part.
1198 mov edx,DWORD [4+edi]
1204 mov DWORD [4+ebx],ecx
1207 mov edx,DWORD [8+edi]
1213 mov DWORD [8+ebx],ecx
1216 mov edx,DWORD [12+edi]
1222 mov DWORD [12+ebx],ecx
1225 mov edx,DWORD [16+edi]
1231 mov DWORD [16+ebx],ecx
1234 mov edx,DWORD [20+edi]
1240 mov DWORD [20+ebx],ecx
1243 mov edx,DWORD [24+edi]
1249 mov DWORD [24+ebx],ecx
1252 mov edx,DWORD [28+edi]
1258 mov DWORD [28+ebx],ecx
1263 jnz NEAR L
$032pw_neg_loop
; pw_neg tail: reload edx from [36+esp], then the same pattern for 4..24.
1265 mov edx,DWORD [36+esp]
1283 mov edx,DWORD [4+edi]
1290 mov DWORD [4+ebx],ecx
1294 mov edx,DWORD [8+edi]
1301 mov DWORD [8+ebx],ecx
1305 mov edx,DWORD [12+edi]
1312 mov DWORD [12+ebx],ecx
1316 mov edx,DWORD [16+edi]
1323 mov DWORD [16+ebx],ecx
1327 mov edx,DWORD [20+edi]
1334 mov DWORD [20+ebx],ecx
1338 mov edx,DWORD [24+edi]
1344 mov DWORD [24+ebx],ecx
1345 jmp NEAR L
$029pw_end
; pw_pos body: each step jumps to an L$0xxpw_ncN label when CF is clear,
; otherwise loads the next dword from esi and stores it to ebx. The
; instructions that set CF are not visible.
1348 jz NEAR L
$033pw_pos_finish
1354 jnc NEAR L
$035pw_nc0
1356 mov ecx,DWORD [4+esi]
1358 mov DWORD [4+ebx],ecx
1359 jnc NEAR L
$036pw_nc1
1361 mov ecx,DWORD [8+esi]
1363 mov DWORD [8+ebx],ecx
1364 jnc NEAR L
$037pw_nc2
1366 mov ecx,DWORD [12+esi]
1368 mov DWORD [12+ebx],ecx
1369 jnc NEAR L
$038pw_nc3
1371 mov ecx,DWORD [16+esi]
1373 mov DWORD [16+ebx],ecx
1374 jnc NEAR L
$039pw_nc4
1376 mov ecx,DWORD [20+esi]
1378 mov DWORD [20+ebx],ecx
1379 jnc NEAR L
$040pw_nc5
1381 mov ecx,DWORD [24+esi]
1383 mov DWORD [24+ebx],ecx
1384 jnc NEAR L
$041pw_nc6
1386 mov ecx,DWORD [28+esi]
1388 mov DWORD [28+ebx],ecx
1389 jnc NEAR L
$042pw_nc7
1394 jnz NEAR L
$034pw_pos_loop
; pw_pos tail: reload ebp from [36+esp]; same CF-clear exits (pw_tail_ncN)
; for offsets 4..24, then jump to L$029pw_end.
1396 mov ebp,DWORD [36+esp]
1403 jnc NEAR L
$043pw_tail_nc0
1407 mov ecx,DWORD [4+esi]
1409 mov DWORD [4+ebx],ecx
1410 jnc NEAR L
$044pw_tail_nc1
1414 mov ecx,DWORD [8+esi]
1416 mov DWORD [8+ebx],ecx
1417 jnc NEAR L
$045pw_tail_nc2
1421 mov ecx,DWORD [12+esi]
1423 mov DWORD [12+ebx],ecx
1424 jnc NEAR L
$046pw_tail_nc3
1428 mov ecx,DWORD [16+esi]
1430 mov DWORD [16+ebx],ecx
1431 jnc NEAR L
$047pw_tail_nc4
1435 mov ecx,DWORD [20+esi]
1437 mov DWORD [20+ebx],ecx
1438 jnc NEAR L
$048pw_tail_nc5
1442 mov ecx,DWORD [24+esi]
1444 mov DWORD [24+ebx],ecx
1445 jnc NEAR L
$049pw_tail_nc6
1447 jmp NEAR L
$029pw_end
; Plain copy loop: the visible lines copy dwords at offsets 4..28 from esi to
; ebx with no flag checks between them, looping at L$050pw_nc_loop.
1452 mov ecx,DWORD [4+esi]
1453 mov DWORD [4+ebx],ecx
1455 mov ecx,DWORD [8+esi]
1456 mov DWORD [8+ebx],ecx
1458 mov ecx,DWORD [12+esi]
1459 mov DWORD [12+ebx],ecx
1461 mov ecx,DWORD [16+esi]
1462 mov DWORD [16+ebx],ecx
1464 mov ecx,DWORD [20+esi]
1465 mov DWORD [20+ebx],ecx
1467 mov ecx,DWORD [24+esi]
1468 mov DWORD [24+ebx],ecx
1470 mov ecx,DWORD [28+esi]
1471 mov DWORD [28+ebx],ecx
1477 jnz NEAR L
$050pw_nc_loop
; Copy tail: reload ebp from [36+esp]; each step leaves via L$051pw_nc_end on
; ZF, otherwise copies one more dword (offsets 4..24) from esi to ebx.
1478 mov ebp,DWORD [36+esp]
1480 jz NEAR L
$051pw_nc_end
1485 jz NEAR L
$051pw_nc_end
1486 mov ecx,DWORD [4+esi]
1487 mov DWORD [4+ebx],ecx
1490 jz NEAR L
$051pw_nc_end
1491 mov ecx,DWORD [8+esi]
1492 mov DWORD [8+ebx],ecx
1495 jz NEAR L
$051pw_nc_end
1496 mov ecx,DWORD [12+esi]
1497 mov DWORD [12+ebx],ecx
1500 jz NEAR L
$051pw_nc_end
1501 mov ecx,DWORD [16+esi]
1502 mov DWORD [16+ebx],ecx
1505 jz NEAR L
$051pw_nc_end
1506 mov ecx,DWORD [20+esi]
1507 mov DWORD [20+ebx],ecx
1510 jz NEAR L
$051pw_nc_end
1511 mov ecx,DWORD [24+esi]
1512 mov DWORD [24+ebx],ecx
; Declares _OPENSSL_ia32cap_P as a common symbol of size 16. This is the symbol
; whose address the mul_add/mul/sqr entry points above load with lea.
1523 common _OPENSSL_ia32cap_P
16