; Output-format dispatch: choose the section declaration that matches the
; assembler's __OUTPUT_FORMAT__ (obj vs. win32). Both visible variants request
; 64-byte section alignment.
; NOTE(review): the %else/%endif lines that close these conditionals are not
; visible in this listing -- confirm against the generated original.
1 %ifidn __OUTPUT_FORMAT__
,obj
2 section code use32 class
=code
align=64
3 %elifidn __OUTPUT_FORMAT__
,win32
; When __YASM_VERSION_ID__ is defined, reject versions below 01010000h
; (the %error text names this as 1.1.0).
4 %ifdef __YASM_VERSION_ID__
5 %if __YASM_VERSION_ID__
< 01010000h
6 %error yasm version
1.1.0 or later needed.
8 ; Yasm automatically includes @feat.00 and complains about redefining it.
9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
13 section .text code
align=64
; _gcm_gmult_4bit_x86 -- exported entry point; the visible lines use only
; general-purpose registers.
; NOTE(review): the embedded listing numbers skip values, so the prologue,
; epilogue, loop labels and several instructions are not visible here. The
; notes below describe only the instructions that are shown.
; Visible data flow:
;   edi <- [104+esp], esi <- [108+esp]  (presumably the two pointer arguments,
;          read after a stack frame has been set up -- TODO confirm)
;   [20+esp]..[76+esp] <- 15 dword constants; the same values appear as the
;          non-zero dwords of the `dd` table near the end of this file
;   on exit ebx, ecx, edx are stored to [12+edi], [8+edi], [4+edi]
17 global _gcm_gmult_4bit_x86
20 L$
_gcm_gmult_4bit_x86_begin:
26 mov edi,DWORD [104+esp]
27 mov esi,DWORD [108+esp]
; Last dword (offset 12) of the buffer addressed by edi.
31 mov ebx,DWORD [12+edi]
; Build a dword lookup table on the stack, 4 bytes per entry.
33 mov DWORD [20+esp],471859200
34 mov DWORD [24+esp],943718400
35 mov DWORD [28+esp],610271232
36 mov DWORD [32+esp],1887436800
37 mov DWORD [36+esp],1822425088
38 mov DWORD [40+esp],1220542464
39 mov DWORD [44+esp],1423966208
40 mov DWORD [48+esp],3774873600
41 mov DWORD [52+esp],4246732800
42 mov DWORD [56+esp],3644850176
43 mov DWORD [60+esp],3311403008
44 mov DWORD [64+esp],2441084928
45 mov DWORD [68+esp],2376073216
46 mov DWORD [72+esp],2847932416
47 mov DWORD [76+esp],3051356160
; Keep a copy of ebx on the stack at [12+esp].
51 mov DWORD [12+esp],ebx
; Load one 16-byte entry of the table at esi, using ebx as a byte offset.
; ebx itself is overwritten last because it is the index.
54 mov ebp,DWORD [4+ebx*1+esi]
55 mov edx,DWORD [ebx*1+esi]
56 mov ecx,DWORD [12+ebx*1+esi]
57 mov ebx,DWORD [8+ebx*1+esi]
60 jmp NEAR L
$000x86_loop
; Fold in a dword from the stack table (base 16+esp, index eax, stride 4).
69 xor ebp,DWORD [16+eax*4+esp]
; al <- byte at [esp+edi]; edi is used as a stack index here, so it was
; presumably reassigned by instructions not shown in this view.
70 mov al,BYTE [edi*1+esp]
; XOR the 16-byte entry at esi+eax into ebx/ecx/edx/ebp.
72 xor ebx,DWORD [8+eax*1+esi]
73 xor ecx,DWORD [12+eax*1+esi]
74 xor edx,DWORD [eax*1+esi]
75 xor ebp,DWORD [4+eax*1+esi]
; Branches on the sign flag; the instruction that sets it is not visible.
77 js NEAR L
$001x86_break
; Second copy of the same fold/accumulate sequence.
84 xor ebp,DWORD [16+eax*4+esp]
85 mov al,BYTE [edi*1+esp]
87 xor ebx,DWORD [8+eax*1+esi]
88 xor ecx,DWORD [12+eax*1+esi]
89 xor edx,DWORD [eax*1+esi]
90 xor ebp,DWORD [4+eax*1+esi]
91 jmp NEAR L
$000x86_loop
; Write-back: reload the pointer from [104+esp] and store three dwords.
; NOTE(review): a store to [edi] (offset 0) is not visible in this view.
98 mov edi,DWORD [104+esp]
99 mov DWORD [12+edi],ebx
100 mov DWORD [8+edi],ecx
101 mov DWORD [4+edi],edx
; _gcm_ghash_4bit_x86 -- exported entry point; the visible lines use only
; general-purpose registers.
; NOTE(review): the embedded listing numbers skip values, so the prologue,
; epilogue, loop labels and several instructions are not visible here. The
; notes below describe only the instructions that are shown.
; Visible data flow:
;   ebx <- [104+esp], esi <- [108+esp], edi <- [112+esp], ecx <- [116+esp]
;          (presumably four stack arguments -- TODO confirm)
;   [116+esp] is rewritten from ecx and later used as the unsigned upper
;          bound for edi in the `cmp` / `jb` pair near the end
;   on exit ebx, ecx, edx are stored to offsets 12/8/4 of the pointer
;          reloaded from [104+esp]
109 global _gcm_ghash_4bit_x86
112 L$
_gcm_ghash_4bit_x86_begin:
118 mov ebx,DWORD [104+esp]
119 mov esi,DWORD [108+esp]
120 mov edi,DWORD [112+esp]
121 mov ecx,DWORD [116+esp]
; NOTE(review): embedded line 122 is missing; ecx was presumably modified
; there before being stored back.
123 mov DWORD [116+esp],ecx
; Load dwords 4/8/12 of the buffer at ebx (ebx overwritten last).
125 mov edx,DWORD [4+ebx]
126 mov ecx,DWORD [8+ebx]
127 mov ebx,DWORD [12+ebx]
; Build a dword lookup table on the stack, 4 bytes per entry; the same
; constants appear as the non-zero dwords of the `dd` table near the end of
; this file.
129 mov DWORD [20+esp],471859200
130 mov DWORD [24+esp],943718400
131 mov DWORD [28+esp],610271232
132 mov DWORD [32+esp],1887436800
133 mov DWORD [36+esp],1822425088
134 mov DWORD [40+esp],1220542464
135 mov DWORD [44+esp],1423966208
136 mov DWORD [48+esp],3774873600
137 mov DWORD [52+esp],4246732800
138 mov DWORD [56+esp],3644850176
139 mov DWORD [60+esp],3311403008
140 mov DWORD [64+esp],2441084928
141 mov DWORD [68+esp],2376073216
142 mov DWORD [72+esp],2847932416
143 mov DWORD [76+esp],3051356160
; XOR the current values with dwords 12/8/4 of the data at edi ...
146 xor ebx,DWORD [12+edi]
147 xor ecx,DWORD [8+edi]
148 xor edx,DWORD [4+edi]
; ... and stage the result on the stack at [12+esp], [8+esp], [4+esp].
150 mov DWORD [12+esp],ebx
151 mov DWORD [8+esp],ecx
152 mov DWORD [4+esp],edx
; Load one 16-byte entry of the table at esi, using ebx as a byte offset.
156 mov ebp,DWORD [4+ebx*1+esi]
157 mov edx,DWORD [ebx*1+esi]
158 mov ecx,DWORD [12+ebx*1+esi]
159 mov ebx,DWORD [8+ebx*1+esi]
162 jmp NEAR L
$003x86_loop
; Fold in a dword from the stack table (base 16+esp, index eax, stride 4).
171 xor ebp,DWORD [16+eax*4+esp]
; al <- byte at [esp+edi]; edi is used as a stack index here, so it was
; presumably reassigned by instructions not shown in this view.
172 mov al,BYTE [edi*1+esp]
; XOR the 16-byte entry at esi+eax into ebx/ecx/edx/ebp.
174 xor ebx,DWORD [8+eax*1+esi]
175 xor ecx,DWORD [12+eax*1+esi]
176 xor edx,DWORD [eax*1+esi]
177 xor ebp,DWORD [4+eax*1+esi]
; Branches on the sign flag; the instruction that sets it is not visible.
179 js NEAR L
$004x86_break
; Second copy of the same fold/accumulate sequence.
186 xor ebp,DWORD [16+eax*4+esp]
187 mov al,BYTE [edi*1+esp]
189 xor ebx,DWORD [8+eax*1+esi]
190 xor ecx,DWORD [12+eax*1+esi]
191 xor edx,DWORD [eax*1+esi]
192 xor ebp,DWORD [4+eax*1+esi]
193 jmp NEAR L
$003x86_loop
; Outer-loop control: reload the cursor, compare it (unsigned) against the
; bound kept at [116+esp], store it back, and repeat while cursor < bound.
; NOTE(review): embedded line 201 is missing; the cursor is presumably
; advanced there.
200 mov edi,DWORD [112+esp]
202 cmp edi,DWORD [116+esp]
203 mov DWORD [112+esp],edi
204 jb NEAR L
$002x86_outer_loop
; Write-back through the pointer kept at [104+esp].
; NOTE(review): a store to [edi] (offset 0) is not visible in this view.
205 mov edi,DWORD [104+esp]
206 mov DWORD [12+edi],ebx
207 mov DWORD [8+edi],ecx
208 mov DWORD [4+edi],edx
; _gcm_gmult_4bit_mmx -- exported entry point; the visible lines mix
; general-purpose and MMX (mm0) registers.
; NOTE(review): the embedded listing numbers skip values, so the prologue,
; epilogue, loop labels and most of the loop body are not visible here. The
; notes below describe only the instructions that are shown.
216 global _gcm_gmult_4bit_mmx
219 L$
_gcm_gmult_4bit_mmx_begin:
; edi <- [20+esp], esi <- [24+esp] (presumably the two pointer arguments
; after register saves -- TODO confirm).
224 mov edi,DWORD [20+esp]
225 mov esi,DWORD [24+esp]
; Position-independent address of L$rem_4bit: adds the link-time distance
; (L$rem_4bit - L$005pic_point) to eax, which presumably already holds the
; run-time address of L$005pic_point (set-up not visible).
229 lea eax,[(L$rem_4bit
-L
$005pic_point
)+eax]
; Zero-extended byte at offset 15 of the buffer addressed by edi.
230 movzx ebx,BYTE [15+edi]
; Upper qword (offset 8) of the 16-byte entry at esi+ecx.
237 movq mm0
,[8+ecx*1+esi]
240 jmp NEAR L
$006mmx_loop
; Accumulate the upper qword of the entry at esi+edx.
247 pxor mm0
,[8+edx*1+esi]
; Next byte from the buffer at edi, indexed by ebp.
248 mov cl,BYTE [ebp*1+edi]
; Branches on the sign flag; the instruction that sets it is not visible.
256 js NEAR L
$007mmx_break
263 pxor mm0
,[8+ecx*1+esi]
269 jmp NEAR L
$006mmx_loop
278 pxor mm0
,[8+ecx*1+esi]
288 pxor mm0
,[8+edx*1+esi]
; Write-back of three dwords through edi.
; NOTE(review): a store to [edi] (offset 0) is not visible in this view.
304 mov DWORD [12+edi],ebx
305 mov DWORD [4+edi],edx
306 mov DWORD [8+edi],ecx
; _gcm_ghash_4bit_mmx -- exported entry point; the visible lines mix
; general-purpose and MMX (mm0-mm2, mm6, mm7) registers.
; NOTE(review): the embedded listing numbers skip values, so the prologue,
; epilogue, labels and many instructions are not visible here. The notes
; below describe only the instructions that are shown.
; Stack-frame slots that the visible code touches:
;   [esp+reg]          byte table (read with `xor bl/cl,BYTE [reg+esp]`)
;   [16+i*8+esp], [144+i*8+esp], [272+i*8+esp], [400+i*8+esp]
;                      four qword tables whose bases are 128 bytes apart
;   [524..536+esp]     staged dwords, consumed one per unrolled group
;   [544+esp]          copy of eax (pointer used for the final write-back)
;   [548+esp]          copy of ecx (reloaded before the final compare)
;   [552+esp]          copy of edx (right-hand side of the final compare)
;   [556+esp]          copy of ebp, reloaded into esp at the end
313 global _gcm_ghash_4bit_mmx
316 L$
_gcm_ghash_4bit_mmx_begin:
; Four dwords from [20..32+esp] (presumably the four arguments after
; register saves -- TODO confirm).
321 mov eax,DWORD [20+esp]
322 mov ebx,DWORD [24+esp]
323 mov ecx,DWORD [28+esp]
324 mov edx,DWORD [32+esp]
; Position-independent address of L$rem_8bit: adds the link-time distance
; (L$rem_8bit - L$008pic_point) to esi, which presumably already holds the
; run-time address of L$008pic_point (set-up not visible).
329 lea esi,[(L$rem_8bit
-L
$008pic_point
)+esi]
334 mov DWORD [544+esp],eax
335 mov DWORD [552+esp],edx
336 mov DWORD [556+esp],ebp
; Sixteen dword loads at a 16-byte stride, from ebx-120 up to ebx+120.
; NOTE(review): ebx is presumably biased by instructions not visible here;
; the stores that consume each edx value are also not visible.
340 mov edx,DWORD [ebx-120]
345 mov edx,DWORD [ebx-104]
355 mov edx,DWORD [ebx-88]
369 mov edx,DWORD [ebx-72]
383 mov edx,DWORD [ebx-56]
397 mov edx,DWORD [ebx-40]
411 mov edx,DWORD [ebx-24]
425 mov edx,DWORD [ebx-8]
439 mov edx,DWORD [8+ebx]
453 mov edx,DWORD [24+ebx]
467 mov edx,DWORD [40+ebx]
481 mov edx,DWORD [56+ebx]
495 mov edx,DWORD [72+ebx]
509 mov edx,DWORD [88+ebx]
523 mov edx,DWORD [104+ebx]
537 mov edx,DWORD [120+ebx]
; Load dwords 8 and 12 of the buffer at eax, XOR them with the matching
; dwords of the data at ecx, then stage ebx and remember ecx on the stack.
565 mov ebx,DWORD [8+eax]
566 mov edx,DWORD [12+eax]
569 xor edx,DWORD [12+ecx]
570 xor ebx,DWORD [8+ecx]
573 mov DWORD [536+esp],ebx
575 mov DWORD [548+esp],ecx
; Seed the two 64-bit accumulators from the tables at 16+esp and 144+esp.
586 movq mm7
,[16+eax*8+esp]
587 movq mm6
,[144+eax*8+esp]
; Unrolled body. Each visible step XORs qword table entries into mm7/mm6,
; XORs a byte from the table at esp into bl or cl, and uses `pinsrw` to
; insert a 16-bit word read from [reg*2+esi] into lane 2 of mm0/mm1/mm2.
; The steps alternate between ebp/bl and edi/cl as index/byte registers.
; esi was pointed at L$rem_8bit by the `lea` above, assuming it is not
; changed by lines that are not visible.
594 pxor mm7
,[272+ebp*8+esp]
598 pxor mm7
,[16+eax*8+esp]
600 pxor mm6
,[144+eax*8+esp]
602 pxor mm6
,[400+ebp*8+esp]
603 xor bl,BYTE [ebp*1+esp]
611 pxor mm7
,[272+edi*8+esp]
615 pinsrw mm2
,WORD [ebx*2+esi],2
616 pxor mm7
,[16+eax*8+esp]
618 pxor mm6
,[144+eax*8+esp]
620 pxor mm6
,[400+edi*8+esp]
621 xor cl,BYTE [edi*1+esp]
; Reload the dword staged at [536+esp] (written from ebx above).
623 mov edx,DWORD [536+esp]
630 pxor mm7
,[272+ebp*8+esp]
635 pinsrw mm1
,WORD [ecx*2+esi],2
636 pxor mm7
,[16+eax*8+esp]
638 pxor mm6
,[144+eax*8+esp]
640 pxor mm6
,[400+ebp*8+esp]
641 xor bl,BYTE [ebp*1+esp]
649 pxor mm7
,[272+edi*8+esp]
654 pinsrw mm0
,WORD [ebx*2+esi],2
655 pxor mm7
,[16+eax*8+esp]
657 pxor mm6
,[144+eax*8+esp]
659 pxor mm6
,[400+edi*8+esp]
660 xor cl,BYTE [edi*1+esp]
668 pxor mm7
,[272+ebp*8+esp]
673 pinsrw mm2
,WORD [ecx*2+esi],2
674 pxor mm7
,[16+eax*8+esp]
676 pxor mm6
,[144+eax*8+esp]
678 pxor mm6
,[400+ebp*8+esp]
679 xor bl,BYTE [ebp*1+esp]
687 pxor mm7
,[272+edi*8+esp]
692 pinsrw mm1
,WORD [ebx*2+esi],2
693 pxor mm7
,[16+eax*8+esp]
695 pxor mm6
,[144+eax*8+esp]
697 pxor mm6
,[400+edi*8+esp]
698 xor cl,BYTE [edi*1+esp]
; Next staged dword, 4 bytes below the previous one.
700 mov edx,DWORD [532+esp]
707 pxor mm7
,[272+ebp*8+esp]
712 pinsrw mm0
,WORD [ecx*2+esi],2
713 pxor mm7
,[16+eax*8+esp]
715 pxor mm6
,[144+eax*8+esp]
717 pxor mm6
,[400+ebp*8+esp]
718 xor bl,BYTE [ebp*1+esp]
726 pxor mm7
,[272+edi*8+esp]
731 pinsrw mm2
,WORD [ebx*2+esi],2
732 pxor mm7
,[16+eax*8+esp]
734 pxor mm6
,[144+eax*8+esp]
736 pxor mm6
,[400+edi*8+esp]
737 xor cl,BYTE [edi*1+esp]
745 pxor mm7
,[272+ebp*8+esp]
750 pinsrw mm1
,WORD [ecx*2+esi],2
751 pxor mm7
,[16+eax*8+esp]
753 pxor mm6
,[144+eax*8+esp]
755 pxor mm6
,[400+ebp*8+esp]
756 xor bl,BYTE [ebp*1+esp]
764 pxor mm7
,[272+edi*8+esp]
769 pinsrw mm0
,WORD [ebx*2+esi],2
770 pxor mm7
,[16+eax*8+esp]
772 pxor mm6
,[144+eax*8+esp]
774 pxor mm6
,[400+edi*8+esp]
775 xor cl,BYTE [edi*1+esp]
; Next staged dword, 4 bytes below the previous one.
777 mov edx,DWORD [528+esp]
784 pxor mm7
,[272+ebp*8+esp]
789 pinsrw mm2
,WORD [ecx*2+esi],2
790 pxor mm7
,[16+eax*8+esp]
792 pxor mm6
,[144+eax*8+esp]
794 pxor mm6
,[400+ebp*8+esp]
795 xor bl,BYTE [ebp*1+esp]
803 pxor mm7
,[272+edi*8+esp]
808 pinsrw mm1
,WORD [ebx*2+esi],2
809 pxor mm7
,[16+eax*8+esp]
811 pxor mm6
,[144+eax*8+esp]
813 pxor mm6
,[400+edi*8+esp]
814 xor cl,BYTE [edi*1+esp]
822 pxor mm7
,[272+ebp*8+esp]
827 pinsrw mm0
,WORD [ecx*2+esi],2
828 pxor mm7
,[16+eax*8+esp]
830 pxor mm6
,[144+eax*8+esp]
832 pxor mm6
,[400+ebp*8+esp]
833 xor bl,BYTE [ebp*1+esp]
841 pxor mm7
,[272+edi*8+esp]
846 pinsrw mm2
,WORD [ebx*2+esi],2
847 pxor mm7
,[16+eax*8+esp]
849 pxor mm6
,[144+eax*8+esp]
851 pxor mm6
,[400+edi*8+esp]
852 xor cl,BYTE [edi*1+esp]
; Last staged dword read in the visible code.
854 mov edx,DWORD [524+esp]
861 pxor mm7
,[272+ebp*8+esp]
866 pinsrw mm1
,WORD [ecx*2+esi],2
867 pxor mm7
,[16+eax*8+esp]
868 pxor mm6
,[144+eax*8+esp]
869 xor bl,BYTE [ebp*1+esp]
871 pxor mm6
,[400+ebp*8+esp]
; Tail of the unrolled body: here the tables at 16+esp and 144+esp are
; indexed by edi rather than eax, and the last `pinsrw` targets lane 3.
880 pxor mm7
,[16+edi*8+esp]
884 pxor mm6
,[144+edi*8+esp]
885 pinsrw mm0
,WORD [ebx*2+esi],2
888 pinsrw mm2
,WORD [ecx*2+esi],3
; Loop control: reload the saved ecx and compare it with the value saved at
; [552+esp]. NOTE(review): the conditional branch that consumes this compare
; is not visible.
893 mov ecx,DWORD [548+esp]
902 cmp ecx,DWORD [552+esp]
; Write-back of two dwords through the pointer saved at [544+esp].
; NOTE(review): stores to offsets 0 and 4 are not visible in this view.
904 mov eax,DWORD [544+esp]
905 mov DWORD [12+eax],edx
906 mov DWORD [8+eax],ebx
; Reload esp from the slot that was filled from ebp earlier (presumably the
; caller's stack pointer -- TODO confirm against the missing prologue).
908 mov esp,DWORD [556+esp]
; _gcm_init_clmul -- exported entry point.
; NOTE(review): the embedded listing numbers skip values, so most of the
; body is not visible here; only the argument loads, the address computation
; and the byte-encoded instructions are shown.
915 global _gcm_init_clmul
918 L$
_gcm_init_clmul_begin:
; edx <- [4+esp], eax <- [8+esp]: the first two dwords above the return
; address at function entry.
919 mov edx,DWORD [4+esp]
920 mov eax,DWORD [8+esp]
; Position-independent address of L$bswap: adds the link-time distance
; (L$bswap - L$010pic) to ecx, which presumably already holds the run-time
; address of L$010pic (set-up not visible).
924 lea ecx,[(L
$bswap
-L
$010pic
)+ecx]
; The `db` lines below are hand-encoded instructions (66 0F 3A 44 /r ib is
; PCLMULQDQ, 66 0F 3A 0F /r ib is PALIGNR):
;   102,15,58,68,194,0   pclmulqdq xmm0,xmm2,0x00
;   102,15,58,68,202,17  pclmulqdq xmm1,xmm2,0x11
;   102,15,58,68,220,0   pclmulqdq xmm3,xmm4,0x00
;   102,15,58,15,227,8   palignr   xmm4,xmm3,8
943 db 102,15,58,68,194,0
944 db 102,15,58,68,202,17
945 db 102,15,58,68,220,0
979 db 102,15,58,15,227,8
; _gcm_gmult_clmul -- exported entry point.
; NOTE(review): the embedded listing numbers skip values, so most of the
; body is not visible here; only the argument loads, the address computation
; and the byte-encoded instructions are shown.
982 global _gcm_gmult_clmul
985 L$
_gcm_gmult_clmul_begin:
; eax <- [4+esp], edx <- [8+esp]: the first two dwords above the return
; address at function entry.
986 mov eax,DWORD [4+esp]
987 mov edx,DWORD [8+esp]
; Position-independent address of L$bswap: adds the link-time distance
; (L$bswap - L$011pic) to ecx, which presumably already holds the run-time
; address of L$011pic (set-up not visible).
991 lea ecx,[(L
$bswap
-L
$011pic
)+ecx]
; Hand-encoded PCLMULQDQ instructions (66 0F 3A 44 /r ib):
;   102,15,58,68,194,0   pclmulqdq xmm0,xmm2,0x00
;   102,15,58,68,202,17  pclmulqdq xmm1,xmm2,0x11
;   102,15,58,68,220,0   pclmulqdq xmm3,xmm4,0x00
1000 db 102,15,58,68,194,0
1001 db 102,15,58,68,202,17
1002 db 102,15,58,68,220,0
; _gcm_ghash_clmul -- exported entry point.
; NOTE(review): the embedded listing numbers skip values, so the prologue,
; epilogue, labels and most of the arithmetic are not visible here. The
; notes below describe only the instructions that are shown.
; Hand-encoded PCLMULQDQ instructions (66 0F 3A 44 /r ib) used in this body:
;   102,15,58,68,194,0   pclmulqdq xmm0,xmm2,0x00
;   102,15,58,68,202,17  pclmulqdq xmm1,xmm2,0x11
;   102,15,58,68,220,0   pclmulqdq xmm3,xmm4,0x00
;   102,15,58,68,221,0   pclmulqdq xmm3,xmm5,0x00
;   102,15,58,68,229,16  pclmulqdq xmm4,xmm5,0x10
;   102,15,58,68,242,0   pclmulqdq xmm6,xmm2,0x00
;   102,15,58,68,250,17  pclmulqdq xmm7,xmm2,0x11
1033 global _gcm_ghash_clmul
1036 L$
_gcm_ghash_clmul_begin:
; Four dwords from [20..32+esp] (presumably the four arguments after
; register saves -- TODO confirm).
1041 mov eax,DWORD [20+esp]
1042 mov edx,DWORD [24+esp]
1043 mov esi,DWORD [28+esp]
1044 mov ebx,DWORD [32+esp]
; Position-independent address of L$bswap: adds the link-time distance
; (L$bswap - L$012pic) to ecx, which presumably already holds the run-time
; address of L$012pic (set-up not visible).
1048 lea ecx,[(L
$bswap
-L
$012pic
)+ecx]
; Branches on the zero flag; the instruction that sets it is not visible.
1054 jz NEAR L
$013odd_tail
; Unaligned 16-byte loads: second block of the data at esi, and the
; entries at offsets 32 and 16 of the table at edx.
1056 movdqu xmm6
,[16+esi]
1059 movdqu xmm5
,[32+edx]
1065 db 102,15,58,68,242,0
1066 db 102,15,58,68,250,17
1067 db 102,15,58,68,221,0
1068 movups xmm2
,[16+edx]
; Unsigned below-or-equal test on flags set by an instruction that is not
; visible; otherwise enter the main loop.
1071 jbe NEAR L
$014even_tail
1072 jmp NEAR L
$015mod_loop
; Loop body (the label itself is not visible in this view).
1079 db 102,15,58,68,194,0
1080 db 102,15,58,68,202,17
1081 db 102,15,58,68,229,16
1088 movdqu xmm6
,[16+esi]
1106 db 102,15,58,68,242,0
1107 movups xmm5
,[32+edx]
1119 db 102,15,58,68,250,17
1120 movups xmm2
,[16+edx]
1126 db 102,15,58,68,221,0
; Repeat while the (not visible) preceding compare was unsigned-above.
1129 ja NEAR L
$015mod_loop
; Two further multiply groups follow; judging by the embedded numbering they
; presumably belong to the even-tail and odd-tail paths -- TODO confirm.
1134 db 102,15,58,68,194,0
1135 db 102,15,58,68,202,17
1136 db 102,15,58,68,229,16
1180 db 102,15,58,68,194,0
1181 db 102,15,58,68,202,17
1182 db 102,15,58,68,220,0
; Constant data used by the routines above.
; NOTE(review): the labels and alignment directives for these tables are not
; visible in this listing; the label names suggested below are inferred from
; the `lea` references in the code and should be confirmed.
;
; 16 descending byte indices (15..0), i.e. a byte-reversal pattern;
; presumably the L$bswap mask.
1220 db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 16 bytes: 1, fourteen zero bytes, then 194 (0xC2). Presumably the
; reduction constant used by the clmul routines -- TODO confirm.
1221 db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
; 256 16-bit entries (32 rows of 8). Presumably L$rem_8bit, which the MMX
; ghash routine reads with `pinsrw ...,WORD [reg*2+esi]`.
1224 dw 0,450,900,582,1800,1738,1164,1358
1225 dw 3600,4050,3476,3158,2328,2266,2716,2910
1226 dw 7200,7650,8100,7782,6952,6890,6316,6510
1227 dw 4656,5106,4532,4214,5432,5370,5820,6014
1228 dw 14400,14722,15300,14854,16200,16010,15564,15630
1229 dw 13904,14226,13780,13334,12632,12442,13020,13086
1230 dw 9312,9634,10212,9766,9064,8874,8428,8494
1231 dw 10864,11186,10740,10294,11640,11450,12028,12094
1232 dw 28800,28994,29444,29382,30600,30282,29708,30158
1233 dw 32400,32594,32020,31958,31128,30810,31260,31710
1234 dw 27808,28002,28452,28390,27560,27242,26668,27118
1235 dw 25264,25458,24884,24822,26040,25722,26172,26622
1236 dw 18624,18690,19268,19078,20424,19978,19532,19854
1237 dw 18128,18194,17748,17558,16856,16410,16988,17310
1238 dw 21728,21794,22372,22182,21480,21034,20588,20910
1239 dw 23280,23346,22900,22710,24056,23610,24188,24510
1240 dw 57600,57538,57988,58182,58888,59338,58764,58446
1241 dw 61200,61138,60564,60758,59416,59866,60316,59998
1242 dw 64800,64738,65188,65382,64040,64490,63916,63598
1243 dw 62256,62194,61620,61814,62520,62970,63420,63102
1244 dw 55616,55426,56004,56070,56904,57226,56780,56334
1245 dw 55120,54930,54484,54550,53336,53658,54236,53790
1246 dw 50528,50338,50916,50982,49768,50090,49644,49198
1247 dw 52080,51890,51444,51510,52344,52666,53244,52798
1248 dw 37248,36930,37380,37830,38536,38730,38156,38094
1249 dw 40848,40530,39956,40406,39064,39258,39708,39646
1250 dw 36256,35938,36388,36838,35496,35690,35116,35054
1251 dw 33712,33394,32820,33270,33976,34170,34620,34558
1252 dw 43456,43010,43588,43910,44744,44810,44364,44174
1253 dw 42960,42514,42068,42390,41176,41242,41820,41630
1254 dw 46560,46114,46692,47014,45800,45866,45420,45230
1255 dw 48112,47666,47220,47542,48376,48442,49020,48830
; 16 (low dword, high dword) pairs with the low dword always 0. The non-zero
; high dwords are the same constants that the x86 routines store into their
; stack tables. Presumably L$rem_4bit.
1258 dd 0,0,0,471859200,0,943718400,0,610271232
1259 dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208
1260 dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008
1261 dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160
; ASCII banner: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".
; NOTE(review): no terminating zero byte is visible in this view.
1262 db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
1263 db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
1264 db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62