Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / boringssl / win-x86 / crypto / modes / ghash-x86.asm
blobeb493aca63675b2d8f35096cc8e6589c1b6eb604
; Output-format preamble: picks the code section declaration that matches
; the assembler's output format (obj, win32, or anything else).
1 %ifidn __OUTPUT_FORMAT__,obj
2 section code use32 class=code align=64
3 %elifidn __OUTPUT_FORMAT__,win32
; win32 output: reject Yasm older than 1.1.0; under non-Yasm assemblers
; define the $@feat.00 symbol explicitly (see the SafeSEH link below).
4 %ifdef __YASM_VERSION_ID__
5 %if __YASM_VERSION_ID__ < 01010000h
6 %error yasm version 1.1.0 or later needed.
7 %endif
8 ; Yasm automatically includes @feat.00 and complains about redefining it.
9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
10 %else
11 $@feat.00 equ 1
12 %endif
13 section .text code align=64
14 %else
; Any other output format: plain .text section with default alignment.
15 section .text code
16 %endif
;-----------------------------------------------------------------------
; _gcm_gmult_4bit_x86 -- integer-only 4-bit table multiply of one block.
; Stack arguments (offsets are relative to esp after the prologue):
;   [104+esp] -> 16-byte block; read on entry, overwritten with the result
;                (presumably Xi in the C caller -- confirm)
;   [108+esp] -> lookup table addressed in 16-byte entries by nibble*16
;                (presumably Htable -- confirm)
; Saves and restores ebp, ebx, esi, edi. Uses 84 bytes of stack scratch:
;   [0..15+esp]  copy of the input block
;   [16..79+esp] 16-dword reduction table (same values as the odd dwords
;                of L$rem_4bit)
; Fix: the closing `ret` (listing line 108) was missing, so control fell
; through into _gcm_ghash_4bit_x86.
;-----------------------------------------------------------------------
17 global _gcm_gmult_4bit_x86
18 align 16
19 _gcm_gmult_4bit_x86:
20 L$_gcm_gmult_4bit_x86_begin:
21 push ebp
22 push ebx
23 push esi
24 push edi
25 sub esp,84
26 mov edi,DWORD [104+esp] ; edi = block pointer (arg 0)
27 mov esi,DWORD [108+esp] ; esi = table pointer (arg 1)
28 mov ebp,DWORD [edi]
29 mov edx,DWORD [4+edi]
30 mov ecx,DWORD [8+edi]
31 mov ebx,DWORD [12+edi]
; Build the 16-entry reduction table on the stack.
32 mov DWORD [16+esp],0
33 mov DWORD [20+esp],471859200
34 mov DWORD [24+esp],943718400
35 mov DWORD [28+esp],610271232
36 mov DWORD [32+esp],1887436800
37 mov DWORD [36+esp],1822425088
38 mov DWORD [40+esp],1220542464
39 mov DWORD [44+esp],1423966208
40 mov DWORD [48+esp],3774873600
41 mov DWORD [52+esp],4246732800
42 mov DWORD [56+esp],3644850176
43 mov DWORD [60+esp],3311403008
44 mov DWORD [64+esp],2441084928
45 mov DWORD [68+esp],2376073216
46 mov DWORD [72+esp],2847932416
47 mov DWORD [76+esp],3051356160
; Keep a byte-addressable copy of the input block at [esp..15+esp].
48 mov DWORD [esp],ebp
49 mov DWORD [4+esp],edx
50 mov DWORD [8+esp],ecx
51 mov DWORD [12+esp],ebx
52 shr ebx,20
53 and ebx,240 ; ebx = (low nibble of byte 15) * 16
54 mov ebp,DWORD [4+ebx*1+esi]
55 mov edx,DWORD [ebx*1+esi]
56 mov ecx,DWORD [12+ebx*1+esi]
57 mov ebx,DWORD [8+ebx*1+esi]
58 xor eax,eax ; upper bits of eax stay zero; only al is written below
59 mov edi,15 ; edi = index of the byte being processed (15 down to 0)
60 jmp NEAR L$000x86_loop
61 align 16
62 L$000x86_loop:
; First half: shift ebp:edx:ecx:ebx right by 4, fold the 4 bits shifted
; out through the reduction table, then add the entry for the high nibble
; of byte [edi].
63 mov al,bl
64 shrd ebx,ecx,4
65 and al,15
66 shrd ecx,edx,4
67 shrd edx,ebp,4
68 shr ebp,4
69 xor ebp,DWORD [16+eax*4+esp]
70 mov al,BYTE [edi*1+esp]
71 and al,240 ; high nibble of the byte, already scaled by 16
72 xor ebx,DWORD [8+eax*1+esi]
73 xor ecx,DWORD [12+eax*1+esi]
74 xor edx,DWORD [eax*1+esi]
75 xor ebp,DWORD [4+eax*1+esi]
76 dec edi
77 js NEAR L$001x86_break ; byte 0 done -> leave the loop
; Second half: same shift/reduce step, then the low nibble of the next
; (lower-indexed) byte.
78 mov al,bl
79 shrd ebx,ecx,4
80 and al,15
81 shrd ecx,edx,4
82 shrd edx,ebp,4
83 shr ebp,4
84 xor ebp,DWORD [16+eax*4+esp]
85 mov al,BYTE [edi*1+esp]
86 shl al,4 ; low nibble of the byte, scaled by 16
87 xor ebx,DWORD [8+eax*1+esi]
88 xor ecx,DWORD [12+eax*1+esi]
89 xor edx,DWORD [eax*1+esi]
90 xor ebp,DWORD [4+eax*1+esi]
91 jmp NEAR L$000x86_loop
92 align 16
93 L$001x86_break:
; Byte-swap each dword and write the result back over the input block.
94 bswap ebx
95 bswap ecx
96 bswap edx
97 bswap ebp
98 mov edi,DWORD [104+esp]
99 mov DWORD [12+edi],ebx
100 mov DWORD [8+edi],ecx
101 mov DWORD [4+edi],edx
102 mov DWORD [edi],ebp
103 add esp,84
104 pop edi
105 pop esi
106 pop ebx
107 pop ebp
108 ret
;-----------------------------------------------------------------------
; _gcm_ghash_4bit_x86 -- integer-only 4-bit table hash over a buffer.
; Stack arguments (offsets are relative to esp after the prologue):
;   [104+esp] -> 16-byte state; read on entry, overwritten on exit
;   [108+esp] -> lookup table addressed in 16-byte entries by nibble*16
;   [112+esp] -> input pointer (advanced by 16 per outer iteration)
;   [116+esp]  = input length; replaced in place by the end pointer
; The outer loop is do/while shaped: one 16-byte block is always
; processed before the end pointer is compared (unsigned, `jb`).
; Saves and restores ebp, ebx, esi, edi. Stack scratch is laid out as in
; _gcm_gmult_4bit_x86: [0..15+esp] block copy, [16..79+esp] reduction
; table.
; Fix: the closing `ret` (listing line 215) was missing, so control fell
; through into _gcm_gmult_4bit_mmx.
;-----------------------------------------------------------------------
109 global _gcm_ghash_4bit_x86
110 align 16
111 _gcm_ghash_4bit_x86:
112 L$_gcm_ghash_4bit_x86_begin:
113 push ebp
114 push ebx
115 push esi
116 push edi
117 sub esp,84
118 mov ebx,DWORD [104+esp] ; ebx = state pointer (arg 0)
119 mov esi,DWORD [108+esp] ; esi = table pointer (arg 1)
120 mov edi,DWORD [112+esp] ; edi = input pointer (arg 2)
121 mov ecx,DWORD [116+esp] ; ecx = length (arg 3)
122 add ecx,edi
123 mov DWORD [116+esp],ecx ; arg 3 slot now holds input end pointer
124 mov ebp,DWORD [ebx]
125 mov edx,DWORD [4+ebx]
126 mov ecx,DWORD [8+ebx]
127 mov ebx,DWORD [12+ebx]
; Build the 16-entry reduction table on the stack.
128 mov DWORD [16+esp],0
129 mov DWORD [20+esp],471859200
130 mov DWORD [24+esp],943718400
131 mov DWORD [28+esp],610271232
132 mov DWORD [32+esp],1887436800
133 mov DWORD [36+esp],1822425088
134 mov DWORD [40+esp],1220542464
135 mov DWORD [44+esp],1423966208
136 mov DWORD [48+esp],3774873600
137 mov DWORD [52+esp],4246732800
138 mov DWORD [56+esp],3644850176
139 mov DWORD [60+esp],3311403008
140 mov DWORD [64+esp],2441084928
141 mov DWORD [68+esp],2376073216
142 mov DWORD [72+esp],2847932416
143 mov DWORD [76+esp],3051356160
144 align 16
145 L$002x86_outer_loop:
; XOR the next 16 input bytes into the running state and keep a
; byte-addressable copy at [esp..15+esp].
146 xor ebx,DWORD [12+edi]
147 xor ecx,DWORD [8+edi]
148 xor edx,DWORD [4+edi]
149 xor ebp,DWORD [edi]
150 mov DWORD [12+esp],ebx
151 mov DWORD [8+esp],ecx
152 mov DWORD [4+esp],edx
153 mov DWORD [esp],ebp
154 shr ebx,20
155 and ebx,240 ; ebx = (low nibble of byte 15) * 16
156 mov ebp,DWORD [4+ebx*1+esi]
157 mov edx,DWORD [ebx*1+esi]
158 mov ecx,DWORD [12+ebx*1+esi]
159 mov ebx,DWORD [8+ebx*1+esi]
160 xor eax,eax
161 mov edi,15 ; edi is reused as the byte index (15 down to 0)
162 jmp NEAR L$003x86_loop
163 align 16
164 L$003x86_loop:
; Shift ebp:edx:ecx:ebx right by 4, reduce, add entry for the high nibble.
165 mov al,bl
166 shrd ebx,ecx,4
167 and al,15
168 shrd ecx,edx,4
169 shrd edx,ebp,4
170 shr ebp,4
171 xor ebp,DWORD [16+eax*4+esp]
172 mov al,BYTE [edi*1+esp]
173 and al,240
174 xor ebx,DWORD [8+eax*1+esi]
175 xor ecx,DWORD [12+eax*1+esi]
176 xor edx,DWORD [eax*1+esi]
177 xor ebp,DWORD [4+eax*1+esi]
178 dec edi
179 js NEAR L$004x86_break
; Same step for the low nibble of the next (lower-indexed) byte.
180 mov al,bl
181 shrd ebx,ecx,4
182 and al,15
183 shrd ecx,edx,4
184 shrd edx,ebp,4
185 shr ebp,4
186 xor ebp,DWORD [16+eax*4+esp]
187 mov al,BYTE [edi*1+esp]
188 shl al,4
189 xor ebx,DWORD [8+eax*1+esi]
190 xor ecx,DWORD [12+eax*1+esi]
191 xor edx,DWORD [eax*1+esi]
192 xor ebp,DWORD [4+eax*1+esi]
193 jmp NEAR L$003x86_loop
194 align 16
195 L$004x86_break:
196 bswap ebx
197 bswap ecx
198 bswap edx
199 bswap ebp
; Advance the input pointer by one block and loop while it is below the
; end pointer.
200 mov edi,DWORD [112+esp]
201 lea edi,[16+edi]
202 cmp edi,DWORD [116+esp]
203 mov DWORD [112+esp],edi
204 jb NEAR L$002x86_outer_loop
; Write the final state back through the arg 0 pointer.
205 mov edi,DWORD [104+esp]
206 mov DWORD [12+edi],ebx
207 mov DWORD [8+edi],ecx
208 mov DWORD [4+edi],edx
209 mov DWORD [edi],ebp
210 add esp,84
211 pop edi
212 pop esi
213 pop ebx
214 pop ebp
215 ret
;-----------------------------------------------------------------------
; _gcm_gmult_4bit_mmx -- MMX 4-bit table multiply of one block.
; Stack arguments (offsets are relative to esp after the four pushes):
;   [20+esp] -> 16-byte block; read byte-wise, overwritten with the result
;   [24+esp] -> lookup table addressed in 16-byte entries by nibble*16
; eax is loaded with the address of L$rem_4bit using a call/pop sequence
; so no absolute address is needed. Uses mm0-mm2 and executes `emms`
; before storing the result. Saves and restores ebp, ebx, esi, edi.
; Fix: the closing `ret` (listing line 312) was missing, so control fell
; through into _gcm_ghash_4bit_mmx.
;-----------------------------------------------------------------------
216 global _gcm_gmult_4bit_mmx
217 align 16
218 _gcm_gmult_4bit_mmx:
219 L$_gcm_gmult_4bit_mmx_begin:
220 push ebp
221 push ebx
222 push esi
223 push edi
224 mov edi,DWORD [20+esp] ; edi = block pointer (arg 0)
225 mov esi,DWORD [24+esp] ; esi = table pointer (arg 1)
226 call L$005pic_point
227 L$005pic_point:
228 pop eax ; eax = runtime address of L$005pic_point
229 lea eax,[(L$rem_4bit-L$005pic_point)+eax] ; eax = &L$rem_4bit
230 movzx ebx,BYTE [15+edi]
231 xor ecx,ecx
232 mov edx,ebx
233 mov cl,dl
234 mov ebp,14 ; ebp = index of the next byte to fetch (14 down to 0)
235 shl cl,4 ; ecx = (low nibble of byte 15) * 16
236 and edx,240 ; edx = (high nibble of byte 15) * 16
237 movq mm0,[8+ecx*1+esi]
238 movq mm1,[ecx*1+esi]
239 movd ebx,mm0
240 jmp NEAR L$006mmx_loop
241 align 16
242 L$006mmx_loop:
; Shift mm1:mm0 right by 4, fold the shifted-out nibble (ebx & 15) through
; L$rem_4bit, add the table entry selected by edx, and fetch the next byte.
243 psrlq mm0,4
244 and ebx,15
245 movq mm2,mm1
246 psrlq mm1,4
247 pxor mm0,[8+edx*1+esi]
248 mov cl,BYTE [ebp*1+edi]
249 psllq mm2,60
250 pxor mm1,[ebx*8+eax]
251 dec ebp
252 movd ebx,mm0
253 pxor mm1,[edx*1+esi]
254 mov edx,ecx
255 pxor mm0,mm2
256 js NEAR L$007mmx_break ; flags come from `dec ebp` above
; Same step for the low nibble (ecx) of the byte just fetched.
257 shl cl,4
258 and ebx,15
259 psrlq mm0,4
260 and edx,240
261 movq mm2,mm1
262 psrlq mm1,4
263 pxor mm0,[8+ecx*1+esi]
264 psllq mm2,60
265 pxor mm1,[ebx*8+eax]
266 movd ebx,mm0
267 pxor mm1,[ecx*1+esi]
268 pxor mm0,mm2
269 jmp NEAR L$006mmx_loop
270 align 16
271 L$007mmx_break:
; Tail: process both nibbles of byte 0 without fetching another byte.
272 shl cl,4
273 and ebx,15
274 psrlq mm0,4
275 and edx,240
276 movq mm2,mm1
277 psrlq mm1,4
278 pxor mm0,[8+ecx*1+esi]
279 psllq mm2,60
280 pxor mm1,[ebx*8+eax]
281 movd ebx,mm0
282 pxor mm1,[ecx*1+esi]
283 pxor mm0,mm2
284 psrlq mm0,4
285 and ebx,15
286 movq mm2,mm1
287 psrlq mm1,4
288 pxor mm0,[8+edx*1+esi]
289 psllq mm2,60
290 pxor mm1,[ebx*8+eax]
291 movd ebx,mm0
292 pxor mm1,[edx*1+esi]
293 pxor mm0,mm2
; Split mm1:mm0 into four dwords, byte-swap each, and store the result.
294 psrlq mm0,32
295 movd edx,mm1
296 psrlq mm1,32
297 movd ecx,mm0
298 movd ebp,mm1
299 bswap ebx
300 bswap edx
301 bswap ecx
302 bswap ebp
303 emms
304 mov DWORD [12+edi],ebx
305 mov DWORD [4+edi],edx
306 mov DWORD [8+edi],ecx
307 mov DWORD [edi],ebp
308 pop edi
309 pop esi
310 pop ebx
311 pop ebp
312 ret
;-----------------------------------------------------------------------
; _gcm_ghash_4bit_mmx -- MMX table hash over a buffer, one byte per step.
; Stack arguments (offsets are relative to esp after the four pushes):
;   [20+esp] -> 16-byte state; read on entry, overwritten on exit (eax)
;   [24+esp] -> lookup table of sixteen 16-byte entries (ebx)
;   [28+esp] -> input pointer (ecx)
;   [32+esp]  = input length (edx); converted to an end pointer
; The outer loop ends only when the input pointer equals the end pointer
; exactly (`jne`), so the length is presumably a non-zero multiple of 16
; -- confirm against callers.
; Uses pinsrw/pshufw on MMX registers in addition to base MMX.
; esi holds the address of L$rem_8bit (obtained via call/pop).
; Frame (esp is re-aligned; the original esp is kept at [556+esp]):
;   [0..15+esp]    per-entry byte: low byte of (entry dword at +8) << 4
;   [16..143+esp]  copy of each entry's qword at offset +8
;   [144..271+esp] copy of each entry's qword at offset +0
;   [272..399+esp] (qword +8 >> 4) | (qword +0 << 60) for each entry
;   [400..527+esp] (qword +0 >> 4) for each entry
;   [528..543+esp] current state XOR input block
;   [544+esp] state pointer  [548+esp] next input  [552+esp] input end
; Saves and restores ebp, ebx, esi, edi; executes `emms` before returning.
; Fix: the closing `ret` (listing line 914) was missing, so control fell
; through into _gcm_init_clmul.
;-----------------------------------------------------------------------
313 global _gcm_ghash_4bit_mmx
314 align 16
315 _gcm_ghash_4bit_mmx:
316 L$_gcm_ghash_4bit_mmx_begin:
317 push ebp
318 push ebx
319 push esi
320 push edi
321 mov eax,DWORD [20+esp]
322 mov ebx,DWORD [24+esp]
323 mov ecx,DWORD [28+esp]
324 mov edx,DWORD [32+esp]
325 mov ebp,esp ; remember esp so the epilogue can undo the realignment
326 call L$008pic_point
327 L$008pic_point:
328 pop esi
329 lea esi,[(L$rem_8bit-L$008pic_point)+esi] ; esi = &L$rem_8bit
330 sub esp,544
331 and esp,-64
332 sub esp,16
333 add edx,ecx ; edx = input end pointer
334 mov DWORD [544+esp],eax
335 mov DWORD [552+esp],edx
336 mov DWORD [556+esp],ebp
; Bias the table pointer by 128; the sixteen entries are then addressed
; with displacements -128..+120. edi/ebp point into the stack tables.
337 add ebx,128
338 lea edi,[144+esp]
339 lea ebp,[400+esp]
; Table construction, software-pipelined over the sixteen entries.
340 mov edx,DWORD [ebx-120]
341 movq mm0,[ebx-120]
342 movq mm3,[ebx-128]
343 shl edx,4
344 mov BYTE [esp],dl
345 mov edx,DWORD [ebx-104]
346 movq mm2,[ebx-104]
347 movq mm5,[ebx-112]
348 movq [edi-128],mm0
349 psrlq mm0,4
350 movq [edi],mm3
351 movq mm7,mm3
352 psrlq mm3,4
353 shl edx,4
354 mov BYTE [1+esp],dl
355 mov edx,DWORD [ebx-88]
356 movq mm1,[ebx-88]
357 psllq mm7,60
358 movq mm4,[ebx-96]
359 por mm0,mm7
360 movq [edi-120],mm2
361 psrlq mm2,4
362 movq [8+edi],mm5
363 movq mm6,mm5
364 movq [ebp-128],mm0
365 psrlq mm5,4
366 movq [ebp],mm3
367 shl edx,4
368 mov BYTE [2+esp],dl
369 mov edx,DWORD [ebx-72]
370 movq mm0,[ebx-72]
371 psllq mm6,60
372 movq mm3,[ebx-80]
373 por mm2,mm6
374 movq [edi-112],mm1
375 psrlq mm1,4
376 movq [16+edi],mm4
377 movq mm7,mm4
378 movq [ebp-120],mm2
379 psrlq mm4,4
380 movq [8+ebp],mm5
381 shl edx,4
382 mov BYTE [3+esp],dl
383 mov edx,DWORD [ebx-56]
384 movq mm2,[ebx-56]
385 psllq mm7,60
386 movq mm5,[ebx-64]
387 por mm1,mm7
388 movq [edi-104],mm0
389 psrlq mm0,4
390 movq [24+edi],mm3
391 movq mm6,mm3
392 movq [ebp-112],mm1
393 psrlq mm3,4
394 movq [16+ebp],mm4
395 shl edx,4
396 mov BYTE [4+esp],dl
397 mov edx,DWORD [ebx-40]
398 movq mm1,[ebx-40]
399 psllq mm6,60
400 movq mm4,[ebx-48]
401 por mm0,mm6
402 movq [edi-96],mm2
403 psrlq mm2,4
404 movq [32+edi],mm5
405 movq mm7,mm5
406 movq [ebp-104],mm0
407 psrlq mm5,4
408 movq [24+ebp],mm3
409 shl edx,4
410 mov BYTE [5+esp],dl
411 mov edx,DWORD [ebx-24]
412 movq mm0,[ebx-24]
413 psllq mm7,60
414 movq mm3,[ebx-32]
415 por mm2,mm7
416 movq [edi-88],mm1
417 psrlq mm1,4
418 movq [40+edi],mm4
419 movq mm6,mm4
420 movq [ebp-96],mm2
421 psrlq mm4,4
422 movq [32+ebp],mm5
423 shl edx,4
424 mov BYTE [6+esp],dl
425 mov edx,DWORD [ebx-8]
426 movq mm2,[ebx-8]
427 psllq mm6,60
428 movq mm5,[ebx-16]
429 por mm1,mm6
430 movq [edi-80],mm0
431 psrlq mm0,4
432 movq [48+edi],mm3
433 movq mm7,mm3
434 movq [ebp-88],mm1
435 psrlq mm3,4
436 movq [40+ebp],mm4
437 shl edx,4
438 mov BYTE [7+esp],dl
439 mov edx,DWORD [8+ebx]
440 movq mm1,[8+ebx]
441 psllq mm7,60
442 movq mm4,[ebx]
443 por mm0,mm7
444 movq [edi-72],mm2
445 psrlq mm2,4
446 movq [56+edi],mm5
447 movq mm6,mm5
448 movq [ebp-80],mm0
449 psrlq mm5,4
450 movq [48+ebp],mm3
451 shl edx,4
452 mov BYTE [8+esp],dl
453 mov edx,DWORD [24+ebx]
454 movq mm0,[24+ebx]
455 psllq mm6,60
456 movq mm3,[16+ebx]
457 por mm2,mm6
458 movq [edi-64],mm1
459 psrlq mm1,4
460 movq [64+edi],mm4
461 movq mm7,mm4
462 movq [ebp-72],mm2
463 psrlq mm4,4
464 movq [56+ebp],mm5
465 shl edx,4
466 mov BYTE [9+esp],dl
467 mov edx,DWORD [40+ebx]
468 movq mm2,[40+ebx]
469 psllq mm7,60
470 movq mm5,[32+ebx]
471 por mm1,mm7
472 movq [edi-56],mm0
473 psrlq mm0,4
474 movq [72+edi],mm3
475 movq mm6,mm3
476 movq [ebp-64],mm1
477 psrlq mm3,4
478 movq [64+ebp],mm4
479 shl edx,4
480 mov BYTE [10+esp],dl
481 mov edx,DWORD [56+ebx]
482 movq mm1,[56+ebx]
483 psllq mm6,60
484 movq mm4,[48+ebx]
485 por mm0,mm6
486 movq [edi-48],mm2
487 psrlq mm2,4
488 movq [80+edi],mm5
489 movq mm7,mm5
490 movq [ebp-56],mm0
491 psrlq mm5,4
492 movq [72+ebp],mm3
493 shl edx,4
494 mov BYTE [11+esp],dl
495 mov edx,DWORD [72+ebx]
496 movq mm0,[72+ebx]
497 psllq mm7,60
498 movq mm3,[64+ebx]
499 por mm2,mm7
500 movq [edi-40],mm1
501 psrlq mm1,4
502 movq [88+edi],mm4
503 movq mm6,mm4
504 movq [ebp-48],mm2
505 psrlq mm4,4
506 movq [80+ebp],mm5
507 shl edx,4
508 mov BYTE [12+esp],dl
509 mov edx,DWORD [88+ebx]
510 movq mm2,[88+ebx]
511 psllq mm6,60
512 movq mm5,[80+ebx]
513 por mm1,mm6
514 movq [edi-32],mm0
515 psrlq mm0,4
516 movq [96+edi],mm3
517 movq mm7,mm3
518 movq [ebp-40],mm1
519 psrlq mm3,4
520 movq [88+ebp],mm4
521 shl edx,4
522 mov BYTE [13+esp],dl
523 mov edx,DWORD [104+ebx]
524 movq mm1,[104+ebx]
525 psllq mm7,60
526 movq mm4,[96+ebx]
527 por mm0,mm7
528 movq [edi-24],mm2
529 psrlq mm2,4
530 movq [104+edi],mm5
531 movq mm6,mm5
532 movq [ebp-32],mm0
533 psrlq mm5,4
534 movq [96+ebp],mm3
535 shl edx,4
536 mov BYTE [14+esp],dl
537 mov edx,DWORD [120+ebx]
538 movq mm0,[120+ebx]
539 psllq mm6,60
540 movq mm3,[112+ebx]
541 por mm2,mm6
542 movq [edi-16],mm1
543 psrlq mm1,4
544 movq [112+edi],mm4
545 movq mm7,mm4
546 movq [ebp-24],mm2
547 psrlq mm4,4
548 movq [104+ebp],mm5
549 shl edx,4
550 mov BYTE [15+esp],dl
551 psllq mm7,60
552 por mm1,mm7
553 movq [edi-8],mm0
554 psrlq mm0,4
555 movq [120+edi],mm3
556 movq mm6,mm3
557 movq [ebp-16],mm1
558 psrlq mm3,4
559 movq [112+ebp],mm4
560 psllq mm6,60
561 por mm0,mm6
562 movq [ebp-8],mm0
563 movq [120+ebp],mm3
; Load the current state: mm6 = bytes 0..7, ebx = bytes 8..11,
; edx = bytes 12..15.
564 movq mm6,[eax]
565 mov ebx,DWORD [8+eax]
566 mov edx,DWORD [12+eax]
567 align 16
568 L$009outer:
; XOR the next input block into the state, spill it to [528..543+esp],
; and advance/save the input pointer.
569 xor edx,DWORD [12+ecx]
570 xor ebx,DWORD [8+ecx]
571 pxor mm6,[ecx]
572 lea ecx,[16+ecx]
573 mov DWORD [536+esp],ebx
574 movq [528+esp],mm6
575 mov DWORD [548+esp],ecx
; Byte-at-a-time multiply. `rol edx,8` brings the next state byte into dl;
; its low nibble indexes the tables at [16+esp]/[144+esp] and its high
; nibble the pre-shifted tables at [272+esp]/[400+esp]; the byte shifted
; out of the accumulator selects a word of L$rem_8bit through esi.
576 xor eax,eax
577 rol edx,8
578 mov al,dl
579 mov ebp,eax
580 and al,15
581 shr ebp,4
582 pxor mm0,mm0
583 rol edx,8
584 pxor mm1,mm1
585 pxor mm2,mm2
586 movq mm7,[16+eax*8+esp]
587 movq mm6,[144+eax*8+esp]
588 mov al,dl
589 movd ebx,mm7
590 psrlq mm7,8
591 movq mm3,mm6
592 mov edi,eax
593 psrlq mm6,8
594 pxor mm7,[272+ebp*8+esp]
595 and al,15
596 psllq mm3,56
597 shr edi,4
598 pxor mm7,[16+eax*8+esp]
599 rol edx,8
600 pxor mm6,[144+eax*8+esp]
601 pxor mm7,mm3
602 pxor mm6,[400+ebp*8+esp]
603 xor bl,BYTE [ebp*1+esp]
604 mov al,dl
605 movd ecx,mm7
606 movzx ebx,bl
607 psrlq mm7,8
608 movq mm3,mm6
609 mov ebp,eax
610 psrlq mm6,8
611 pxor mm7,[272+edi*8+esp]
612 and al,15
613 psllq mm3,56
614 shr ebp,4
615 pinsrw mm2,WORD [ebx*2+esi],2
616 pxor mm7,[16+eax*8+esp]
617 rol edx,8
618 pxor mm6,[144+eax*8+esp]
619 pxor mm7,mm3
620 pxor mm6,[400+edi*8+esp]
621 xor cl,BYTE [edi*1+esp]
622 mov al,dl
623 mov edx,DWORD [536+esp]
624 movd ebx,mm7
625 movzx ecx,cl
626 psrlq mm7,8
627 movq mm3,mm6
628 mov edi,eax
629 psrlq mm6,8
630 pxor mm7,[272+ebp*8+esp]
631 and al,15
632 psllq mm3,56
633 pxor mm6,mm2
634 shr edi,4
635 pinsrw mm1,WORD [ecx*2+esi],2
636 pxor mm7,[16+eax*8+esp]
637 rol edx,8
638 pxor mm6,[144+eax*8+esp]
639 pxor mm7,mm3
640 pxor mm6,[400+ebp*8+esp]
641 xor bl,BYTE [ebp*1+esp]
642 mov al,dl
643 movd ecx,mm7
644 movzx ebx,bl
645 psrlq mm7,8
646 movq mm3,mm6
647 mov ebp,eax
648 psrlq mm6,8
649 pxor mm7,[272+edi*8+esp]
650 and al,15
651 psllq mm3,56
652 pxor mm6,mm1
653 shr ebp,4
654 pinsrw mm0,WORD [ebx*2+esi],2
655 pxor mm7,[16+eax*8+esp]
656 rol edx,8
657 pxor mm6,[144+eax*8+esp]
658 pxor mm7,mm3
659 pxor mm6,[400+edi*8+esp]
660 xor cl,BYTE [edi*1+esp]
661 mov al,dl
662 movd ebx,mm7
663 movzx ecx,cl
664 psrlq mm7,8
665 movq mm3,mm6
666 mov edi,eax
667 psrlq mm6,8
668 pxor mm7,[272+ebp*8+esp]
669 and al,15
670 psllq mm3,56
671 pxor mm6,mm0
672 shr edi,4
673 pinsrw mm2,WORD [ecx*2+esi],2
674 pxor mm7,[16+eax*8+esp]
675 rol edx,8
676 pxor mm6,[144+eax*8+esp]
677 pxor mm7,mm3
678 pxor mm6,[400+ebp*8+esp]
679 xor bl,BYTE [ebp*1+esp]
680 mov al,dl
681 movd ecx,mm7
682 movzx ebx,bl
683 psrlq mm7,8
684 movq mm3,mm6
685 mov ebp,eax
686 psrlq mm6,8
687 pxor mm7,[272+edi*8+esp]
688 and al,15
689 psllq mm3,56
690 pxor mm6,mm2
691 shr ebp,4
692 pinsrw mm1,WORD [ebx*2+esi],2
693 pxor mm7,[16+eax*8+esp]
694 rol edx,8
695 pxor mm6,[144+eax*8+esp]
696 pxor mm7,mm3
697 pxor mm6,[400+edi*8+esp]
698 xor cl,BYTE [edi*1+esp]
699 mov al,dl
700 mov edx,DWORD [532+esp]
701 movd ebx,mm7
702 movzx ecx,cl
703 psrlq mm7,8
704 movq mm3,mm6
705 mov edi,eax
706 psrlq mm6,8
707 pxor mm7,[272+ebp*8+esp]
708 and al,15
709 psllq mm3,56
710 pxor mm6,mm1
711 shr edi,4
712 pinsrw mm0,WORD [ecx*2+esi],2
713 pxor mm7,[16+eax*8+esp]
714 rol edx,8
715 pxor mm6,[144+eax*8+esp]
716 pxor mm7,mm3
717 pxor mm6,[400+ebp*8+esp]
718 xor bl,BYTE [ebp*1+esp]
719 mov al,dl
720 movd ecx,mm7
721 movzx ebx,bl
722 psrlq mm7,8
723 movq mm3,mm6
724 mov ebp,eax
725 psrlq mm6,8
726 pxor mm7,[272+edi*8+esp]
727 and al,15
728 psllq mm3,56
729 pxor mm6,mm0
730 shr ebp,4
731 pinsrw mm2,WORD [ebx*2+esi],2
732 pxor mm7,[16+eax*8+esp]
733 rol edx,8
734 pxor mm6,[144+eax*8+esp]
735 pxor mm7,mm3
736 pxor mm6,[400+edi*8+esp]
737 xor cl,BYTE [edi*1+esp]
738 mov al,dl
739 movd ebx,mm7
740 movzx ecx,cl
741 psrlq mm7,8
742 movq mm3,mm6
743 mov edi,eax
744 psrlq mm6,8
745 pxor mm7,[272+ebp*8+esp]
746 and al,15
747 psllq mm3,56
748 pxor mm6,mm2
749 shr edi,4
750 pinsrw mm1,WORD [ecx*2+esi],2
751 pxor mm7,[16+eax*8+esp]
752 rol edx,8
753 pxor mm6,[144+eax*8+esp]
754 pxor mm7,mm3
755 pxor mm6,[400+ebp*8+esp]
756 xor bl,BYTE [ebp*1+esp]
757 mov al,dl
758 movd ecx,mm7
759 movzx ebx,bl
760 psrlq mm7,8
761 movq mm3,mm6
762 mov ebp,eax
763 psrlq mm6,8
764 pxor mm7,[272+edi*8+esp]
765 and al,15
766 psllq mm3,56
767 pxor mm6,mm1
768 shr ebp,4
769 pinsrw mm0,WORD [ebx*2+esi],2
770 pxor mm7,[16+eax*8+esp]
771 rol edx,8
772 pxor mm6,[144+eax*8+esp]
773 pxor mm7,mm3
774 pxor mm6,[400+edi*8+esp]
775 xor cl,BYTE [edi*1+esp]
776 mov al,dl
777 mov edx,DWORD [528+esp]
778 movd ebx,mm7
779 movzx ecx,cl
780 psrlq mm7,8
781 movq mm3,mm6
782 mov edi,eax
783 psrlq mm6,8
784 pxor mm7,[272+ebp*8+esp]
785 and al,15
786 psllq mm3,56
787 pxor mm6,mm0
788 shr edi,4
789 pinsrw mm2,WORD [ecx*2+esi],2
790 pxor mm7,[16+eax*8+esp]
791 rol edx,8
792 pxor mm6,[144+eax*8+esp]
793 pxor mm7,mm3
794 pxor mm6,[400+ebp*8+esp]
795 xor bl,BYTE [ebp*1+esp]
796 mov al,dl
797 movd ecx,mm7
798 movzx ebx,bl
799 psrlq mm7,8
800 movq mm3,mm6
801 mov ebp,eax
802 psrlq mm6,8
803 pxor mm7,[272+edi*8+esp]
804 and al,15
805 psllq mm3,56
806 pxor mm6,mm2
807 shr ebp,4
808 pinsrw mm1,WORD [ebx*2+esi],2
809 pxor mm7,[16+eax*8+esp]
810 rol edx,8
811 pxor mm6,[144+eax*8+esp]
812 pxor mm7,mm3
813 pxor mm6,[400+edi*8+esp]
814 xor cl,BYTE [edi*1+esp]
815 mov al,dl
816 movd ebx,mm7
817 movzx ecx,cl
818 psrlq mm7,8
819 movq mm3,mm6
820 mov edi,eax
821 psrlq mm6,8
822 pxor mm7,[272+ebp*8+esp]
823 and al,15
824 psllq mm3,56
825 pxor mm6,mm1
826 shr edi,4
827 pinsrw mm0,WORD [ecx*2+esi],2
828 pxor mm7,[16+eax*8+esp]
829 rol edx,8
830 pxor mm6,[144+eax*8+esp]
831 pxor mm7,mm3
832 pxor mm6,[400+ebp*8+esp]
833 xor bl,BYTE [ebp*1+esp]
834 mov al,dl
835 movd ecx,mm7
836 movzx ebx,bl
837 psrlq mm7,8
838 movq mm3,mm6
839 mov ebp,eax
840 psrlq mm6,8
841 pxor mm7,[272+edi*8+esp]
842 and al,15
843 psllq mm3,56
844 pxor mm6,mm0
845 shr ebp,4
846 pinsrw mm2,WORD [ebx*2+esi],2
847 pxor mm7,[16+eax*8+esp]
848 rol edx,8
849 pxor mm6,[144+eax*8+esp]
850 pxor mm7,mm3
851 pxor mm6,[400+edi*8+esp]
852 xor cl,BYTE [edi*1+esp]
853 mov al,dl
854 mov edx,DWORD [524+esp]
855 movd ebx,mm7
856 movzx ecx,cl
857 psrlq mm7,8
858 movq mm3,mm6
859 mov edi,eax
860 psrlq mm6,8
861 pxor mm7,[272+ebp*8+esp]
862 and al,15
863 psllq mm3,56
864 pxor mm6,mm2
865 shr edi,4
866 pinsrw mm1,WORD [ecx*2+esi],2
867 pxor mm7,[16+eax*8+esp]
868 pxor mm6,[144+eax*8+esp]
869 xor bl,BYTE [ebp*1+esp]
870 pxor mm7,mm3
871 pxor mm6,[400+ebp*8+esp]
872 movzx ebx,bl
; Final step uses 4-bit shifts instead of 8, then folds the pending
; L$rem_8bit words (mm0, mm1, mm2) into mm6.
873 pxor mm2,mm2
874 psllq mm1,4
875 movd ecx,mm7
876 psrlq mm7,4
877 movq mm3,mm6
878 psrlq mm6,4
879 shl ecx,4
880 pxor mm7,[16+edi*8+esp]
881 psllq mm3,60
882 movzx ecx,cl
883 pxor mm7,mm3
884 pxor mm6,[144+edi*8+esp]
885 pinsrw mm0,WORD [ebx*2+esi],2
886 pxor mm6,mm1
887 movd edx,mm7
888 pinsrw mm2,WORD [ecx*2+esi],3
889 psllq mm0,12
890 pxor mm6,mm0
891 psrlq mm7,32
892 pxor mm6,mm2
893 mov ecx,DWORD [548+esp] ; reload the saved input pointer
894 movd ebx,mm7
; Byte-reverse mm6 (swap bytes within words, then reverse the four words)
; and byte-swap edx/ebx.
895 movq mm3,mm6
896 psllw mm6,8
897 psrlw mm3,8
898 por mm6,mm3
899 bswap edx
900 pshufw mm6,mm6,27
901 bswap ebx
902 cmp ecx,DWORD [552+esp]
903 jne NEAR L$009outer
; Store the final state and restore the caller's stack pointer.
904 mov eax,DWORD [544+esp]
905 mov DWORD [12+eax],edx
906 mov DWORD [8+eax],ebx
907 movq [eax],mm6
908 mov esp,DWORD [556+esp]
909 emms
910 pop edi
911 pop esi
912 pop ebx
913 pop ebp
914 ret
;-----------------------------------------------------------------------
; _gcm_init_clmul -- build the 48-byte key table used by the clmul paths.
; Stack arguments (no prologue, so offsets are relative to entry esp):
;   [4+esp] -> output table; 16-byte values are written at +0, +16, +32
;   [8+esp] -> 16-byte input value, read with an unaligned load
; ecx holds the address of L$bswap (obtained via call/pop); only its
; second 16-byte row ([16+ecx]) is used here. Touches eax, ecx, edx and
; xmm0-xmm5 only; nothing is pushed.
; The `db` sequences are hand-encoded SSE instructions; each is decoded
; in its trailing comment.
; Fix: the closing `ret` (listing line 981) was missing, so control fell
; through into _gcm_gmult_clmul.
;-----------------------------------------------------------------------
915 global _gcm_init_clmul
916 align 16
917 _gcm_init_clmul:
918 L$_gcm_init_clmul_begin:
919 mov edx,DWORD [4+esp] ; edx = output table (arg 0)
920 mov eax,DWORD [8+esp] ; eax = input value (arg 1)
921 call L$010pic
922 L$010pic:
923 pop ecx
924 lea ecx,[(L$bswap-L$010pic)+ecx] ; ecx = &L$bswap
925 movdqu xmm2,[eax]
926 pshufd xmm2,xmm2,78 ; swap the two 64-bit halves
; Shift the 128-bit value left by one bit and, when the top bit was set,
; XOR in the constant at [16+ecx].
927 pshufd xmm4,xmm2,255
928 movdqa xmm3,xmm2
929 psllq xmm2,1
930 pxor xmm5,xmm5
931 psrlq xmm3,63
932 pcmpgtd xmm5,xmm4 ; all-ones mask when the top dword is negative
933 pslldq xmm3,8
934 por xmm2,xmm3
935 pand xmm5,[16+ecx]
936 pxor xmm2,xmm5
; Square the adjusted value: three carry-less multiplies (Karatsuba
; shape), followed by the two-phase shift/xor reduction.
937 movdqa xmm0,xmm2
938 movdqa xmm1,xmm0
939 pshufd xmm3,xmm0,78
940 pshufd xmm4,xmm2,78
941 pxor xmm3,xmm0
942 pxor xmm4,xmm2
943 db 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
944 db 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
945 db 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
946 xorps xmm3,xmm0
947 xorps xmm3,xmm1
948 movdqa xmm4,xmm3
949 psrldq xmm3,8
950 pslldq xmm4,8
951 pxor xmm1,xmm3
952 pxor xmm0,xmm4
953 movdqa xmm4,xmm0
954 movdqa xmm3,xmm0
955 psllq xmm0,5
956 pxor xmm3,xmm0
957 psllq xmm0,1
958 pxor xmm0,xmm3
959 psllq xmm0,57
960 movdqa xmm3,xmm0
961 pslldq xmm0,8
962 psrldq xmm3,8
963 pxor xmm0,xmm4
964 pxor xmm1,xmm3
965 movdqa xmm4,xmm0
966 psrlq xmm0,1
967 pxor xmm1,xmm4
968 pxor xmm4,xmm0
969 psrlq xmm0,5
970 pxor xmm0,xmm4
971 psrlq xmm0,1
972 pxor xmm0,xmm1
; Store the adjusted value (+0), its square (+16), and the packed
; XOR-of-halves of both (+32).
973 pshufd xmm3,xmm2,78
974 pshufd xmm4,xmm0,78
975 pxor xmm3,xmm2
976 movdqu [edx],xmm2
977 pxor xmm4,xmm0
978 movdqu [16+edx],xmm0
979 db 102,15,58,15,227,8 ; palignr xmm4,xmm3,8
980 movdqu [32+edx],xmm4
981 ret
;-----------------------------------------------------------------------
; _gcm_gmult_clmul -- carry-less-multiply version of the one-block multiply.
; Stack arguments (no prologue, so offsets are relative to entry esp):
;   [4+esp] -> 16-byte block; read on entry, overwritten with the result
;   [8+esp] -> key table; 16-byte values are read at +0 and +32
; xmm5 is loaded from L$bswap (address obtained via call/pop) and used as
; the pshufb control on the way in and on the way out.
; Touches eax, ecx, edx and xmm0-xmm5 only; nothing is pushed.
; Fix: the closing `ret` (listing line 1032) was missing, so control fell
; through into _gcm_ghash_clmul.
;-----------------------------------------------------------------------
982 global _gcm_gmult_clmul
983 align 16
984 _gcm_gmult_clmul:
985 L$_gcm_gmult_clmul_begin:
986 mov eax,DWORD [4+esp] ; eax = block pointer (arg 0)
987 mov edx,DWORD [8+esp] ; edx = key table (arg 1)
988 call L$011pic
989 L$011pic:
990 pop ecx
991 lea ecx,[(L$bswap-L$011pic)+ecx] ; ecx = &L$bswap
992 movdqu xmm0,[eax]
993 movdqa xmm5,[ecx] ; aligned load: L$bswap is preceded by `align 64`
994 movups xmm2,[edx]
995 db 102,15,56,0,197 ; pshufb xmm0,xmm5
996 movups xmm4,[32+edx]
; Three carry-less multiplies (Karatsuba shape).
997 movdqa xmm1,xmm0
998 pshufd xmm3,xmm0,78
999 pxor xmm3,xmm0
1000 db 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
1001 db 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
1002 db 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
1003 xorps xmm3,xmm0
1004 xorps xmm3,xmm1
1005 movdqa xmm4,xmm3
1006 psrldq xmm3,8
1007 pslldq xmm4,8
1008 pxor xmm1,xmm3
1009 pxor xmm0,xmm4
; Two-phase shift/xor reduction of xmm1:xmm0 into xmm0.
1010 movdqa xmm4,xmm0
1011 movdqa xmm3,xmm0
1012 psllq xmm0,5
1013 pxor xmm3,xmm0
1014 psllq xmm0,1
1015 pxor xmm0,xmm3
1016 psllq xmm0,57
1017 movdqa xmm3,xmm0
1018 pslldq xmm0,8
1019 psrldq xmm3,8
1020 pxor xmm0,xmm4
1021 pxor xmm1,xmm3
1022 movdqa xmm4,xmm0
1023 psrlq xmm0,1
1024 pxor xmm1,xmm4
1025 pxor xmm4,xmm0
1026 psrlq xmm0,5
1027 pxor xmm0,xmm4
1028 psrlq xmm0,1
1029 pxor xmm0,xmm1
1030 db 102,15,56,0,197 ; pshufb xmm0,xmm5
1031 movdqu [eax],xmm0
1032 ret
;-----------------------------------------------------------------------
; _gcm_ghash_clmul -- carry-less-multiply hash over a buffer.
; Stack arguments (offsets are relative to esp after the four pushes):
;   [20+esp] -> 16-byte state; read on entry, overwritten on exit (eax)
;   [24+esp] -> key table; 16-byte values are read at +0, +16, +32 (edx)
;   [28+esp] -> input pointer (esi)
;   [32+esp]  = input length in bytes (ebx)
; Control flow: a length of exactly 16 goes straight to L$013odd_tail;
; otherwise blocks are consumed two at a time (L$015mod_loop and
; L$014even_tail) and a single leftover block is handled by
; L$013odd_tail.
; ecx holds the address of L$bswap; xmm5 doubles as the pshufb control and
; as a key-table operand, so it is reloaded from [ecx] where needed.
; Saves and restores ebp, ebx, esi, edi.
; Fix: the closing `ret` (listing line 1217) was missing, so execution ran
; on into the L$bswap data that follows.
;-----------------------------------------------------------------------
1033 global _gcm_ghash_clmul
1034 align 16
1035 _gcm_ghash_clmul:
1036 L$_gcm_ghash_clmul_begin:
1037 push ebp
1038 push ebx
1039 push esi
1040 push edi
1041 mov eax,DWORD [20+esp]
1042 mov edx,DWORD [24+esp]
1043 mov esi,DWORD [28+esp]
1044 mov ebx,DWORD [32+esp]
1045 call L$012pic
1046 L$012pic:
1047 pop ecx
1048 lea ecx,[(L$bswap-L$012pic)+ecx] ; ecx = &L$bswap
1049 movdqu xmm0,[eax]
1050 movdqa xmm5,[ecx]
1051 movdqu xmm2,[edx]
1052 db 102,15,56,0,197 ; pshufb xmm0,xmm5
1053 sub ebx,16
1054 jz NEAR L$013odd_tail ; exactly one block of input
; Load and byte-reverse the first two blocks; start the multiplies for
; the second block.
1055 movdqu xmm3,[esi]
1056 movdqu xmm6,[16+esi]
1057 db 102,15,56,0,221 ; pshufb xmm3,xmm5
1058 db 102,15,56,0,245 ; pshufb xmm6,xmm5
1059 movdqu xmm5,[32+edx]
1060 pxor xmm0,xmm3
1061 pshufd xmm3,xmm6,78
1062 movdqa xmm7,xmm6
1063 pxor xmm3,xmm6
1064 lea esi,[32+esi]
1065 db 102,15,58,68,242,0 ; pclmulqdq xmm6,xmm2,0x00
1066 db 102,15,58,68,250,17 ; pclmulqdq xmm7,xmm2,0x11
1067 db 102,15,58,68,221,0 ; pclmulqdq xmm3,xmm5,0x00
1068 movups xmm2,[16+edx]
1070 sub ebx,32
1071 jbe NEAR L$014even_tail
1072 jmp NEAR L$015mod_loop
1073 align 32
1074 L$015mod_loop:
; Main loop: finish the multiply/reduction for the pending pair while
; loading and starting the multiplies for the next two blocks.
1075 pshufd xmm4,xmm0,78
1076 movdqa xmm1,xmm0
1077 pxor xmm4,xmm0
1079 db 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
1080 db 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
1081 db 102,15,58,68,229,16 ; pclmulqdq xmm4,xmm5,0x10
1082 movups xmm2,[edx]
1083 xorps xmm0,xmm6
1084 movdqa xmm5,[ecx]
1085 xorps xmm1,xmm7
1086 movdqu xmm7,[esi]
1087 pxor xmm3,xmm0
1088 movdqu xmm6,[16+esi]
1089 pxor xmm3,xmm1
1090 db 102,15,56,0,253 ; pshufb xmm7,xmm5
1091 pxor xmm4,xmm3
1092 movdqa xmm3,xmm4
1093 psrldq xmm4,8
1094 pslldq xmm3,8
1095 pxor xmm1,xmm4
1096 pxor xmm0,xmm3
1097 db 102,15,56,0,245 ; pshufb xmm6,xmm5
1098 pxor xmm1,xmm7
1099 movdqa xmm7,xmm6
1100 movdqa xmm4,xmm0
1101 movdqa xmm3,xmm0
1102 psllq xmm0,5
1103 pxor xmm3,xmm0
1104 psllq xmm0,1
1105 pxor xmm0,xmm3
1106 db 102,15,58,68,242,0 ; pclmulqdq xmm6,xmm2,0x00
1107 movups xmm5,[32+edx]
1108 psllq xmm0,57
1109 movdqa xmm3,xmm0
1110 pslldq xmm0,8
1111 psrldq xmm3,8
1112 pxor xmm0,xmm4
1113 pxor xmm1,xmm3
1114 pshufd xmm3,xmm7,78
1115 movdqa xmm4,xmm0
1116 psrlq xmm0,1
1117 pxor xmm3,xmm7
1118 pxor xmm1,xmm4
1119 db 102,15,58,68,250,17 ; pclmulqdq xmm7,xmm2,0x11
1120 movups xmm2,[16+edx]
1121 pxor xmm4,xmm0
1122 psrlq xmm0,5
1123 pxor xmm0,xmm4
1124 psrlq xmm0,1
1125 pxor xmm0,xmm1
1126 db 102,15,58,68,221,0 ; pclmulqdq xmm3,xmm5,0x00
1127 lea esi,[32+esi]
1128 sub ebx,32
1129 ja NEAR L$015mod_loop
1130 L$014even_tail:
; Finish the pending pair without loading further input.
1131 pshufd xmm4,xmm0,78
1132 movdqa xmm1,xmm0
1133 pxor xmm4,xmm0
1134 db 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
1135 db 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
1136 db 102,15,58,68,229,16 ; pclmulqdq xmm4,xmm5,0x10
1137 movdqa xmm5,[ecx]
1138 xorps xmm0,xmm6
1139 xorps xmm1,xmm7
1140 pxor xmm3,xmm0
1141 pxor xmm3,xmm1
1142 pxor xmm4,xmm3
1143 movdqa xmm3,xmm4
1144 psrldq xmm4,8
1145 pslldq xmm3,8
1146 pxor xmm1,xmm4
1147 pxor xmm0,xmm3
1148 movdqa xmm4,xmm0
1149 movdqa xmm3,xmm0
1150 psllq xmm0,5
1151 pxor xmm3,xmm0
1152 psllq xmm0,1
1153 pxor xmm0,xmm3
1154 psllq xmm0,57
1155 movdqa xmm3,xmm0
1156 pslldq xmm0,8
1157 psrldq xmm3,8
1158 pxor xmm0,xmm4
1159 pxor xmm1,xmm3
1160 movdqa xmm4,xmm0
1161 psrlq xmm0,1
1162 pxor xmm1,xmm4
1163 pxor xmm4,xmm0
1164 psrlq xmm0,5
1165 pxor xmm0,xmm4
1166 psrlq xmm0,1
1167 pxor xmm0,xmm1
1168 test ebx,ebx
1169 jnz NEAR L$016done ; non-zero remainder counter: no leftover block
1170 movups xmm2,[edx]
1171 L$013odd_tail:
; Single remaining block: XOR it into the state, then multiply by the
; table value at +0 and reduce.
1172 movdqu xmm3,[esi]
1173 db 102,15,56,0,221 ; pshufb xmm3,xmm5
1174 pxor xmm0,xmm3
1175 movdqa xmm1,xmm0
1176 pshufd xmm3,xmm0,78
1177 pshufd xmm4,xmm2,78
1178 pxor xmm3,xmm0
1179 pxor xmm4,xmm2
1180 db 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
1181 db 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
1182 db 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
1183 xorps xmm3,xmm0
1184 xorps xmm3,xmm1
1185 movdqa xmm4,xmm3
1186 psrldq xmm3,8
1187 pslldq xmm4,8
1188 pxor xmm1,xmm3
1189 pxor xmm0,xmm4
1190 movdqa xmm4,xmm0
1191 movdqa xmm3,xmm0
1192 psllq xmm0,5
1193 pxor xmm3,xmm0
1194 psllq xmm0,1
1195 pxor xmm0,xmm3
1196 psllq xmm0,57
1197 movdqa xmm3,xmm0
1198 pslldq xmm0,8
1199 psrldq xmm3,8
1200 pxor xmm0,xmm4
1201 pxor xmm1,xmm3
1202 movdqa xmm4,xmm0
1203 psrlq xmm0,1
1204 pxor xmm1,xmm4
1205 pxor xmm4,xmm0
1206 psrlq xmm0,5
1207 pxor xmm0,xmm4
1208 psrlq xmm0,1
1209 pxor xmm0,xmm1
1210 L$016done:
; Byte-reverse the state again and store it through the arg 0 pointer.
1211 db 102,15,56,0,197 ; pshufb xmm0,xmm5
1212 movdqu [eax],xmm0
1213 pop edi
1214 pop esi
1215 pop ebx
1216 pop ebp
1217 ret
; Read-only constant tables referenced by the routines in this file. Each
; table starts on a 64-byte boundary.
1218 align 64
1219 L$bswap:
; Row 0: byte indices 15..0, loaded into xmm5 by the clmul routines and
; used as the pshufb control (reverses the 16 bytes of a register).
1220 db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; Row 1: 16-byte constant (low byte 1, high byte 194 = 0xC2), ANDed into
; the mask in _gcm_init_clmul via [16+ecx].
1221 db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
1222 align 64
1223 L$rem_8bit:
; 256 16-bit entries, indexed by a zero-extended byte ([reg*2+esi]) in
; _gcm_ghash_4bit_mmx.
1224 dw 0,450,900,582,1800,1738,1164,1358
1225 dw 3600,4050,3476,3158,2328,2266,2716,2910
1226 dw 7200,7650,8100,7782,6952,6890,6316,6510
1227 dw 4656,5106,4532,4214,5432,5370,5820,6014
1228 dw 14400,14722,15300,14854,16200,16010,15564,15630
1229 dw 13904,14226,13780,13334,12632,12442,13020,13086
1230 dw 9312,9634,10212,9766,9064,8874,8428,8494
1231 dw 10864,11186,10740,10294,11640,11450,12028,12094
1232 dw 28800,28994,29444,29382,30600,30282,29708,30158
1233 dw 32400,32594,32020,31958,31128,30810,31260,31710
1234 dw 27808,28002,28452,28390,27560,27242,26668,27118
1235 dw 25264,25458,24884,24822,26040,25722,26172,26622
1236 dw 18624,18690,19268,19078,20424,19978,19532,19854
1237 dw 18128,18194,17748,17558,16856,16410,16988,17310
1238 dw 21728,21794,22372,22182,21480,21034,20588,20910
1239 dw 23280,23346,22900,22710,24056,23610,24188,24510
1240 dw 57600,57538,57988,58182,58888,59338,58764,58446
1241 dw 61200,61138,60564,60758,59416,59866,60316,59998
1242 dw 64800,64738,65188,65382,64040,64490,63916,63598
1243 dw 62256,62194,61620,61814,62520,62970,63420,63102
1244 dw 55616,55426,56004,56070,56904,57226,56780,56334
1245 dw 55120,54930,54484,54550,53336,53658,54236,53790
1246 dw 50528,50338,50916,50982,49768,50090,49644,49198
1247 dw 52080,51890,51444,51510,52344,52666,53244,52798
1248 dw 37248,36930,37380,37830,38536,38730,38156,38094
1249 dw 40848,40530,39956,40406,39064,39258,39708,39646
1250 dw 36256,35938,36388,36838,35496,35690,35116,35054
1251 dw 33712,33394,32820,33270,33976,34170,34620,34558
1252 dw 43456,43010,43588,43910,44744,44810,44364,44174
1253 dw 42960,42514,42068,42390,41176,41242,41820,41630
1254 dw 46560,46114,46692,47014,45800,45866,45420,45230
1255 dw 48112,47666,47220,47542,48376,48442,49020,48830
1256 align 64
1257 L$rem_4bit:
; 16 qword entries stored as dword pairs (low dword 0, high dword = value),
; indexed by a 4-bit value ([ebx*8+eax]) in _gcm_gmult_4bit_mmx. The high
; dwords are the same constants the *_4bit_x86 routines write to their
; stack tables.
1258 dd 0,0,0,471859200,0,943718400,0,610271232
1259 dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208
1260 dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008
1261 dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160
; NUL-terminated ASCII identification string:
; "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
1262 db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
1263 db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
1264 db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
1265 db 0