/*
 * File preamble: target guard, assembler notes, symbol visibility.
 * NOTE(review): the embedded line numbers skip, so the matching
 * #else/#endif/#include lines are not visible here - confirm the exact
 * conditional nesting against the full file.
 */
/* Everything below is only assembled for x86-64 targets. */
1 #if defined(__x86_64__)
3 #include "llvm_blake3_prefix.h"
/* On ELF targets other than Solaris, emit an empty .note.GNU-stack section. */
5 #if defined(__ELF__) && !(defined(__sun__) && defined(__svr4__))
6 .section .note.GNU-stack,"",%progbits
/* CET support: look for <cet.h> when building ELF with __CET__; the */
/* _CET_ENDBR check presumably supplies a fallback definition - confirm. */
9 #if defined(__ELF__) && defined(__CET__) && defined(__has_include)
10 #if __has_include(<cet.h>)
15 #if !defined(_CET_ENDBR)
/* HIDDEN expands to the platform's "do not export" directive; the */
/* condition selecting between the two definitions is not visible here. */
20 #define HIDDEN .private_extern
22 #define HIDDEN .hidden
/* All instructions in this file use Intel operand order without % prefixes. */
25 .intel_syntax noprefix
/* Each entry point is declared both with and without a leading underscore. */
26 HIDDEN blake3_hash_many_sse2
27 HIDDEN _blake3_hash_many_sse2
28 HIDDEN blake3_compress_in_place_sse2
29 HIDDEN _blake3_compress_in_place_sse2
30 HIDDEN blake3_compress_xof_sse2
31 HIDDEN _blake3_compress_xof_sse2
32 .global blake3_hash_many_sse2
33 .global _blake3_hash_many_sse2
34 .global blake3_compress_in_place_sse2
35 .global _blake3_compress_in_place_sse2
36 .global blake3_compress_xof_sse2
37 .global _blake3_compress_xof_sse2
/*
 * blake3_hash_many_sse2 / _blake3_hash_many_sse2 (two labels, one entry).
 *
 * NOTE(review): the embedded line numbers skip, so this view omits
 * instructions (prologue/epilogue, branches, local labels and most of the
 * arithmetic between the lines shown). The comments below describe only
 * what the visible lines do; confirm details against the full file.
 *
 * Visible register use:
 *   rdi  - base of an array of input pointers (qword loads at +0, +8, ...)
 *   rdx  - byte offset applied to every input pointer
 *   rcx  - points at 32 bytes loaded as the starting state
 *   rbp  - base for stack-passed values read at +0x38, +0x40, +0x48, +0x50
 *   rbx  - loaded from [rbp+0x50]; destination of the final stores
 *
 * Visible stack scratch (relative to the 64-byte aligned rsp):
 *   [rsp+0x000 .. +0x0FF]  16 message vectors (4-input path)
 *   [rsp+0x100]            spill slot for xmm8
 *   [rsp+0x110], [+0x120]  two per-lane vectors, updated after each output
 *                          (presumably counter low/high words - confirm)
 *   [rsp+0x130], [+0x150]  further saved vectors
 */
44 _blake3_hash_many_sse2:
45 blake3_hash_many_sse2:
/* Round rsp down to a 64-byte boundary; the movdqa/movaps stack accesses */
/* below require aligned addresses. */
55 and rsp, 0xFFFFFFFFFFFFFFC0
/* Set up and save per-call vectors. pshufd with 0x00 broadcasts dword 0. */
/* ADD0, ADD1 and CMP_MSB_MASK are constants defined outside this view. */
58 pshufd xmm0, xmm0, 0x00
59 movdqa xmmword ptr [rsp+0x130], xmm0
61 pand xmm1, xmmword ptr [ADD0+rip]
62 pand xmm0, xmmword ptr [ADD1+rip]
63 movdqa xmmword ptr [rsp+0x150], xmm0
65 pshufd xmm0, xmm0, 0x00
67 movdqa xmmword ptr [rsp+0x110], xmm0
68 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
69 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
73 pshufd xmm2, xmm2, 0x00
75 movdqa xmmword ptr [rsp+0x120], xmm2
/* Stack-passed values: qword at rbp+0x50 into rbx, bytes at +0x38/+0x48. */
76 mov rbx, qword ptr [rbp+0x50]
79 movzx r13d, byte ptr [rbp+0x38]
80 movzx r12d, byte ptr [rbp+0x48]
/* Load 32 bytes from [rcx] and broadcast each of the eight dwords across */
/* its own register: xmm0..xmm3 from the first 16 bytes, xmm4..xmm7 from */
/* the second. */
84 movdqu xmm3, xmmword ptr [rcx]
85 pshufd xmm0, xmm3, 0x00
86 pshufd xmm1, xmm3, 0x55
87 pshufd xmm2, xmm3, 0xAA
88 pshufd xmm3, xmm3, 0xFF
89 movdqu xmm7, xmmword ptr [rcx+0x10]
90 pshufd xmm4, xmm7, 0x00
91 pshufd xmm5, xmm7, 0x55
92 pshufd xmm6, xmm7, 0xAA
93 pshufd xmm7, xmm7, 0xFF
/* Fetch the four input pointers; eax gets the byte at [rbp+0x40]. */
94 mov r8, qword ptr [rdi]
95 mov r9, qword ptr [rdi+0x8]
96 mov r10, qword ptr [rdi+0x10]
97 mov r11, qword ptr [rdi+0x18]
98 movzx eax, byte ptr [rbp+0x40]
/* Load 16 bytes at offset rdx-0x40 from each input, interleave them with */
/* punpck*, and store the four results at [rsp+0x00 .. +0x30]. */
107 movdqu xmm8, xmmword ptr [r8+rdx-0x40]
108 movdqu xmm9, xmmword ptr [r9+rdx-0x40]
109 movdqu xmm10, xmmword ptr [r10+rdx-0x40]
110 movdqu xmm11, xmmword ptr [r11+rdx-0x40]
113 punpckhdq xmm12, xmm9
115 punpckldq xmm10, xmm11
116 punpckhdq xmm14, xmm11
118 punpcklqdq xmm8, xmm10
119 punpckhqdq xmm9, xmm10
121 punpcklqdq xmm12, xmm14
122 punpckhqdq xmm13, xmm14
123 movdqa xmmword ptr [rsp], xmm8
124 movdqa xmmword ptr [rsp+0x10], xmm9
125 movdqa xmmword ptr [rsp+0x20], xmm12
126 movdqa xmmword ptr [rsp+0x30], xmm13
/* Same for the 16 bytes at rdx-0x30; results go to [rsp+0x40 .. +0x70]. */
127 movdqu xmm8, xmmword ptr [r8+rdx-0x30]
128 movdqu xmm9, xmmword ptr [r9+rdx-0x30]
129 movdqu xmm10, xmmword ptr [r10+rdx-0x30]
130 movdqu xmm11, xmmword ptr [r11+rdx-0x30]
133 punpckhdq xmm12, xmm9
135 punpckldq xmm10, xmm11
136 punpckhdq xmm14, xmm11
138 punpcklqdq xmm8, xmm10
139 punpckhqdq xmm9, xmm10
141 punpcklqdq xmm12, xmm14
142 punpckhqdq xmm13, xmm14
143 movdqa xmmword ptr [rsp+0x40], xmm8
144 movdqa xmmword ptr [rsp+0x50], xmm9
145 movdqa xmmword ptr [rsp+0x60], xmm12
146 movdqa xmmword ptr [rsp+0x70], xmm13
/* Same for the 16 bytes at rdx-0x20; results go to [rsp+0x80 .. +0xB0]. */
147 movdqu xmm8, xmmword ptr [r8+rdx-0x20]
148 movdqu xmm9, xmmword ptr [r9+rdx-0x20]
149 movdqu xmm10, xmmword ptr [r10+rdx-0x20]
150 movdqu xmm11, xmmword ptr [r11+rdx-0x20]
153 punpckhdq xmm12, xmm9
155 punpckldq xmm10, xmm11
156 punpckhdq xmm14, xmm11
158 punpcklqdq xmm8, xmm10
159 punpckhqdq xmm9, xmm10
161 punpcklqdq xmm12, xmm14
162 punpckhqdq xmm13, xmm14
163 movdqa xmmword ptr [rsp+0x80], xmm8
164 movdqa xmmword ptr [rsp+0x90], xmm9
165 movdqa xmmword ptr [rsp+0xA0], xmm12
166 movdqa xmmword ptr [rsp+0xB0], xmm13
/* Same for the 16 bytes at rdx-0x10; results go to [rsp+0xC0 .. +0xF0]. */
167 movdqu xmm8, xmmword ptr [r8+rdx-0x10]
168 movdqu xmm9, xmmword ptr [r9+rdx-0x10]
169 movdqu xmm10, xmmword ptr [r10+rdx-0x10]
170 movdqu xmm11, xmmword ptr [r11+rdx-0x10]
173 punpckhdq xmm12, xmm9
175 punpckldq xmm10, xmm11
176 punpckhdq xmm14, xmm11
178 punpcklqdq xmm8, xmm10
179 punpckhqdq xmm9, xmm10
181 punpcklqdq xmm12, xmm14
182 punpckhqdq xmm13, xmm14
183 movdqa xmmword ptr [rsp+0xC0], xmm8
184 movdqa xmmword ptr [rsp+0xD0], xmm9
185 movdqa xmmword ptr [rsp+0xE0], xmm12
186 movdqa xmmword ptr [rsp+0xF0], xmm13
/* Fill xmm9..xmm14 from constant tables and from the vectors saved at */
/* [rsp+0x110] / [rsp+0x120]; xmm15 is broadcast from its own dword 0. */
187 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
188 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
189 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
190 movdqa xmm12, xmmword ptr [rsp+0x110]
191 movdqa xmm13, xmmword ptr [rsp+0x120]
192 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
194 pshufd xmm15, xmm15, 0x00
/* Prefetch each input 0x80 bytes beyond the current offset. */
195 prefetcht0 [r8+rdx+0x80]
196 prefetcht0 [r9+rdx+0x80]
197 prefetcht0 [r10+rdx+0x80]
198 prefetcht0 [r11+rdx+0x80]
/*
 * From here to the output stage the visible pattern repeats:
 *   - four paddd adding stack message vectors into xmm0..xmm3; the slot
 *     offsets differ from group to group (presumably the per-round
 *     message schedule - confirm);
 *   - pshuflw+pshufhw pairs with 0xB1, which swap the two 16-bit halves of
 *     every dword, i.e. rotate each 32-bit lane by 16 bits;
 *   - xmm8 spilled to / reloaded from [rsp+0x100]; the first load of xmm8
 *     comes from BLAKE3_IV_0.
 */
199 paddd xmm0, xmmword ptr [rsp]
200 paddd xmm1, xmmword ptr [rsp+0x20]
201 paddd xmm2, xmmword ptr [rsp+0x40]
202 paddd xmm3, xmmword ptr [rsp+0x60]
211 pshuflw xmm12, xmm12, 0xB1
212 pshufhw xmm12, xmm12, 0xB1
213 pshuflw xmm13, xmm13, 0xB1
214 pshufhw xmm13, xmm13, 0xB1
215 pshuflw xmm14, xmm14, 0xB1
216 pshufhw xmm14, xmm14, 0xB1
217 pshuflw xmm15, xmm15, 0xB1
218 pshufhw xmm15, xmm15, 0xB1
219 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
228 movdqa xmmword ptr [rsp+0x100], xmm8
245 paddd xmm0, xmmword ptr [rsp+0x10]
246 paddd xmm1, xmmword ptr [rsp+0x30]
247 paddd xmm2, xmmword ptr [rsp+0x50]
248 paddd xmm3, xmmword ptr [rsp+0x70]
273 movdqa xmm8, xmmword ptr [rsp+0x100]
282 movdqa xmmword ptr [rsp+0x100], xmm8
299 paddd xmm0, xmmword ptr [rsp+0x80]
300 paddd xmm1, xmmword ptr [rsp+0xA0]
301 paddd xmm2, xmmword ptr [rsp+0xC0]
302 paddd xmm3, xmmword ptr [rsp+0xE0]
311 pshuflw xmm15, xmm15, 0xB1
312 pshufhw xmm15, xmm15, 0xB1
313 pshuflw xmm12, xmm12, 0xB1
314 pshufhw xmm12, xmm12, 0xB1
315 pshuflw xmm13, xmm13, 0xB1
316 pshufhw xmm13, xmm13, 0xB1
317 pshuflw xmm14, xmm14, 0xB1
318 pshufhw xmm14, xmm14, 0xB1
321 movdqa xmm8, xmmword ptr [rsp+0x100]
328 movdqa xmmword ptr [rsp+0x100], xmm8
345 paddd xmm0, xmmword ptr [rsp+0x90]
346 paddd xmm1, xmmword ptr [rsp+0xB0]
347 paddd xmm2, xmmword ptr [rsp+0xD0]
348 paddd xmm3, xmmword ptr [rsp+0xF0]
375 movdqa xmm8, xmmword ptr [rsp+0x100]
382 movdqa xmmword ptr [rsp+0x100], xmm8
399 paddd xmm0, xmmword ptr [rsp+0x20]
400 paddd xmm1, xmmword ptr [rsp+0x30]
401 paddd xmm2, xmmword ptr [rsp+0x70]
402 paddd xmm3, xmmword ptr [rsp+0x40]
411 pshuflw xmm12, xmm12, 0xB1
412 pshufhw xmm12, xmm12, 0xB1
413 pshuflw xmm13, xmm13, 0xB1
414 pshufhw xmm13, xmm13, 0xB1
415 pshuflw xmm14, xmm14, 0xB1
416 pshufhw xmm14, xmm14, 0xB1
417 pshuflw xmm15, xmm15, 0xB1
418 pshufhw xmm15, xmm15, 0xB1
419 movdqa xmm8, xmmword ptr [rsp+0x100]
428 movdqa xmmword ptr [rsp+0x100], xmm8
445 paddd xmm0, xmmword ptr [rsp+0x60]
446 paddd xmm1, xmmword ptr [rsp+0xA0]
447 paddd xmm2, xmmword ptr [rsp]
448 paddd xmm3, xmmword ptr [rsp+0xD0]
473 movdqa xmm8, xmmword ptr [rsp+0x100]
482 movdqa xmmword ptr [rsp+0x100], xmm8
499 paddd xmm0, xmmword ptr [rsp+0x10]
500 paddd xmm1, xmmword ptr [rsp+0xC0]
501 paddd xmm2, xmmword ptr [rsp+0x90]
502 paddd xmm3, xmmword ptr [rsp+0xF0]
511 pshuflw xmm15, xmm15, 0xB1
512 pshufhw xmm15, xmm15, 0xB1
513 pshuflw xmm12, xmm12, 0xB1
514 pshufhw xmm12, xmm12, 0xB1
515 pshuflw xmm13, xmm13, 0xB1
516 pshufhw xmm13, xmm13, 0xB1
517 pshuflw xmm14, xmm14, 0xB1
518 pshufhw xmm14, xmm14, 0xB1
521 movdqa xmm8, xmmword ptr [rsp+0x100]
528 movdqa xmmword ptr [rsp+0x100], xmm8
545 paddd xmm0, xmmword ptr [rsp+0xB0]
546 paddd xmm1, xmmword ptr [rsp+0x50]
547 paddd xmm2, xmmword ptr [rsp+0xE0]
548 paddd xmm3, xmmword ptr [rsp+0x80]
575 movdqa xmm8, xmmword ptr [rsp+0x100]
582 movdqa xmmword ptr [rsp+0x100], xmm8
599 paddd xmm0, xmmword ptr [rsp+0x30]
600 paddd xmm1, xmmword ptr [rsp+0xA0]
601 paddd xmm2, xmmword ptr [rsp+0xD0]
602 paddd xmm3, xmmword ptr [rsp+0x70]
611 pshuflw xmm12, xmm12, 0xB1
612 pshufhw xmm12, xmm12, 0xB1
613 pshuflw xmm13, xmm13, 0xB1
614 pshufhw xmm13, xmm13, 0xB1
615 pshuflw xmm14, xmm14, 0xB1
616 pshufhw xmm14, xmm14, 0xB1
617 pshuflw xmm15, xmm15, 0xB1
618 pshufhw xmm15, xmm15, 0xB1
619 movdqa xmm8, xmmword ptr [rsp+0x100]
628 movdqa xmmword ptr [rsp+0x100], xmm8
645 paddd xmm0, xmmword ptr [rsp+0x40]
646 paddd xmm1, xmmword ptr [rsp+0xC0]
647 paddd xmm2, xmmword ptr [rsp+0x20]
648 paddd xmm3, xmmword ptr [rsp+0xE0]
673 movdqa xmm8, xmmword ptr [rsp+0x100]
682 movdqa xmmword ptr [rsp+0x100], xmm8
699 paddd xmm0, xmmword ptr [rsp+0x60]
700 paddd xmm1, xmmword ptr [rsp+0x90]
701 paddd xmm2, xmmword ptr [rsp+0xB0]
702 paddd xmm3, xmmword ptr [rsp+0x80]
711 pshuflw xmm15, xmm15, 0xB1
712 pshufhw xmm15, xmm15, 0xB1
713 pshuflw xmm12, xmm12, 0xB1
714 pshufhw xmm12, xmm12, 0xB1
715 pshuflw xmm13, xmm13, 0xB1
716 pshufhw xmm13, xmm13, 0xB1
717 pshuflw xmm14, xmm14, 0xB1
718 pshufhw xmm14, xmm14, 0xB1
721 movdqa xmm8, xmmword ptr [rsp+0x100]
728 movdqa xmmword ptr [rsp+0x100], xmm8
745 paddd xmm0, xmmword ptr [rsp+0x50]
746 paddd xmm1, xmmword ptr [rsp]
747 paddd xmm2, xmmword ptr [rsp+0xF0]
748 paddd xmm3, xmmword ptr [rsp+0x10]
775 movdqa xmm8, xmmword ptr [rsp+0x100]
782 movdqa xmmword ptr [rsp+0x100], xmm8
799 paddd xmm0, xmmword ptr [rsp+0xA0]
800 paddd xmm1, xmmword ptr [rsp+0xC0]
801 paddd xmm2, xmmword ptr [rsp+0xE0]
802 paddd xmm3, xmmword ptr [rsp+0xD0]
811 pshuflw xmm12, xmm12, 0xB1
812 pshufhw xmm12, xmm12, 0xB1
813 pshuflw xmm13, xmm13, 0xB1
814 pshufhw xmm13, xmm13, 0xB1
815 pshuflw xmm14, xmm14, 0xB1
816 pshufhw xmm14, xmm14, 0xB1
817 pshuflw xmm15, xmm15, 0xB1
818 pshufhw xmm15, xmm15, 0xB1
819 movdqa xmm8, xmmword ptr [rsp+0x100]
828 movdqa xmmword ptr [rsp+0x100], xmm8
845 paddd xmm0, xmmword ptr [rsp+0x70]
846 paddd xmm1, xmmword ptr [rsp+0x90]
847 paddd xmm2, xmmword ptr [rsp+0x30]
848 paddd xmm3, xmmword ptr [rsp+0xF0]
873 movdqa xmm8, xmmword ptr [rsp+0x100]
882 movdqa xmmword ptr [rsp+0x100], xmm8
899 paddd xmm0, xmmword ptr [rsp+0x40]
900 paddd xmm1, xmmword ptr [rsp+0xB0]
901 paddd xmm2, xmmword ptr [rsp+0x50]
902 paddd xmm3, xmmword ptr [rsp+0x10]
911 pshuflw xmm15, xmm15, 0xB1
912 pshufhw xmm15, xmm15, 0xB1
913 pshuflw xmm12, xmm12, 0xB1
914 pshufhw xmm12, xmm12, 0xB1
915 pshuflw xmm13, xmm13, 0xB1
916 pshufhw xmm13, xmm13, 0xB1
917 pshuflw xmm14, xmm14, 0xB1
918 pshufhw xmm14, xmm14, 0xB1
921 movdqa xmm8, xmmword ptr [rsp+0x100]
928 movdqa xmmword ptr [rsp+0x100], xmm8
945 paddd xmm0, xmmword ptr [rsp]
946 paddd xmm1, xmmword ptr [rsp+0x20]
947 paddd xmm2, xmmword ptr [rsp+0x80]
948 paddd xmm3, xmmword ptr [rsp+0x60]
975 movdqa xmm8, xmmword ptr [rsp+0x100]
982 movdqa xmmword ptr [rsp+0x100], xmm8
999 paddd xmm0, xmmword ptr [rsp+0xC0]
1000 paddd xmm1, xmmword ptr [rsp+0x90]
1001 paddd xmm2, xmmword ptr [rsp+0xF0]
1002 paddd xmm3, xmmword ptr [rsp+0xE0]
1011 pshuflw xmm12, xmm12, 0xB1
1012 pshufhw xmm12, xmm12, 0xB1
1013 pshuflw xmm13, xmm13, 0xB1
1014 pshufhw xmm13, xmm13, 0xB1
1015 pshuflw xmm14, xmm14, 0xB1
1016 pshufhw xmm14, xmm14, 0xB1
1017 pshuflw xmm15, xmm15, 0xB1
1018 pshufhw xmm15, xmm15, 0xB1
1019 movdqa xmm8, xmmword ptr [rsp+0x100]
1028 movdqa xmmword ptr [rsp+0x100], xmm8
1045 paddd xmm0, xmmword ptr [rsp+0xD0]
1046 paddd xmm1, xmmword ptr [rsp+0xB0]
1047 paddd xmm2, xmmword ptr [rsp+0xA0]
1048 paddd xmm3, xmmword ptr [rsp+0x80]
1073 movdqa xmm8, xmmword ptr [rsp+0x100]
1082 movdqa xmmword ptr [rsp+0x100], xmm8
1099 paddd xmm0, xmmword ptr [rsp+0x70]
1100 paddd xmm1, xmmword ptr [rsp+0x50]
1101 paddd xmm2, xmmword ptr [rsp]
1102 paddd xmm3, xmmword ptr [rsp+0x60]
1111 pshuflw xmm15, xmm15, 0xB1
1112 pshufhw xmm15, xmm15, 0xB1
1113 pshuflw xmm12, xmm12, 0xB1
1114 pshufhw xmm12, xmm12, 0xB1
1115 pshuflw xmm13, xmm13, 0xB1
1116 pshufhw xmm13, xmm13, 0xB1
1117 pshuflw xmm14, xmm14, 0xB1
1118 pshufhw xmm14, xmm14, 0xB1
1121 movdqa xmm8, xmmword ptr [rsp+0x100]
1128 movdqa xmmword ptr [rsp+0x100], xmm8
1145 paddd xmm0, xmmword ptr [rsp+0x20]
1146 paddd xmm1, xmmword ptr [rsp+0x30]
1147 paddd xmm2, xmmword ptr [rsp+0x10]
1148 paddd xmm3, xmmword ptr [rsp+0x40]
1175 movdqa xmm8, xmmword ptr [rsp+0x100]
1182 movdqa xmmword ptr [rsp+0x100], xmm8
1199 paddd xmm0, xmmword ptr [rsp+0x90]
1200 paddd xmm1, xmmword ptr [rsp+0xB0]
1201 paddd xmm2, xmmword ptr [rsp+0x80]
1202 paddd xmm3, xmmword ptr [rsp+0xF0]
1211 pshuflw xmm12, xmm12, 0xB1
1212 pshufhw xmm12, xmm12, 0xB1
1213 pshuflw xmm13, xmm13, 0xB1
1214 pshufhw xmm13, xmm13, 0xB1
1215 pshuflw xmm14, xmm14, 0xB1
1216 pshufhw xmm14, xmm14, 0xB1
1217 pshuflw xmm15, xmm15, 0xB1
1218 pshufhw xmm15, xmm15, 0xB1
1219 movdqa xmm8, xmmword ptr [rsp+0x100]
1228 movdqa xmmword ptr [rsp+0x100], xmm8
1245 paddd xmm0, xmmword ptr [rsp+0xE0]
1246 paddd xmm1, xmmword ptr [rsp+0x50]
1247 paddd xmm2, xmmword ptr [rsp+0xC0]
1248 paddd xmm3, xmmword ptr [rsp+0x10]
1273 movdqa xmm8, xmmword ptr [rsp+0x100]
1282 movdqa xmmword ptr [rsp+0x100], xmm8
1299 paddd xmm0, xmmword ptr [rsp+0xD0]
1300 paddd xmm1, xmmword ptr [rsp]
1301 paddd xmm2, xmmword ptr [rsp+0x20]
1302 paddd xmm3, xmmword ptr [rsp+0x40]
1311 pshuflw xmm15, xmm15, 0xB1
1312 pshufhw xmm15, xmm15, 0xB1
1313 pshuflw xmm12, xmm12, 0xB1
1314 pshufhw xmm12, xmm12, 0xB1
1315 pshuflw xmm13, xmm13, 0xB1
1316 pshufhw xmm13, xmm13, 0xB1
1317 pshuflw xmm14, xmm14, 0xB1
1318 pshufhw xmm14, xmm14, 0xB1
1321 movdqa xmm8, xmmword ptr [rsp+0x100]
1328 movdqa xmmword ptr [rsp+0x100], xmm8
1345 paddd xmm0, xmmword ptr [rsp+0x30]
1346 paddd xmm1, xmmword ptr [rsp+0xA0]
1347 paddd xmm2, xmmword ptr [rsp+0x60]
1348 paddd xmm3, xmmword ptr [rsp+0x70]
1375 movdqa xmm8, xmmword ptr [rsp+0x100]
1382 movdqa xmmword ptr [rsp+0x100], xmm8
1399 paddd xmm0, xmmword ptr [rsp+0xB0]
1400 paddd xmm1, xmmword ptr [rsp+0x50]
1401 paddd xmm2, xmmword ptr [rsp+0x10]
1402 paddd xmm3, xmmword ptr [rsp+0x80]
1411 pshuflw xmm12, xmm12, 0xB1
1412 pshufhw xmm12, xmm12, 0xB1
1413 pshuflw xmm13, xmm13, 0xB1
1414 pshufhw xmm13, xmm13, 0xB1
1415 pshuflw xmm14, xmm14, 0xB1
1416 pshufhw xmm14, xmm14, 0xB1
1417 pshuflw xmm15, xmm15, 0xB1
1418 pshufhw xmm15, xmm15, 0xB1
1419 movdqa xmm8, xmmword ptr [rsp+0x100]
1428 movdqa xmmword ptr [rsp+0x100], xmm8
1445 paddd xmm0, xmmword ptr [rsp+0xF0]
1446 paddd xmm1, xmmword ptr [rsp]
1447 paddd xmm2, xmmword ptr [rsp+0x90]
1448 paddd xmm3, xmmword ptr [rsp+0x60]
1473 movdqa xmm8, xmmword ptr [rsp+0x100]
1482 movdqa xmmword ptr [rsp+0x100], xmm8
1499 paddd xmm0, xmmword ptr [rsp+0xE0]
1500 paddd xmm1, xmmword ptr [rsp+0x20]
1501 paddd xmm2, xmmword ptr [rsp+0x30]
1502 paddd xmm3, xmmword ptr [rsp+0x70]
1511 pshuflw xmm15, xmm15, 0xB1
1512 pshufhw xmm15, xmm15, 0xB1
1513 pshuflw xmm12, xmm12, 0xB1
1514 pshufhw xmm12, xmm12, 0xB1
1515 pshuflw xmm13, xmm13, 0xB1
1516 pshufhw xmm13, xmm13, 0xB1
1517 pshuflw xmm14, xmm14, 0xB1
1518 pshufhw xmm14, xmm14, 0xB1
1521 movdqa xmm8, xmmword ptr [rsp+0x100]
1528 movdqa xmmword ptr [rsp+0x100], xmm8
1545 paddd xmm0, xmmword ptr [rsp+0xA0]
1546 paddd xmm1, xmmword ptr [rsp+0xC0]
1547 paddd xmm2, xmmword ptr [rsp+0x40]
1548 paddd xmm3, xmmword ptr [rsp+0xD0]
1575 movdqa xmm8, xmmword ptr [rsp+0x100]
/* Output stage: interleave xmm0..xmm3 and store 16 bytes each at */
/* [rbx], [rbx+0x20], [rbx+0x40], [rbx+0x60]. */
1609 punpckldq xmm0, xmm1
1610 punpckhdq xmm9, xmm1
1612 punpckldq xmm2, xmm3
1613 punpckhdq xmm11, xmm3
1615 punpcklqdq xmm0, xmm2
1616 punpckhqdq xmm1, xmm2
1618 punpcklqdq xmm9, xmm11
1619 punpckhqdq xmm3, xmm11
1620 movdqu xmmword ptr [rbx], xmm0
1621 movdqu xmmword ptr [rbx+0x20], xmm1
1622 movdqu xmmword ptr [rbx+0x40], xmm9
1623 movdqu xmmword ptr [rbx+0x60], xmm3
/* Same for xmm4..xmm7, filling the 16-byte gaps at +0x10/+0x30/+0x50/+0x70. */
1625 punpckldq xmm4, xmm5
1626 punpckhdq xmm9, xmm5
1628 punpckldq xmm6, xmm7
1629 punpckhdq xmm11, xmm7
1631 punpcklqdq xmm4, xmm6
1632 punpckhqdq xmm5, xmm6
1634 punpcklqdq xmm9, xmm11
1635 punpckhqdq xmm7, xmm11
1636 movdqu xmmword ptr [rbx+0x10], xmm4
1637 movdqu xmmword ptr [rbx+0x30], xmm5
1638 movdqu xmmword ptr [rbx+0x50], xmm9
1639 movdqu xmmword ptr [rbx+0x70], xmm7
/* Advance the saved vector at [rsp+0x110] by the increment kept at */
/* [rsp+0x150], then rewrite [rsp+0x120]. NOTE(review): the CMP_MSB_MASK */
/* xors presumably feed a compare that detects carry into the second */
/* vector; the compare itself is not visible here - confirm. */
1640 movdqa xmm1, xmmword ptr [rsp+0x110]
1642 paddd xmm1, xmmword ptr [rsp+0x150]
1643 movdqa xmmword ptr [rsp+0x110], xmm1
1644 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
1645 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
1647 movdqa xmm1, xmmword ptr [rsp+0x120]
1649 movdqa xmmword ptr [rsp+0x120], xmm1
/*
 * Second visible path: only two input pointers (r8, r9) are fetched.
 * The 32-byte state is reloaded from [rcx] into xmm0/xmm1, and dwords 0
 * and 1 of the vectors at [rsp+0x110]/[rsp+0x120] are paired up and saved
 * at [rsp] and [rsp+0x10].
 */
1670 movups xmm0, xmmword ptr [rcx]
1671 movups xmm1, xmmword ptr [rcx+0x10]
1674 movd xmm13, dword ptr [rsp+0x110]
1675 movd xmm14, dword ptr [rsp+0x120]
1676 punpckldq xmm13, xmm14
1677 movaps xmmword ptr [rsp], xmm13
1678 movd xmm14, dword ptr [rsp+0x114]
1679 movd xmm13, dword ptr [rsp+0x124]
1680 punpckldq xmm14, xmm13
1681 movaps xmmword ptr [rsp+0x10], xmm14
1682 mov r8, qword ptr [rdi]
1683 mov r9, qword ptr [rdi+0x8]
1684 movzx eax, byte ptr [rbp+0x40]
1693 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
/* Load 64 bytes from the first input and split them with shufps: */
/* imm 136 (0x88) gathers the even-indexed dwords of both operands, */
/* imm 221 (0xDD) the odd-indexed ones. */
1695 movups xmm4, xmmword ptr [r8+rdx-0x40]
1696 movups xmm5, xmmword ptr [r8+rdx-0x30]
1698 shufps xmm4, xmm5, 136
1699 shufps xmm3, xmm5, 221
1701 movups xmm6, xmmword ptr [r8+rdx-0x20]
1702 movups xmm7, xmmword ptr [r8+rdx-0x10]
1704 shufps xmm6, xmm7, 136
1705 pshufd xmm6, xmm6, 0x93
1706 shufps xmm3, xmm7, 221
1707 pshufd xmm7, xmm3, 0x93
/* Same split for the second input, into xmm12..xmm15. */
1708 movups xmm12, xmmword ptr [r9+rdx-0x40]
1709 movups xmm13, xmmword ptr [r9+rdx-0x30]
1711 shufps xmm12, xmm13, 136
1712 shufps xmm11, xmm13, 221
1714 movups xmm14, xmmword ptr [r9+rdx-0x20]
1715 movups xmm15, xmmword ptr [r9+rdx-0x10]
1717 shufps xmm14, xmm15, 136
1718 pshufd xmm14, xmm14, 0x93
1719 shufps xmm11, xmm15, 221
1720 pshufd xmm15, xmm11, 0x93
/* Build xmm3 / xmm11: low qword from the pairs saved at [rsp] and */
/* [rsp+0x10], high qword from the low qword of the value parked at */
/* [rsp+0x20]. */
1724 movdqa xmmword ptr [rsp+0x20], xmm3
1725 movaps xmm3, xmmword ptr [rsp]
1726 movaps xmm11, xmmword ptr [rsp+0x10]
1727 punpcklqdq xmm3, xmmword ptr [rsp+0x20]
1728 punpcklqdq xmm11, xmmword ptr [rsp+0x20]
/* Park message vectors on the stack; registers are reused below. */
1733 movaps xmmword ptr [rsp+0x20], xmm4
1734 movaps xmmword ptr [rsp+0x30], xmm12
/* 0xB1 on pshuflw+pshufhw: rotate every 32-bit lane by 16 bits. */
1739 pshuflw xmm3, xmm3, 0xB1
1740 pshufhw xmm3, xmm3, 0xB1
1741 pshuflw xmm11, xmm11, 0xB1
1742 pshufhw xmm11, xmm11, 0xB1
1757 movaps xmmword ptr [rsp+0x40], xmm5
1758 movaps xmmword ptr [rsp+0x50], xmm13
/* Rotate dword positions within the rows (0x93 / 0x4E / 0x39). */
/* NOTE(review): presumably the diagonalize step - confirm. */
1783 pshufd xmm0, xmm0, 0x93
1784 pshufd xmm8, xmm8, 0x93
1785 pshufd xmm3, xmm3, 0x4E
1786 pshufd xmm11, xmm11, 0x4E
1787 pshufd xmm2, xmm2, 0x39
1788 pshufd xmm10, xmm10, 0x39
1795 pshuflw xmm3, xmm3, 0xB1
1796 pshufhw xmm3, xmm3, 0xB1
1797 pshuflw xmm11, xmm11, 0xB1
1798 pshufhw xmm11, xmm11, 0xB1
/* Inverse dword rotation of the rows (0x39 / 0x4E / 0x93). */
1837 pshufd xmm0, xmm0, 0x39
1838 pshufd xmm8, xmm8, 0x39
1839 pshufd xmm3, xmm3, 0x4E
1840 pshufd xmm11, xmm11, 0x4E
1841 pshufd xmm2, xmm2, 0x93
1842 pshufd xmm10, xmm10, 0x93
/* Reshuffle the first input's message vectors from the copies parked at */
/* [rsp+0x20] / [rsp+0x40]. The pand with PBLENDW_* masks keeps selected */
/* lanes. NOTE(review): presumably an SSE2 stand-in for pblendw, forming */
/* the next round's message order - confirm. */
1845 movdqa xmm12, xmmword ptr [rsp+0x20]
1846 movdqa xmm5, xmmword ptr [rsp+0x40]
1847 pshufd xmm13, xmm12, 0x0F
1848 shufps xmm12, xmm5, 214
1849 pshufd xmm4, xmm12, 0x39
1851 shufps xmm12, xmm7, 250
1852 pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
1853 pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
1855 movdqa xmmword ptr [rsp+0x20], xmm13
1857 punpcklqdq xmm12, xmm5
1859 pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
1860 pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
1862 pshufd xmm12, xmm12, 0x78
1863 punpckhdq xmm5, xmm7
1864 punpckldq xmm6, xmm5
1865 pshufd xmm7, xmm6, 0x1E
1866 movdqa xmmword ptr [rsp+0x40], xmm12
/* Same reshuffle for the second input (parked at [rsp+0x30]/[rsp+0x50]); */
/* xmm2 is saved to [rsp+0x30] and restored around its use as scratch. */
1867 movdqa xmm5, xmmword ptr [rsp+0x30]
1868 movdqa xmm13, xmmword ptr [rsp+0x50]
1869 pshufd xmm6, xmm5, 0x0F
1870 shufps xmm5, xmm13, 214
1871 pshufd xmm12, xmm5, 0x39
1873 shufps xmm5, xmm15, 250
1874 pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]
1875 pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]
1878 punpcklqdq xmm5, xmm13
1879 movdqa xmmword ptr [rsp+0x30], xmm2
1881 pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]
1882 pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
1884 movdqa xmm2, xmmword ptr [rsp+0x30]
1885 pshufd xmm5, xmm5, 0x78
1886 punpckhdq xmm13, xmm15
1887 punpckldq xmm14, xmm13
1888 pshufd xmm15, xmm14, 0x1E
1891 movdqa xmm5, xmmword ptr [rsp+0x20]
1892 movdqa xmm6, xmmword ptr [rsp+0x40]
/* Store 64 bytes (xmm0, xmm1, xmm8, xmm9) to the output at rbx. */
1902 movups xmmword ptr [rbx], xmm0
1903 movups xmmword ptr [rbx+0x10], xmm1
1904 movups xmmword ptr [rbx+0x20], xmm8
1905 movups xmmword ptr [rbx+0x30], xmm9
/* Scalar update: copy the dwords at [rsp+0x110+8*rax] and */
/* [rsp+0x120+8*rax] into lane 0 of their vectors; rax comes from the */
/* dword saved at [rsp+0x130]. */
1906 mov eax, dword ptr [rsp+0x130]
1908 mov r10d, dword ptr [rsp+0x110+8*rax]
1909 mov r11d, dword ptr [rsp+0x120+8*rax]
1910 mov dword ptr [rsp+0x110], r10d
1911 mov dword ptr [rsp+0x120], r11d
/*
 * Third visible path: a single input pointer (r8). The state is reloaded
 * from [rcx]; lane 0 of [rsp+0x110] / [rsp+0x120] is paired into xmm13.
 */
1918 movups xmm0, xmmword ptr [rcx]
1919 movups xmm1, xmmword ptr [rcx+0x10]
1920 movd xmm13, dword ptr [rsp+0x110]
1921 movd xmm14, dword ptr [rsp+0x120]
1922 punpckldq xmm13, xmm14
1923 mov r8, qword ptr [rdi]
1924 movzx eax, byte ptr [rbp+0x40]
1933 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1938 punpcklqdq xmm3, xmm12
/* Load 64 message bytes and split into even/odd dwords (shufps 136/221). */
1939 movups xmm4, xmmword ptr [r8+rdx-0x40]
1940 movups xmm5, xmmword ptr [r8+rdx-0x30]
1942 shufps xmm4, xmm5, 136
1943 shufps xmm8, xmm5, 221
1945 movups xmm6, xmmword ptr [r8+rdx-0x20]
1946 movups xmm7, xmmword ptr [r8+rdx-0x10]
1948 shufps xmm6, xmm7, 136
1949 pshufd xmm6, xmm6, 0x93
1950 shufps xmm8, xmm7, 221
1951 pshufd xmm7, xmm8, 0x93
/* 0xB1 on pshuflw+pshufhw: rotate every 32-bit lane by 16 bits. */
1957 pshuflw xmm3, xmm3, 0xB1
1958 pshufhw xmm3, xmm3, 0xB1
1978 pshufd xmm0, xmm0, 0x93
1979 pshufd xmm3, xmm3, 0x4E
1980 pshufd xmm2, xmm2, 0x39
1984 pshuflw xmm3, xmm3, 0xB1
1985 pshufhw xmm3, xmm3, 0xB1
2005 pshufd xmm0, xmm0, 0x39
2006 pshufd xmm3, xmm3, 0x4E
2007 pshufd xmm2, xmm2, 0x93
/* Reshuffle the message vectors in registers (same shuffle constants and */
/* PBLENDW_* masks as the two-input path above). */
2011 shufps xmm8, xmm5, 214
2012 pshufd xmm9, xmm4, 0x0F
2013 pshufd xmm4, xmm8, 0x39
2015 shufps xmm8, xmm7, 250
2016 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
2017 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
2020 punpcklqdq xmm8, xmm5
2022 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2023 pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2025 pshufd xmm8, xmm8, 0x78
2026 punpckhdq xmm5, xmm7
2027 punpckldq xmm6, xmm5
2028 pshufd xmm7, xmm6, 0x1E
/* Store 32 bytes (xmm0, xmm1) to the output at rbx. */
2038 movups xmmword ptr [rbx], xmm0
2039 movups xmmword ptr [rbx+0x10], xmm1
/*
 * blake3_compress_in_place_sse2 / _blake3_compress_in_place_sse2.
 *
 * Visible behaviour: reads 32 bytes of state from [rdi] and 64 bytes of
 * message from [rsi], and writes 32 bytes back to [rdi], so the state is
 * updated in place.
 * NOTE(review): the embedded line numbers skip, so the round arithmetic,
 * loop control and the setup of xmm3 from the remaining arguments are not
 * visible here - confirm against the full file.
 */
2043 blake3_compress_in_place_sse2:
2044 _blake3_compress_in_place_sse2:
/* Load the 32-byte state into xmm0/xmm1 and the BLAKE3_IV constant into xmm2. */
2046 movups xmm0, xmmword ptr [rdi]
2047 movups xmm1, xmmword ptr [rdi+0x10]
2048 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
2053 punpcklqdq xmm3, xmm4
/* Load the 64-byte message and split it with shufps: imm 136 (0x88) */
/* gathers even-indexed dwords, imm 221 (0xDD) odd-indexed dwords. */
2054 movups xmm4, xmmword ptr [rsi]
2055 movups xmm5, xmmword ptr [rsi+0x10]
2057 shufps xmm4, xmm5, 136
2058 shufps xmm8, xmm5, 221
2060 movups xmm6, xmmword ptr [rsi+0x20]
2061 movups xmm7, xmmword ptr [rsi+0x30]
2063 shufps xmm6, xmm7, 136
2064 pshufd xmm6, xmm6, 0x93
2065 shufps xmm8, xmm7, 221
2066 pshufd xmm7, xmm8, 0x93
/* pshuflw+pshufhw with 0xB1 swap the 16-bit halves of every dword, */
/* i.e. rotate each 32-bit lane by 16 bits. */
2072 pshuflw xmm3, xmm3, 0xB1
2073 pshufhw xmm3, xmm3, 0xB1
/* Rotate dword positions within the rows (0x93 / 0x4E / 0x39). */
/* NOTE(review): presumably the diagonalize step - confirm. */
2093 pshufd xmm0, xmm0, 0x93
2094 pshufd xmm3, xmm3, 0x4E
2095 pshufd xmm2, xmm2, 0x39
2099 pshuflw xmm3, xmm3, 0xB1
2100 pshufhw xmm3, xmm3, 0xB1
/* Inverse dword rotation of the rows (0x39 / 0x4E / 0x93). */
2120 pshufd xmm0, xmm0, 0x39
2121 pshufd xmm3, xmm3, 0x4E
2122 pshufd xmm2, xmm2, 0x93
/* Reshuffle the message vectors xmm4..xmm7. The pand with PBLENDW_* masks */
/* keeps selected lanes. NOTE(review): presumably an SSE2 stand-in for */
/* pblendw, forming the next round's message order - confirm. */
2126 shufps xmm8, xmm5, 214
2127 pshufd xmm9, xmm4, 0x0F
2128 pshufd xmm4, xmm8, 0x39
2130 shufps xmm8, xmm7, 250
2131 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
2132 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
2135 punpcklqdq xmm8, xmm5
2137 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2138 pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2140 pshufd xmm8, xmm8, 0x78
2141 punpckhdq xmm5, xmm7
2142 punpckldq xmm6, xmm5
2143 pshufd xmm7, xmm6, 0x1E
/* Write the updated 32-byte state back over the input at [rdi]. */
2150 movups xmmword ptr [rdi], xmm0
2151 movups xmmword ptr [rdi+0x10], xmm1
/*
 * blake3_compress_xof_sse2 / _blake3_compress_xof_sse2.
 *
 * Visible behaviour: reads 32 bytes of state from [rdi] and 64 bytes of
 * message from [rsi], and writes 64 bytes of output to [r9]. No store to
 * [rdi] is visible; the state is read again near the end (xmm4/xmm5).
 * NOTE(review): the embedded line numbers skip, so the round arithmetic,
 * loop control and the final combination with the reloaded state are not
 * visible here - confirm against the full file.
 */
2155 blake3_compress_xof_sse2:
2156 _blake3_compress_xof_sse2:
/* Load the 32-byte state into xmm0/xmm1 and the BLAKE3_IV constant into xmm2. */
2158 movups xmm0, xmmword ptr [rdi]
2159 movups xmm1, xmmword ptr [rdi+0x10]
2160 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
2167 punpcklqdq xmm3, xmm4
/* Load the 64-byte message and split it with shufps: imm 136 (0x88) */
/* gathers even-indexed dwords, imm 221 (0xDD) odd-indexed dwords. */
2168 movups xmm4, xmmword ptr [rsi]
2169 movups xmm5, xmmword ptr [rsi+0x10]
2171 shufps xmm4, xmm5, 136
2172 shufps xmm8, xmm5, 221
2174 movups xmm6, xmmword ptr [rsi+0x20]
2175 movups xmm7, xmmword ptr [rsi+0x30]
2177 shufps xmm6, xmm7, 136
2178 pshufd xmm6, xmm6, 0x93
2179 shufps xmm8, xmm7, 221
2180 pshufd xmm7, xmm8, 0x93
/* pshuflw+pshufhw with 0xB1 swap the 16-bit halves of every dword, */
/* i.e. rotate each 32-bit lane by 16 bits. */
2186 pshuflw xmm3, xmm3, 0xB1
2187 pshufhw xmm3, xmm3, 0xB1
/* Rotate dword positions within the rows (0x93 / 0x4E / 0x39). */
/* NOTE(review): presumably the diagonalize step - confirm. */
2207 pshufd xmm0, xmm0, 0x93
2208 pshufd xmm3, xmm3, 0x4E
2209 pshufd xmm2, xmm2, 0x39
2213 pshuflw xmm3, xmm3, 0xB1
2214 pshufhw xmm3, xmm3, 0xB1
/* Inverse dword rotation of the rows (0x39 / 0x4E / 0x93). */
2234 pshufd xmm0, xmm0, 0x39
2235 pshufd xmm3, xmm3, 0x4E
2236 pshufd xmm2, xmm2, 0x93
/* Reshuffle the message vectors xmm4..xmm7. The pand with PBLENDW_* masks */
/* keeps selected lanes. NOTE(review): presumably an SSE2 stand-in for */
/* pblendw, forming the next round's message order - confirm. */
2240 shufps xmm8, xmm5, 214
2241 pshufd xmm9, xmm4, 0x0F
2242 pshufd xmm4, xmm8, 0x39
2244 shufps xmm8, xmm7, 250
2245 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
2246 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
2249 punpcklqdq xmm8, xmm5
2251 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2252 pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2254 pshufd xmm8, xmm8, 0x78
2255 punpckhdq xmm5, xmm7
2256 punpckldq xmm6, xmm5
2257 pshufd xmm7, xmm6, 0x1E
/* Reload the original 32-byte input state into xmm4/xmm5. */
2262 movdqu xmm4, xmmword ptr [rdi]
2263 movdqu xmm5, xmmword ptr [rdi+0x10]
/* Write all four state rows (64 bytes) to the output buffer at r9. */
2268 movups xmmword ptr [r9], xmm0
2269 movups xmmword ptr [r9+0x10], xmm1
2270 movups xmmword ptr [r9+0x20], xmm2
2271 movups xmmword ptr [r9+0x30], xmm3
/*
 * Constant tables.
 * NOTE(review): the labels, section and alignment directives are not
 * visible here (embedded line numbers skip). The symbol names in the
 * comments below are inferred by matching values against the names the
 * code references - confirm against the full file.
 */
/* Four distinct words in one 16-byte vector; presumably BLAKE3_IV. */
2282 .long 0x6A09E667, 0xBB67AE85
2283 .long 0x3C6EF372, 0xA54FF53A
/* Each of the same four words broadcast to all four lanes; presumably */
/* BLAKE3_IV_0 .. BLAKE3_IV_3. */
2289 .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
2291 .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
2293 .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
2295 .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
/* 64 in every lane; presumably BLAKE3_BLOCK_LEN. */
2297 .long 64, 64, 64, 64
/* Sign bit in every lane; presumably CMP_MSB_MASK. */
2299 .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
/* Lane-select masks; the patterns match 16-bit-word blend immediates */
/* 0x33, 0xCC, 0x3F and 0xC0 in that order, so presumably PBLENDW_0x33_MASK, */
/* PBLENDW_0xCC_MASK, PBLENDW_0x3F_MASK and PBLENDW_0xC0_MASK. */
2301 .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
2303 .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
2305 .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
2307 .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF