1 public _llvm_blake3_hash_many_sse2
2 public llvm_blake3_hash_many_sse2
3 public llvm_blake3_compress_in_place_sse2
4 public _llvm_blake3_compress_in_place_sse2
5 public llvm_blake3_compress_xof_sse2
6 public _llvm_blake3_compress_xof_sse2
8 _TEXT
SEGMENT ALIGN(16) 'CODE'
;-----------------------------------------------------------------------
; llvm_blake3_hash_many_sse2 / _llvm_blake3_hash_many_sse2
;
; Syntax: MASM (Intel operand order), x86-64, SSE2 only.
;
; This body is a re-flowed copy of the visible instruction stream: split
; operands are joined, one instruction per line, same order as before.
; shufps immediates are written in hex (88H/0DDH/0D6H/0FAH) instead of
; the equal-valued decimals 136/221/214/250.
;
; NOTE(review): no labels, branches, push/pop or ret are visible in this
; listing, and several unpack sequences read registers that are never
; written here. It looks like an incomplete extract - confirm against
; the upstream file before assembling.
;
; Registers as used by the visible code:
;   rcx        base of a 32-byte block that is loaded and lane-broadcast
;   rdi        array of input pointers (loaded into r8..r11)
;   rdx        byte offset added to every input pointer
;   rbx        output pointer, loaded from [rbp+90H]
;   rbp        base for argument-like loads (+68H..+90H)
;              - presumably the caller's stack arguments, TODO confirm
;   rsp        64-byte aligned scratch frame:
;                [rsp+000H..0F0H]  16 vectors built from the inputs
;                [rsp+100H]        spill slot for xmm8
;                [rsp+110H/120H]   two vectors reloaded into xmm12/xmm13
;                [rsp+130H/150H]   broadcast value / per-iteration addend
;                [rsp+170H..200H]  saved xmm6..xmm15
;-----------------------------------------------------------------------
llvm_blake3_hash_many_sse2 PROC
_llvm_blake3_hash_many_sse2 PROC
        ; Round rsp down to a 64-byte boundary so movdqa/movaps on the
        ; frame are legal.
        and         rsp, 0FFFFFFFFFFFFFFC0H

        ; Spill xmm6..xmm15 (non-volatile under the Microsoft x64 ABI);
        ; they are reloaded from the same slots further down.
        movdqa      xmmword ptr [rsp+170H], xmm6
        movdqa      xmmword ptr [rsp+180H], xmm7
        movdqa      xmmword ptr [rsp+190H], xmm8
        movdqa      xmmword ptr [rsp+1A0H], xmm9
        movdqa      xmmword ptr [rsp+1B0H], xmm10
        movdqa      xmmword ptr [rsp+1C0H], xmm11
        movdqa      xmmword ptr [rsp+1D0H], xmm12
        movdqa      xmmword ptr [rsp+1E0H], xmm13
        movdqa      xmmword ptr [rsp+1F0H], xmm14
        movdqa      xmmword ptr [rsp+200H], xmm15

        ; rbp-relative loads - presumably stack-passed arguments.
        mov         r8, qword ptr [rbp+68H]
        movzx       r9, byte ptr [rbp+70H]

        ; Broadcast lane 0 of xmm0 and keep a copy at [rsp+130H].
        pshufd      xmm0, xmm0, 00H
        movdqa      xmmword ptr [rsp+130H], xmm0
        ; Mask with the ADD0 / ADD1 tables; the ADD1 result is kept at
        ; [rsp+150H] and is added to [rsp+110H] once per iteration below.
        pand        xmm1, xmmword ptr [ADD0]
        pand        xmm0, xmmword ptr [ADD1]
        movdqa      xmmword ptr [rsp+150H], xmm0
        pshufd      xmm0, xmm0, 00H
        movdqa      xmmword ptr [rsp+110H], xmm0
        ; Flip the top bit of every dword in xmm0 / xmm1.
        ; NOTE(review): looks like the bias step of an unsigned compare
        ; done with signed SSE2 compares - the compare itself is not
        ; visible here.
        pxor        xmm0, xmmword ptr [CMP_MSB_MASK]
        pxor        xmm1, xmmword ptr [CMP_MSB_MASK]
        pshufd      xmm2, xmm2, 00H
        movdqa      xmmword ptr [rsp+120H], xmm2
        mov         rbx, qword ptr [rbp+90H]
        movzx       r13d, byte ptr [rbp+78H]
        movzx       r12d, byte ptr [rbp+88H]

        ;---------------------------------------------------------------
        ; Four-pointer section (inputs r8, r9, r10, r11).
        ;---------------------------------------------------------------
        ; Load 32 bytes at rcx and broadcast each of the eight dwords
        ; into its own register: xmm0..xmm7.
        movdqu      xmm3, xmmword ptr [rcx]
        pshufd      xmm0, xmm3, 00H
        pshufd      xmm1, xmm3, 55H
        pshufd      xmm2, xmm3, 0AAH
        pshufd      xmm3, xmm3, 0FFH
        movdqu      xmm7, xmmword ptr [rcx+10H]
        pshufd      xmm4, xmm7, 00H
        pshufd      xmm5, xmm7, 55H
        pshufd      xmm6, xmm7, 0AAH
        pshufd      xmm7, xmm7, 0FFH

        ; Fetch the four input pointers from the array at rdi.
        mov         r8, qword ptr [rdi]
        mov         r9, qword ptr [rdi+8H]
        mov         r10, qword ptr [rdi+10H]
        mov         r11, qword ptr [rdi+18H]
        movzx       eax, byte ptr [rbp+80H]

        ; Bytes [-40H, -30H) of each input: interleave, store to
        ; [rsp+00H..30H].
        movdqu      xmm8, xmmword ptr [r8+rdx-40H]
        movdqu      xmm9, xmmword ptr [r9+rdx-40H]
        movdqu      xmm10, xmmword ptr [r10+rdx-40H]
        movdqu      xmm11, xmmword ptr [r11+rdx-40H]
        punpckldq   xmm10, xmm11
        punpckhdq   xmm14, xmm11
        punpcklqdq  xmm8, xmm10
        punpckhqdq  xmm9, xmm10
        punpcklqdq  xmm12, xmm14
        punpckhqdq  xmm13, xmm14
        movdqa      xmmword ptr [rsp], xmm8
        movdqa      xmmword ptr [rsp+10H], xmm9
        movdqa      xmmword ptr [rsp+20H], xmm12
        movdqa      xmmword ptr [rsp+30H], xmm13

        ; Bytes [-30H, -20H) -> [rsp+40H..70H].
        movdqu      xmm8, xmmword ptr [r8+rdx-30H]
        movdqu      xmm9, xmmword ptr [r9+rdx-30H]
        movdqu      xmm10, xmmword ptr [r10+rdx-30H]
        movdqu      xmm11, xmmword ptr [r11+rdx-30H]
        punpckhdq   xmm12, xmm9
        punpckldq   xmm10, xmm11
        punpckhdq   xmm14, xmm11
        punpcklqdq  xmm8, xmm10
        punpckhqdq  xmm9, xmm10
        punpcklqdq  xmm12, xmm14
        punpckhqdq  xmm13, xmm14
        movdqa      xmmword ptr [rsp+40H], xmm8
        movdqa      xmmword ptr [rsp+50H], xmm9
        movdqa      xmmword ptr [rsp+60H], xmm12
        movdqa      xmmword ptr [rsp+70H], xmm13

        ; Bytes [-20H, -10H) -> [rsp+80H..0B0H].
        movdqu      xmm8, xmmword ptr [r8+rdx-20H]
        movdqu      xmm9, xmmword ptr [r9+rdx-20H]
        movdqu      xmm10, xmmword ptr [r10+rdx-20H]
        movdqu      xmm11, xmmword ptr [r11+rdx-20H]
        punpckhdq   xmm12, xmm9
        punpckldq   xmm10, xmm11
        punpckhdq   xmm14, xmm11
        punpcklqdq  xmm8, xmm10
        punpckhqdq  xmm9, xmm10
        punpcklqdq  xmm12, xmm14
        punpckhqdq  xmm13, xmm14
        movdqa      xmmword ptr [rsp+80H], xmm8
        movdqa      xmmword ptr [rsp+90H], xmm9
        movdqa      xmmword ptr [rsp+0A0H], xmm12
        movdqa      xmmword ptr [rsp+0B0H], xmm13

        ; Bytes [-10H, 0) -> [rsp+0C0H..0F0H].
        movdqu      xmm8, xmmword ptr [r8+rdx-10H]
        movdqu      xmm9, xmmword ptr [r9+rdx-10H]
        movdqu      xmm10, xmmword ptr [r10+rdx-10H]
        movdqu      xmm11, xmmword ptr [r11+rdx-10H]
        punpckhdq   xmm12, xmm9
        punpckldq   xmm10, xmm11
        punpckhdq   xmm14, xmm11
        punpcklqdq  xmm8, xmm10
        punpckhqdq  xmm9, xmm10
        punpcklqdq  xmm12, xmm14
        punpckhqdq  xmm13, xmm14
        movdqa      xmmword ptr [rsp+0C0H], xmm8
        movdqa      xmmword ptr [rsp+0D0H], xmm9
        movdqa      xmmword ptr [rsp+0E0H], xmm12
        movdqa      xmmword ptr [rsp+0F0H], xmm13

        ; Fill xmm9..xmm15 from the constant tables and the frame.
        movdqa      xmm9, xmmword ptr [BLAKE3_IV_1]
        movdqa      xmm10, xmmword ptr [BLAKE3_IV_2]
        movdqa      xmm11, xmmword ptr [BLAKE3_IV_3]
        movdqa      xmm12, xmmword ptr [rsp+110H]
        movdqa      xmm13, xmmword ptr [rsp+120H]
        movdqa      xmm14, xmmword ptr [BLAKE3_BLOCK_LEN]
        pshufd      xmm15, xmm15, 00H

        ; Prefetch 80H bytes past the current offset of every input.
        prefetcht0  byte ptr [r8+rdx+80H]
        prefetcht0  byte ptr [r9+rdx+80H]
        prefetcht0  byte ptr [r10+rdx+80H]
        prefetcht0  byte ptr [r11+rdx+80H]

        ; NOTE(review): the seven similar passes below differ only in
        ; which frame slots feed the paddd groups; they look like the
        ; seven BLAKE3 rounds with the message permutation folded into
        ; the offsets - confirm against upstream.
        ;
        ; In every pass:
        ;   paddd xmmN, [rsp+X]      adds one stored input vector
        ;   pshuflw+pshufhw, 0B1H    swaps the 16-bit halves of each
        ;                            dword, i.e. rotates it by 16 bits
        ;   [rsp+100H]               spill slot for xmm8

        ;----- pass 1 ---------------------------------------------------
        paddd       xmm0, xmmword ptr [rsp]
        paddd       xmm1, xmmword ptr [rsp+20H]
        paddd       xmm2, xmmword ptr [rsp+40H]
        paddd       xmm3, xmmword ptr [rsp+60H]
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        movdqa      xmm8, xmmword ptr [BLAKE3_IV_0]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+10H]
        paddd       xmm1, xmmword ptr [rsp+30H]
        paddd       xmm2, xmmword ptr [rsp+50H]
        paddd       xmm3, xmmword ptr [rsp+70H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+80H]
        paddd       xmm1, xmmword ptr [rsp+0A0H]
        paddd       xmm2, xmmword ptr [rsp+0C0H]
        paddd       xmm3, xmmword ptr [rsp+0E0H]
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+90H]
        paddd       xmm1, xmmword ptr [rsp+0B0H]
        paddd       xmm2, xmmword ptr [rsp+0D0H]
        paddd       xmm3, xmmword ptr [rsp+0F0H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8

        ;----- pass 2 ---------------------------------------------------
        paddd       xmm0, xmmword ptr [rsp+20H]
        paddd       xmm1, xmmword ptr [rsp+30H]
        paddd       xmm2, xmmword ptr [rsp+70H]
        paddd       xmm3, xmmword ptr [rsp+40H]
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+60H]
        paddd       xmm1, xmmword ptr [rsp+0A0H]
        paddd       xmm2, xmmword ptr [rsp]
        paddd       xmm3, xmmword ptr [rsp+0D0H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+10H]
        paddd       xmm1, xmmword ptr [rsp+0C0H]
        paddd       xmm2, xmmword ptr [rsp+90H]
        paddd       xmm3, xmmword ptr [rsp+0F0H]
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+0B0H]
        paddd       xmm1, xmmword ptr [rsp+50H]
        paddd       xmm2, xmmword ptr [rsp+0E0H]
        paddd       xmm3, xmmword ptr [rsp+80H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8

        ;----- pass 3 ---------------------------------------------------
        paddd       xmm0, xmmword ptr [rsp+30H]
        paddd       xmm1, xmmword ptr [rsp+0A0H]
        paddd       xmm2, xmmword ptr [rsp+0D0H]
        paddd       xmm3, xmmword ptr [rsp+70H]
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+40H]
        paddd       xmm1, xmmword ptr [rsp+0C0H]
        paddd       xmm2, xmmword ptr [rsp+20H]
        paddd       xmm3, xmmword ptr [rsp+0E0H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+60H]
        paddd       xmm1, xmmword ptr [rsp+90H]
        paddd       xmm2, xmmword ptr [rsp+0B0H]
        paddd       xmm3, xmmword ptr [rsp+80H]
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+50H]
        paddd       xmm1, xmmword ptr [rsp]
        paddd       xmm2, xmmword ptr [rsp+0F0H]
        paddd       xmm3, xmmword ptr [rsp+10H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8

        ;----- pass 4 ---------------------------------------------------
        paddd       xmm0, xmmword ptr [rsp+0A0H]
        paddd       xmm1, xmmword ptr [rsp+0C0H]
        paddd       xmm2, xmmword ptr [rsp+0E0H]
        paddd       xmm3, xmmword ptr [rsp+0D0H]
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+70H]
        paddd       xmm1, xmmword ptr [rsp+90H]
        paddd       xmm2, xmmword ptr [rsp+30H]
        paddd       xmm3, xmmword ptr [rsp+0F0H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+40H]
        paddd       xmm1, xmmword ptr [rsp+0B0H]
        paddd       xmm2, xmmword ptr [rsp+50H]
        paddd       xmm3, xmmword ptr [rsp+10H]
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp]
        paddd       xmm1, xmmword ptr [rsp+20H]
        paddd       xmm2, xmmword ptr [rsp+80H]
        paddd       xmm3, xmmword ptr [rsp+60H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8

        ;----- pass 5 ---------------------------------------------------
        paddd       xmm0, xmmword ptr [rsp+0C0H]
        paddd       xmm1, xmmword ptr [rsp+90H]
        paddd       xmm2, xmmword ptr [rsp+0F0H]
        paddd       xmm3, xmmword ptr [rsp+0E0H]
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+0D0H]
        paddd       xmm1, xmmword ptr [rsp+0B0H]
        paddd       xmm2, xmmword ptr [rsp+0A0H]
        paddd       xmm3, xmmword ptr [rsp+80H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+70H]
        paddd       xmm1, xmmword ptr [rsp+50H]
        paddd       xmm2, xmmword ptr [rsp]
        paddd       xmm3, xmmword ptr [rsp+60H]
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+20H]
        paddd       xmm1, xmmword ptr [rsp+30H]
        paddd       xmm2, xmmword ptr [rsp+10H]
        paddd       xmm3, xmmword ptr [rsp+40H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8

        ;----- pass 6 ---------------------------------------------------
        paddd       xmm0, xmmword ptr [rsp+90H]
        paddd       xmm1, xmmword ptr [rsp+0B0H]
        paddd       xmm2, xmmword ptr [rsp+80H]
        paddd       xmm3, xmmword ptr [rsp+0F0H]
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+0E0H]
        paddd       xmm1, xmmword ptr [rsp+50H]
        paddd       xmm2, xmmword ptr [rsp+0C0H]
        paddd       xmm3, xmmword ptr [rsp+10H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+0D0H]
        paddd       xmm1, xmmword ptr [rsp]
        paddd       xmm2, xmmword ptr [rsp+20H]
        paddd       xmm3, xmmword ptr [rsp+40H]
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+30H]
        paddd       xmm1, xmmword ptr [rsp+0A0H]
        paddd       xmm2, xmmword ptr [rsp+60H]
        paddd       xmm3, xmmword ptr [rsp+70H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8

        ;----- pass 7 ---------------------------------------------------
        paddd       xmm0, xmmword ptr [rsp+0B0H]
        paddd       xmm1, xmmword ptr [rsp+50H]
        paddd       xmm2, xmmword ptr [rsp+10H]
        paddd       xmm3, xmmword ptr [rsp+80H]
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+0F0H]
        paddd       xmm1, xmmword ptr [rsp]
        paddd       xmm2, xmmword ptr [rsp+90H]
        paddd       xmm3, xmmword ptr [rsp+60H]
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+0E0H]
        paddd       xmm1, xmmword ptr [rsp+20H]
        paddd       xmm2, xmmword ptr [rsp+30H]
        paddd       xmm3, xmmword ptr [rsp+70H]
        pshuflw     xmm15, xmm15, 0B1H
        pshufhw     xmm15, xmm15, 0B1H
        pshuflw     xmm12, xmm12, 0B1H
        pshufhw     xmm12, xmm12, 0B1H
        pshuflw     xmm13, xmm13, 0B1H
        pshufhw     xmm13, xmm13, 0B1H
        pshuflw     xmm14, xmm14, 0B1H
        pshufhw     xmm14, xmm14, 0B1H
        movdqa      xmm8, xmmword ptr [rsp+100H]
        movdqa      xmmword ptr [rsp+100H], xmm8
        paddd       xmm0, xmmword ptr [rsp+0A0H]
        paddd       xmm1, xmmword ptr [rsp+0C0H]
        paddd       xmm2, xmmword ptr [rsp+40H]
        paddd       xmm3, xmmword ptr [rsp+0D0H]
        movdqa      xmm8, xmmword ptr [rsp+100H]

        ; Interleave xmm0..xmm3 and write 16-byte rows to
        ; rbx+00H/20H/40H/60H.
        punpckldq   xmm0, xmm1
        punpckhdq   xmm9, xmm1
        punpckldq   xmm2, xmm3
        punpckhdq   xmm11, xmm3
        punpcklqdq  xmm0, xmm2
        punpckhqdq  xmm1, xmm2
        punpcklqdq  xmm9, xmm11
        punpckhqdq  xmm3, xmm11
        movdqu      xmmword ptr [rbx], xmm0
        movdqu      xmmword ptr [rbx+20H], xmm1
        movdqu      xmmword ptr [rbx+40H], xmm9
        movdqu      xmmword ptr [rbx+60H], xmm3
        ; Same for xmm4..xmm7 -> rbx+10H/30H/50H/70H.
        punpckldq   xmm4, xmm5
        punpckhdq   xmm9, xmm5
        punpckldq   xmm6, xmm7
        punpckhdq   xmm11, xmm7
        punpcklqdq  xmm4, xmm6
        punpckhqdq  xmm5, xmm6
        punpcklqdq  xmm9, xmm11
        punpckhqdq  xmm7, xmm11
        movdqu      xmmword ptr [rbx+10H], xmm4
        movdqu      xmmword ptr [rbx+30H], xmm5
        movdqu      xmmword ptr [rbx+50H], xmm9
        movdqu      xmmword ptr [rbx+70H], xmm7

        ; Add the per-iteration addend [rsp+150H] to the vector at
        ; [rsp+110H], then rewrite [rsp+120H].
        ; NOTE(review): presumably a 64-bit counter kept as low/high
        ; dword vectors; the carry computation is not visible here.
        movdqa      xmm1, xmmword ptr [rsp+110H]
        paddd       xmm1, xmmword ptr [rsp+150H]
        movdqa      xmmword ptr [rsp+110H], xmm1
        pxor        xmm0, xmmword ptr [CMP_MSB_MASK]
        pxor        xmm1, xmmword ptr [CMP_MSB_MASK]
        movdqa      xmm1, xmmword ptr [rsp+120H]
        movdqa      xmmword ptr [rsp+120H], xmm1

        ; Reload xmm6..xmm15 from the slots written at entry.
        movdqa      xmm6, xmmword ptr [rsp+170H]
        movdqa      xmm7, xmmword ptr [rsp+180H]
        movdqa      xmm8, xmmword ptr [rsp+190H]
        movdqa      xmm9, xmmword ptr [rsp+1A0H]
        movdqa      xmm10, xmmword ptr [rsp+1B0H]
        movdqa      xmm11, xmmword ptr [rsp+1C0H]
        movdqa      xmm12, xmmword ptr [rsp+1D0H]
        movdqa      xmm13, xmmword ptr [rsp+1E0H]
        movdqa      xmm14, xmmword ptr [rsp+1F0H]
        movdqa      xmm15, xmmword ptr [rsp+200H]

        ;---------------------------------------------------------------
        ; Two-pointer section (inputs r8, r9).
        ;---------------------------------------------------------------
        movups      xmm0, xmmword ptr [rcx]
        movups      xmm1, xmmword ptr [rcx+10H]
        ; Pair dword 0 of [rsp+110H] with dword 0 of [rsp+120H]
        ; -> [rsp]; pair dword 1 of each -> [rsp+10H].
        movd        xmm13, dword ptr [rsp+110H]
        movd        xmm14, dword ptr [rsp+120H]
        punpckldq   xmm13, xmm14
        movaps      xmmword ptr [rsp], xmm13
        movd        xmm14, dword ptr [rsp+114H]
        movd        xmm13, dword ptr [rsp+124H]
        punpckldq   xmm14, xmm13
        movaps      xmmword ptr [rsp+10H], xmm14
        mov         r8, qword ptr [rdi]
        mov         r9, qword ptr [rdi+8H]
        movzx       eax, byte ptr [rbp+80H]
        movaps      xmm2, xmmword ptr [BLAKE3_IV]

        ; Split the 64 bytes at r8+rdx-40H into even dwords (shufps 88H)
        ; and odd dwords (shufps 0DDH).
        movups      xmm4, xmmword ptr [r8+rdx-40H]
        movups      xmm5, xmmword ptr [r8+rdx-30H]
        shufps      xmm4, xmm5, 88H
        shufps      xmm3, xmm5, 0DDH
        movups      xmm6, xmmword ptr [r8+rdx-20H]
        movups      xmm7, xmmword ptr [r8+rdx-10H]
        shufps      xmm6, xmm7, 88H
        pshufd      xmm6, xmm6, 93H
        shufps      xmm3, xmm7, 0DDH
        pshufd      xmm7, xmm3, 93H
        ; Same split for the 64 bytes at r9+rdx-40H.
        movups      xmm12, xmmword ptr [r9+rdx-40H]
        movups      xmm13, xmmword ptr [r9+rdx-30H]
        shufps      xmm12, xmm13, 88H
        shufps      xmm11, xmm13, 0DDH
        movups      xmm14, xmmword ptr [r9+rdx-20H]
        movups      xmm15, xmmword ptr [r9+rdx-10H]
        shufps      xmm14, xmm15, 88H
        pshufd      xmm14, xmm14, 93H
        shufps      xmm11, xmm15, 0DDH
        pshufd      xmm15, xmm11, 93H

        movdqa      xmmword ptr [rsp+20H], xmm3
        movaps      xmm3, xmmword ptr [rsp]
        movaps      xmm11, xmmword ptr [rsp+10H]
        punpcklqdq  xmm3, xmmword ptr [rsp+20H]
        punpcklqdq  xmm11, xmmword ptr [rsp+20H]
        movaps      xmmword ptr [rsp+20H], xmm4
        movaps      xmmword ptr [rsp+30H], xmm12
        ; Rotate every dword of xmm3 / xmm11 by 16 bits.
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        pshuflw     xmm11, xmm11, 0B1H
        pshufhw     xmm11, xmm11, 0B1H
        movaps      xmmword ptr [rsp+40H], xmm5
        movaps      xmmword ptr [rsp+50H], xmm13
        ; Lane rotations: 93H, 4EH and 39H rotate the four dwords by
        ; one, two and three positions.
        pshufd      xmm0, xmm0, 93H
        pshufd      xmm8, xmm8, 93H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm11, xmm11, 4EH
        pshufd      xmm2, xmm2, 39H
        pshufd      xmm10, xmm10, 39H
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        pshuflw     xmm11, xmm11, 0B1H
        pshufhw     xmm11, xmm11, 0B1H
        ; Inverse lane rotations.
        pshufd      xmm0, xmm0, 39H
        pshufd      xmm8, xmm8, 39H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm11, xmm11, 4EH
        pshufd      xmm2, xmm2, 93H
        pshufd      xmm10, xmm10, 93H

        ; Re-shuffle the first input's vectors; the pand masks stand in
        ; for pblendw, which SSE2 lacks (per the table names).
        movdqa      xmm12, xmmword ptr [rsp+20H]
        movdqa      xmm5, xmmword ptr [rsp+40H]
        pshufd      xmm13, xmm12, 0FH
        shufps      xmm12, xmm5, 0D6H
        pshufd      xmm4, xmm12, 39H
        shufps      xmm12, xmm7, 0FAH
        pand        xmm13, xmmword ptr [PBLENDW_0x33_MASK]
        pand        xmm12, xmmword ptr [PBLENDW_0xCC_MASK]
        movdqa      xmmword ptr [rsp+20H], xmm13
        punpcklqdq  xmm12, xmm5
        pand        xmm12, xmmword ptr [PBLENDW_0x3F_MASK]
        pand        xmm13, xmmword ptr [PBLENDW_0xC0_MASK]
        pshufd      xmm12, xmm12, 78H
        punpckhdq   xmm5, xmm7
        punpckldq   xmm6, xmm5
        pshufd      xmm7, xmm6, 1EH
        movdqa      xmmword ptr [rsp+40H], xmm12
        ; Same re-shuffle for the second input's vectors.
        movdqa      xmm5, xmmword ptr [rsp+30H]
        movdqa      xmm13, xmmword ptr [rsp+50H]
        pshufd      xmm6, xmm5, 0FH
        shufps      xmm5, xmm13, 0D6H
        pshufd      xmm12, xmm5, 39H
        shufps      xmm5, xmm15, 0FAH
        pand        xmm6, xmmword ptr [PBLENDW_0x33_MASK]
        pand        xmm5, xmmword ptr [PBLENDW_0xCC_MASK]
        punpcklqdq  xmm5, xmm13
        movdqa      xmmword ptr [rsp+30H], xmm2
        pand        xmm5, xmmword ptr [PBLENDW_0x3F_MASK]
        pand        xmm2, xmmword ptr [PBLENDW_0xC0_MASK]
        movdqa      xmm2, xmmword ptr [rsp+30H]
        pshufd      xmm5, xmm5, 78H
        punpckhdq   xmm13, xmm15
        punpckldq   xmm14, xmm13
        pshufd      xmm15, xmm14, 1EH
        movdqa      xmm5, xmmword ptr [rsp+20H]
        movdqa      xmm6, xmmword ptr [rsp+40H]

        ; Write 64 bytes at rbx from xmm0, xmm1, xmm8, xmm9.
        movups      xmmword ptr [rbx], xmm0
        movups      xmmword ptr [rbx+10H], xmm1
        movups      xmmword ptr [rbx+20H], xmm8
        movups      xmmword ptr [rbx+30H], xmm9

        ; Use the dword at [rsp+130H] as an index (scaled by 8) into
        ; the vectors at [rsp+110H] / [rsp+120H] and move the selected
        ; dwords into lane 0 of each.
        mov         eax, dword ptr [rsp+130H]
        mov         r10d, dword ptr [rsp+110H+8*rax]
        mov         r11d, dword ptr [rsp+120H+8*rax]
        mov         dword ptr [rsp+110H], r10d
        mov         dword ptr [rsp+120H], r11d

        ;---------------------------------------------------------------
        ; Single-pointer section (input r8).
        ;---------------------------------------------------------------
        movups      xmm0, xmmword ptr [rcx]
        movups      xmm1, xmmword ptr [rcx+10H]
        movd        xmm13, dword ptr [rsp+110H]
        movd        xmm14, dword ptr [rsp+120H]
        punpckldq   xmm13, xmm14
        mov         r8, qword ptr [rdi]
        movzx       eax, byte ptr [rbp+80H]
        movaps      xmm2, xmmword ptr [BLAKE3_IV]
        punpcklqdq  xmm3, xmm12
        ; Even/odd dword split of the 64 bytes at r8+rdx-40H.
        movups      xmm4, xmmword ptr [r8+rdx-40H]
        movups      xmm5, xmmword ptr [r8+rdx-30H]
        shufps      xmm4, xmm5, 88H
        shufps      xmm8, xmm5, 0DDH
        movups      xmm6, xmmword ptr [r8+rdx-20H]
        movups      xmm7, xmmword ptr [r8+rdx-10H]
        shufps      xmm6, xmm7, 88H
        pshufd      xmm6, xmm6, 93H
        shufps      xmm8, xmm7, 0DDH
        pshufd      xmm7, xmm8, 93H
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        pshufd      xmm0, xmm0, 93H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm2, xmm2, 39H
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        pshufd      xmm0, xmm0, 39H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm2, xmm2, 93H
        shufps      xmm8, xmm5, 0D6H
        pshufd      xmm9, xmm4, 0FH
        pshufd      xmm4, xmm8, 39H
        shufps      xmm8, xmm7, 0FAH
        pand        xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand        xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        punpcklqdq  xmm8, xmm5
        pand        xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand        xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
        pshufd      xmm8, xmm8, 78H
        punpckhdq   xmm5, xmm7
        punpckldq   xmm6, xmm5
        pshufd      xmm7, xmm6, 1EH
        ; Write 32 bytes at rbx from xmm0, xmm1.
        movups      xmmword ptr [rbx], xmm0
        movups      xmmword ptr [rbx+10H], xmm1
_llvm_blake3_hash_many_sse2 ENDP
llvm_blake3_hash_many_sse2 ENDP
;-----------------------------------------------------------------------
; llvm_blake3_compress_in_place_sse2 / _llvm_blake3_compress_in_place_sse2
;
; Syntax: MASM (Intel operand order), x86-64, SSE2 only.
;
; Visible data flow:
;   rcx          32-byte block, loaded into xmm0/xmm1 at entry and
;                overwritten from xmm0/xmm1 at the end ("in place")
;   rdx          64-byte input block, read with unaligned loads
;   [rsp+0A0H]   byte loaded into eax - presumably a stack argument,
;                TODO confirm
;   [rsp..+60H]  save area for xmm6-xmm9, xmm11, xmm14, xmm15
;
; This body is a re-flowed copy of the visible instruction stream, in
; the same order. shufps immediates are written in hex (88H/0DDH/0D6H/
; 0FAH) instead of the equal-valued decimals 136/221/214/250.
;
; NOTE(review): no stack adjustment, labels, branches or ret are visible
; in this listing; it looks like an incomplete extract - confirm against
; the upstream file before assembling.
;-----------------------------------------------------------------------
llvm_blake3_compress_in_place_sse2 PROC
_llvm_blake3_compress_in_place_sse2 PROC
        ; Save the xmm registers this routine reloads before leaving
        ; (xmm6-xmm15 are non-volatile under the Microsoft x64 ABI).
        movdqa      xmmword ptr [rsp], xmm6
        movdqa      xmmword ptr [rsp+10H], xmm7
        movdqa      xmmword ptr [rsp+20H], xmm8
        movdqa      xmmword ptr [rsp+30H], xmm9
        movdqa      xmmword ptr [rsp+40H], xmm11
        movdqa      xmmword ptr [rsp+50H], xmm14
        movdqa      xmmword ptr [rsp+60H], xmm15

        ; xmm0/xmm1 <- 32 bytes at rcx, xmm2 <- BLAKE3_IV table.
        movups      xmm0, xmmword ptr [rcx]
        movups      xmm1, xmmword ptr [rcx+10H]
        movaps      xmm2, xmmword ptr [BLAKE3_IV]
        movzx       eax, byte ptr [rsp+0A0H]
        punpcklqdq  xmm3, xmm4

        ; Split the 64 input bytes into even dwords (shufps 88H) and
        ; odd dwords (shufps 0DDH).
        movups      xmm4, xmmword ptr [rdx]
        movups      xmm5, xmmword ptr [rdx+10H]
        shufps      xmm4, xmm5, 88H
        shufps      xmm8, xmm5, 0DDH
        movups      xmm6, xmmword ptr [rdx+20H]
        movups      xmm7, xmmword ptr [rdx+30H]
        shufps      xmm6, xmm7, 88H
        pshufd      xmm6, xmm6, 93H
        shufps      xmm8, xmm7, 0DDH
        pshufd      xmm7, xmm8, 93H

        ; pshuflw+pshufhw with 0B1H swaps the 16-bit halves of every
        ; dword, i.e. rotates each dword of xmm3 by 16 bits.
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        ; Lane rotations: 93H, 4EH and 39H rotate the four dwords by
        ; one, two and three positions.
        pshufd      xmm0, xmm0, 93H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm2, xmm2, 39H
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        ; Inverse lane rotations.
        pshufd      xmm0, xmm0, 39H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm2, xmm2, 93H

        ; Re-shuffle the input vectors; the pand masks stand in for
        ; pblendw, which SSE2 lacks (per the table names).
        shufps      xmm8, xmm5, 0D6H
        pshufd      xmm9, xmm4, 0FH
        pshufd      xmm4, xmm8, 39H
        shufps      xmm8, xmm7, 0FAH
        pand        xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand        xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        punpcklqdq  xmm8, xmm5
        pand        xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand        xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
        pshufd      xmm8, xmm8, 78H
        punpckhdq   xmm5, xmm7
        punpckldq   xmm6, xmm5
        pshufd      xmm7, xmm6, 1EH

        ; Write the result back over the block at rcx.
        movups      xmmword ptr [rcx], xmm0
        movups      xmmword ptr [rcx+10H], xmm1

        ; Restore the saved xmm registers.
        movdqa      xmm6, xmmword ptr [rsp]
        movdqa      xmm7, xmmword ptr [rsp+10H]
        movdqa      xmm8, xmmword ptr [rsp+20H]
        movdqa      xmm9, xmmword ptr [rsp+30H]
        movdqa      xmm11, xmmword ptr [rsp+40H]
        movdqa      xmm14, xmmword ptr [rsp+50H]
        movdqa      xmm15, xmmword ptr [rsp+60H]
_llvm_blake3_compress_in_place_sse2 ENDP
llvm_blake3_compress_in_place_sse2 ENDP
;-----------------------------------------------------------------------
; llvm_blake3_compress_xof_sse2 / _llvm_blake3_compress_xof_sse2
;
; Syntax: MASM (Intel operand order), x86-64, SSE2 only.
;
; Visible data flow:
;   rcx          32-byte block, loaded into xmm0/xmm1 at entry and
;                again into xmm4/xmm5 near the end; never written
;   rdx          64-byte input block, read with unaligned loads
;   [rsp+0A0H]   byte loaded into eax - presumably a stack argument,
;                TODO confirm
;   [rsp+0A8H]   pointer loaded into r10; 64 bytes (xmm0..xmm3) are
;                written there
;   [rsp..+60H]  save area for xmm6-xmm9, xmm11, xmm14, xmm15
;
; This body is a re-flowed copy of the visible instruction stream, in
; the same order. shufps immediates are written in hex (88H/0DDH/0D6H/
; 0FAH) instead of the equal-valued decimals 136/221/214/250.
;
; NOTE(review): no stack adjustment, labels, branches or ret are visible
; in this listing; it looks like an incomplete extract - confirm against
; the upstream file before assembling.
;-----------------------------------------------------------------------
llvm_blake3_compress_xof_sse2 PROC
_llvm_blake3_compress_xof_sse2 PROC
        ; Save the xmm registers this routine reloads before leaving
        ; (xmm6-xmm15 are non-volatile under the Microsoft x64 ABI).
        movdqa      xmmword ptr [rsp], xmm6
        movdqa      xmmword ptr [rsp+10H], xmm7
        movdqa      xmmword ptr [rsp+20H], xmm8
        movdqa      xmmword ptr [rsp+30H], xmm9
        movdqa      xmmword ptr [rsp+40H], xmm11
        movdqa      xmmword ptr [rsp+50H], xmm14
        movdqa      xmmword ptr [rsp+60H], xmm15

        ; xmm0/xmm1 <- 32 bytes at rcx, xmm2 <- BLAKE3_IV table.
        movups      xmm0, xmmword ptr [rcx]
        movups      xmm1, xmmword ptr [rcx+10H]
        movaps      xmm2, xmmword ptr [BLAKE3_IV]
        movzx       eax, byte ptr [rsp+0A0H]
        ; r10 = destination pointer for the 64 output bytes.
        mov         r10, qword ptr [rsp+0A8H]
        punpcklqdq  xmm3, xmm4

        ; Split the 64 input bytes into even dwords (shufps 88H) and
        ; odd dwords (shufps 0DDH).
        movups      xmm4, xmmword ptr [rdx]
        movups      xmm5, xmmword ptr [rdx+10H]
        shufps      xmm4, xmm5, 88H
        shufps      xmm8, xmm5, 0DDH
        movups      xmm6, xmmword ptr [rdx+20H]
        movups      xmm7, xmmword ptr [rdx+30H]
        shufps      xmm6, xmm7, 88H
        pshufd      xmm6, xmm6, 93H
        shufps      xmm8, xmm7, 0DDH
        pshufd      xmm7, xmm8, 93H

        ; pshuflw+pshufhw with 0B1H swaps the 16-bit halves of every
        ; dword, i.e. rotates each dword of xmm3 by 16 bits.
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        ; Lane rotations: 93H, 4EH and 39H rotate the four dwords by
        ; one, two and three positions.
        pshufd      xmm0, xmm0, 93H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm2, xmm2, 39H
        pshuflw     xmm3, xmm3, 0B1H
        pshufhw     xmm3, xmm3, 0B1H
        ; Inverse lane rotations.
        pshufd      xmm0, xmm0, 39H
        pshufd      xmm3, xmm3, 4EH
        pshufd      xmm2, xmm2, 93H

        ; Re-shuffle the input vectors; the pand masks stand in for
        ; pblendw, which SSE2 lacks (per the table names).
        shufps      xmm8, xmm5, 0D6H
        pshufd      xmm9, xmm4, 0FH
        pshufd      xmm4, xmm8, 39H
        shufps      xmm8, xmm7, 0FAH
        pand        xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand        xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        punpcklqdq  xmm8, xmm5
        pand        xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand        xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
        pshufd      xmm8, xmm8, 78H
        punpckhdq   xmm5, xmm7
        punpckldq   xmm6, xmm5
        pshufd      xmm7, xmm6, 1EH

        ; Reload the 32 bytes at rcx into xmm4/xmm5.
        movdqu      xmm4, xmmword ptr [rcx]
        movdqu      xmm5, xmmword ptr [rcx+10H]

        ; Write xmm0..xmm3 (64 bytes) to the buffer at r10.
        movups      xmmword ptr [r10], xmm0
        movups      xmmword ptr [r10+10H], xmm1
        movups      xmmword ptr [r10+20H], xmm2
        movups      xmmword ptr [r10+30H], xmm3

        ; Restore the saved xmm registers.
        movdqa      xmm6, xmmword ptr [rsp]
        movdqa      xmm7, xmmword ptr [rsp+10H]
        movdqa      xmm8, xmmword ptr [rsp+20H]
        movdqa      xmm9, xmmword ptr [rsp+30H]
        movdqa      xmm11, xmmword ptr [rsp+40H]
        movdqa      xmm14, xmmword ptr [rsp+50H]
        movdqa      xmm15, xmmword ptr [rsp+60H]
_llvm_blake3_compress_xof_sse2 ENDP
llvm_blake3_compress_xof_sse2 ENDP
2311 _RDATA
SEGMENT READONLY
PAGE ALIAS
(".rdata") 'CONST'
; Read-only constant tables, one 16-byte row per dd statement.
; NOTE(review): the labels that name these rows (BLAKE3_IV, BLAKE3_IV_0..3,
; PBLENDW_*_MASK, ...) are not visible in this listing, and neither are
; the ADD0 / ADD1 / BLAKE3_BLOCK_LEN / CMP_MSB_MASK rows referenced by
; the code. The per-row notes below are inferred from the values -
; confirm against the upstream file.

        ; Four distinct dwords - presumably BLAKE3_IV.
        dd          6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH

        ; Each of those dwords replicated across all four lanes -
        ; presumably BLAKE3_IV_0 .. BLAKE3_IV_3.
        dd          4 dup (6A09E667H)
        dd          4 dup (0BB67AE85H)
        dd          4 dup (3C6EF372H)
        dd          4 dup (0A54FF53AH)

        ; Dword-lane select masks - presumably PBLENDW_0x33 / 0xCC /
        ; 0x3F / 0xC0, in that order:
        ; lanes 0 and 2
        dd          0FFFFFFFFH, 000000000H, 0FFFFFFFFH, 000000000H
        ; lanes 1 and 3
        dd          000000000H, 0FFFFFFFFH, 000000000H, 0FFFFFFFFH
        ; lanes 0, 1 and 2
        dd          0FFFFFFFFH, 0FFFFFFFFH, 0FFFFFFFFH, 000000000H
        ; lane 3 only
        dd          000000000H, 000000000H, 000000000H, 0FFFFFFFFH