[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / Support / BLAKE3 / blake3_sse2_x86-64_windows_msvc.asm
blob1069c8df4ed60adbc8b728707184c903b6a49952
; Exported entry points. Each of the three *_sse2 routines (hash_many,
; compress_in_place, compress_xof) is declared public under two spellings:
; one with and one without a leading underscore.
1 public _llvm_blake3_hash_many_sse2
2 public llvm_blake3_hash_many_sse2
3 public llvm_blake3_compress_in_place_sse2
4 public _llvm_blake3_compress_in_place_sse2
5 public llvm_blake3_compress_xof_sse2
6 public _llvm_blake3_compress_xof_sse2
; Open the _TEXT segment (class 'CODE') with 16-byte segment alignment.
8 _TEXT SEGMENT ALIGN(16) 'CODE'
; Pad to a 16-byte boundary so that whatever is emitted next starts aligned.
10 ALIGN 16
11 llvm_blake3_hash_many_sse2 PROC
12 _llvm_blake3_hash_many_sse2 PROC
13 push r15
14 push r14
15 push r13
16 push r12
17 push rsi
18 push rdi
19 push rbx
20 push rbp
21 mov rbp, rsp
22 sub rsp, 528
23 and rsp, 0FFFFFFFFFFFFFFC0H
24 movdqa xmmword ptr [rsp+170H], xmm6
25 movdqa xmmword ptr [rsp+180H], xmm7
26 movdqa xmmword ptr [rsp+190H], xmm8
27 movdqa xmmword ptr [rsp+1A0H], xmm9
28 movdqa xmmword ptr [rsp+1B0H], xmm10
29 movdqa xmmword ptr [rsp+1C0H], xmm11
30 movdqa xmmword ptr [rsp+1D0H], xmm12
31 movdqa xmmword ptr [rsp+1E0H], xmm13
32 movdqa xmmword ptr [rsp+1F0H], xmm14
33 movdqa xmmword ptr [rsp+200H], xmm15
34 mov rdi, rcx
35 mov rsi, rdx
36 mov rdx, r8
37 mov rcx, r9
38 mov r8, qword ptr [rbp+68H]
39 movzx r9, byte ptr [rbp+70H]
40 neg r9d
41 movd xmm0, r9d
42 pshufd xmm0, xmm0, 00H
43 movdqa xmmword ptr [rsp+130H], xmm0
44 movdqa xmm1, xmm0
45 pand xmm1, xmmword ptr [ADD0]
46 pand xmm0, xmmword ptr [ADD1]
47 movdqa xmmword ptr [rsp+150H], xmm0
48 movd xmm0, r8d
49 pshufd xmm0, xmm0, 00H
50 paddd xmm0, xmm1
51 movdqa xmmword ptr [rsp+110H], xmm0
52 pxor xmm0, xmmword ptr [CMP_MSB_MASK]
53 pxor xmm1, xmmword ptr [CMP_MSB_MASK]
54 pcmpgtd xmm1, xmm0
55 shr r8, 32
56 movd xmm2, r8d
57 pshufd xmm2, xmm2, 00H
58 psubd xmm2, xmm1
59 movdqa xmmword ptr [rsp+120H], xmm2
60 mov rbx, qword ptr [rbp+90H]
61 mov r15, rdx
62 shl r15, 6
63 movzx r13d, byte ptr [rbp+78H]
64 movzx r12d, byte ptr [rbp+88H]
65 cmp rsi, 4
66 jc final3blocks
67 outerloop4:
68 movdqu xmm3, xmmword ptr [rcx]
69 pshufd xmm0, xmm3, 00H
70 pshufd xmm1, xmm3, 55H
71 pshufd xmm2, xmm3, 0AAH
72 pshufd xmm3, xmm3, 0FFH
73 movdqu xmm7, xmmword ptr [rcx+10H]
74 pshufd xmm4, xmm7, 00H
75 pshufd xmm5, xmm7, 55H
76 pshufd xmm6, xmm7, 0AAH
77 pshufd xmm7, xmm7, 0FFH
78 mov r8, qword ptr [rdi]
79 mov r9, qword ptr [rdi+8H]
80 mov r10, qword ptr [rdi+10H]
81 mov r11, qword ptr [rdi+18H]
82 movzx eax, byte ptr [rbp+80H]
83 or eax, r13d
84 xor edx, edx
85 innerloop4:
86 mov r14d, eax
87 or eax, r12d
88 add rdx, 64
89 cmp rdx, r15
90 cmovne eax, r14d
91 movdqu xmm8, xmmword ptr [r8+rdx-40H]
92 movdqu xmm9, xmmword ptr [r9+rdx-40H]
93 movdqu xmm10, xmmword ptr [r10+rdx-40H]
94 movdqu xmm11, xmmword ptr [r11+rdx-40H]
95 movdqa xmm12, xmm8
96 punpckldq xmm8, xmm9
97 punpckhdq xmm12, xmm9
98 movdqa xmm14, xmm10
99 punpckldq xmm10, xmm11
100 punpckhdq xmm14, xmm11
101 movdqa xmm9, xmm8
102 punpcklqdq xmm8, xmm10
103 punpckhqdq xmm9, xmm10
104 movdqa xmm13, xmm12
105 punpcklqdq xmm12, xmm14
106 punpckhqdq xmm13, xmm14
107 movdqa xmmword ptr [rsp], xmm8
108 movdqa xmmword ptr [rsp+10H], xmm9
109 movdqa xmmword ptr [rsp+20H], xmm12
110 movdqa xmmword ptr [rsp+30H], xmm13
111 movdqu xmm8, xmmword ptr [r8+rdx-30H]
112 movdqu xmm9, xmmword ptr [r9+rdx-30H]
113 movdqu xmm10, xmmword ptr [r10+rdx-30H]
114 movdqu xmm11, xmmword ptr [r11+rdx-30H]
115 movdqa xmm12, xmm8
116 punpckldq xmm8, xmm9
117 punpckhdq xmm12, xmm9
118 movdqa xmm14, xmm10
119 punpckldq xmm10, xmm11
120 punpckhdq xmm14, xmm11
121 movdqa xmm9, xmm8
122 punpcklqdq xmm8, xmm10
123 punpckhqdq xmm9, xmm10
124 movdqa xmm13, xmm12
125 punpcklqdq xmm12, xmm14
126 punpckhqdq xmm13, xmm14
127 movdqa xmmword ptr [rsp+40H], xmm8
128 movdqa xmmword ptr [rsp+50H], xmm9
129 movdqa xmmword ptr [rsp+60H], xmm12
130 movdqa xmmword ptr [rsp+70H], xmm13
131 movdqu xmm8, xmmword ptr [r8+rdx-20H]
132 movdqu xmm9, xmmword ptr [r9+rdx-20H]
133 movdqu xmm10, xmmword ptr [r10+rdx-20H]
134 movdqu xmm11, xmmword ptr [r11+rdx-20H]
135 movdqa xmm12, xmm8
136 punpckldq xmm8, xmm9
137 punpckhdq xmm12, xmm9
138 movdqa xmm14, xmm10
139 punpckldq xmm10, xmm11
140 punpckhdq xmm14, xmm11
141 movdqa xmm9, xmm8
142 punpcklqdq xmm8, xmm10
143 punpckhqdq xmm9, xmm10
144 movdqa xmm13, xmm12
145 punpcklqdq xmm12, xmm14
146 punpckhqdq xmm13, xmm14
147 movdqa xmmword ptr [rsp+80H], xmm8
148 movdqa xmmword ptr [rsp+90H], xmm9
149 movdqa xmmword ptr [rsp+0A0H], xmm12
150 movdqa xmmword ptr [rsp+0B0H], xmm13
151 movdqu xmm8, xmmword ptr [r8+rdx-10H]
152 movdqu xmm9, xmmword ptr [r9+rdx-10H]
153 movdqu xmm10, xmmword ptr [r10+rdx-10H]
154 movdqu xmm11, xmmword ptr [r11+rdx-10H]
155 movdqa xmm12, xmm8
156 punpckldq xmm8, xmm9
157 punpckhdq xmm12, xmm9
158 movdqa xmm14, xmm10
159 punpckldq xmm10, xmm11
160 punpckhdq xmm14, xmm11
161 movdqa xmm9, xmm8
162 punpcklqdq xmm8, xmm10
163 punpckhqdq xmm9, xmm10
164 movdqa xmm13, xmm12
165 punpcklqdq xmm12, xmm14
166 punpckhqdq xmm13, xmm14
167 movdqa xmmword ptr [rsp+0C0H], xmm8
168 movdqa xmmword ptr [rsp+0D0H], xmm9
169 movdqa xmmword ptr [rsp+0E0H], xmm12
170 movdqa xmmword ptr [rsp+0F0H], xmm13
171 movdqa xmm9, xmmword ptr [BLAKE3_IV_1]
172 movdqa xmm10, xmmword ptr [BLAKE3_IV_2]
173 movdqa xmm11, xmmword ptr [BLAKE3_IV_3]
174 movdqa xmm12, xmmword ptr [rsp+110H]
175 movdqa xmm13, xmmword ptr [rsp+120H]
176 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN]
177 movd xmm15, eax
178 pshufd xmm15, xmm15, 00H
179 prefetcht0 byte ptr [r8+rdx+80H]
180 prefetcht0 byte ptr [r9+rdx+80H]
181 prefetcht0 byte ptr [r10+rdx+80H]
182 prefetcht0 byte ptr [r11+rdx+80H]
183 paddd xmm0, xmmword ptr [rsp]
184 paddd xmm1, xmmword ptr [rsp+20H]
185 paddd xmm2, xmmword ptr [rsp+40H]
186 paddd xmm3, xmmword ptr [rsp+60H]
187 paddd xmm0, xmm4
188 paddd xmm1, xmm5
189 paddd xmm2, xmm6
190 paddd xmm3, xmm7
191 pxor xmm12, xmm0
192 pxor xmm13, xmm1
193 pxor xmm14, xmm2
194 pxor xmm15, xmm3
195 pshuflw xmm12, xmm12, 0B1H
196 pshufhw xmm12, xmm12, 0B1H
197 pshuflw xmm13, xmm13, 0B1H
198 pshufhw xmm13, xmm13, 0B1H
199 pshuflw xmm14, xmm14, 0B1H
200 pshufhw xmm14, xmm14, 0B1H
201 pshuflw xmm15, xmm15, 0B1H
202 pshufhw xmm15, xmm15, 0B1H
203 movdqa xmm8, xmmword ptr [BLAKE3_IV_0]
204 paddd xmm8, xmm12
205 paddd xmm9, xmm13
206 paddd xmm10, xmm14
207 paddd xmm11, xmm15
208 pxor xmm4, xmm8
209 pxor xmm5, xmm9
210 pxor xmm6, xmm10
211 pxor xmm7, xmm11
212 movdqa xmmword ptr [rsp+100H], xmm8
213 movdqa xmm8, xmm4
214 psrld xmm8, 12
215 pslld xmm4, 20
216 por xmm4, xmm8
217 movdqa xmm8, xmm5
218 psrld xmm8, 12
219 pslld xmm5, 20
220 por xmm5, xmm8
221 movdqa xmm8, xmm6
222 psrld xmm8, 12
223 pslld xmm6, 20
224 por xmm6, xmm8
225 movdqa xmm8, xmm7
226 psrld xmm8, 12
227 pslld xmm7, 20
228 por xmm7, xmm8
229 paddd xmm0, xmmword ptr [rsp+10H]
230 paddd xmm1, xmmword ptr [rsp+30H]
231 paddd xmm2, xmmword ptr [rsp+50H]
232 paddd xmm3, xmmword ptr [rsp+70H]
233 paddd xmm0, xmm4
234 paddd xmm1, xmm5
235 paddd xmm2, xmm6
236 paddd xmm3, xmm7
237 pxor xmm12, xmm0
238 pxor xmm13, xmm1
239 pxor xmm14, xmm2
240 pxor xmm15, xmm3
241 movdqa xmm8, xmm12
242 psrld xmm12, 8
243 pslld xmm8, 24
244 pxor xmm12, xmm8
245 movdqa xmm8, xmm13
246 psrld xmm13, 8
247 pslld xmm8, 24
248 pxor xmm13, xmm8
249 movdqa xmm8, xmm14
250 psrld xmm14, 8
251 pslld xmm8, 24
252 pxor xmm14, xmm8
253 movdqa xmm8, xmm15
254 psrld xmm15, 8
255 pslld xmm8, 24
256 pxor xmm15, xmm8
257 movdqa xmm8, xmmword ptr [rsp+100H]
258 paddd xmm8, xmm12
259 paddd xmm9, xmm13
260 paddd xmm10, xmm14
261 paddd xmm11, xmm15
262 pxor xmm4, xmm8
263 pxor xmm5, xmm9
264 pxor xmm6, xmm10
265 pxor xmm7, xmm11
266 movdqa xmmword ptr [rsp+100H], xmm8
267 movdqa xmm8, xmm4
268 psrld xmm8, 7
269 pslld xmm4, 25
270 por xmm4, xmm8
271 movdqa xmm8, xmm5
272 psrld xmm8, 7
273 pslld xmm5, 25
274 por xmm5, xmm8
275 movdqa xmm8, xmm6
276 psrld xmm8, 7
277 pslld xmm6, 25
278 por xmm6, xmm8
279 movdqa xmm8, xmm7
280 psrld xmm8, 7
281 pslld xmm7, 25
282 por xmm7, xmm8
283 paddd xmm0, xmmword ptr [rsp+80H]
284 paddd xmm1, xmmword ptr [rsp+0A0H]
285 paddd xmm2, xmmword ptr [rsp+0C0H]
286 paddd xmm3, xmmword ptr [rsp+0E0H]
287 paddd xmm0, xmm5
288 paddd xmm1, xmm6
289 paddd xmm2, xmm7
290 paddd xmm3, xmm4
291 pxor xmm15, xmm0
292 pxor xmm12, xmm1
293 pxor xmm13, xmm2
294 pxor xmm14, xmm3
295 pshuflw xmm15, xmm15, 0B1H
296 pshufhw xmm15, xmm15, 0B1H
297 pshuflw xmm12, xmm12, 0B1H
298 pshufhw xmm12, xmm12, 0B1H
299 pshuflw xmm13, xmm13, 0B1H
300 pshufhw xmm13, xmm13, 0B1H
301 pshuflw xmm14, xmm14, 0B1H
302 pshufhw xmm14, xmm14, 0B1H
303 paddd xmm10, xmm15
304 paddd xmm11, xmm12
305 movdqa xmm8, xmmword ptr [rsp+100H]
306 paddd xmm8, xmm13
307 paddd xmm9, xmm14
308 pxor xmm5, xmm10
309 pxor xmm6, xmm11
310 pxor xmm7, xmm8
311 pxor xmm4, xmm9
312 movdqa xmmword ptr [rsp+100H], xmm8
313 movdqa xmm8, xmm5
314 psrld xmm8, 12
315 pslld xmm5, 20
316 por xmm5, xmm8
317 movdqa xmm8, xmm6
318 psrld xmm8, 12
319 pslld xmm6, 20
320 por xmm6, xmm8
321 movdqa xmm8, xmm7
322 psrld xmm8, 12
323 pslld xmm7, 20
324 por xmm7, xmm8
325 movdqa xmm8, xmm4
326 psrld xmm8, 12
327 pslld xmm4, 20
328 por xmm4, xmm8
329 paddd xmm0, xmmword ptr [rsp+90H]
330 paddd xmm1, xmmword ptr [rsp+0B0H]
331 paddd xmm2, xmmword ptr [rsp+0D0H]
332 paddd xmm3, xmmword ptr [rsp+0F0H]
333 paddd xmm0, xmm5
334 paddd xmm1, xmm6
335 paddd xmm2, xmm7
336 paddd xmm3, xmm4
337 pxor xmm15, xmm0
338 pxor xmm12, xmm1
339 pxor xmm13, xmm2
340 pxor xmm14, xmm3
341 movdqa xmm8, xmm15
342 psrld xmm15, 8
343 pslld xmm8, 24
344 pxor xmm15, xmm8
345 movdqa xmm8, xmm12
346 psrld xmm12, 8
347 pslld xmm8, 24
348 pxor xmm12, xmm8
349 movdqa xmm8, xmm13
350 psrld xmm13, 8
351 pslld xmm8, 24
352 pxor xmm13, xmm8
353 movdqa xmm8, xmm14
354 psrld xmm14, 8
355 pslld xmm8, 24
356 pxor xmm14, xmm8
357 paddd xmm10, xmm15
358 paddd xmm11, xmm12
359 movdqa xmm8, xmmword ptr [rsp+100H]
360 paddd xmm8, xmm13
361 paddd xmm9, xmm14
362 pxor xmm5, xmm10
363 pxor xmm6, xmm11
364 pxor xmm7, xmm8
365 pxor xmm4, xmm9
366 movdqa xmmword ptr [rsp+100H], xmm8
367 movdqa xmm8, xmm5
368 psrld xmm8, 7
369 pslld xmm5, 25
370 por xmm5, xmm8
371 movdqa xmm8, xmm6
372 psrld xmm8, 7
373 pslld xmm6, 25
374 por xmm6, xmm8
375 movdqa xmm8, xmm7
376 psrld xmm8, 7
377 pslld xmm7, 25
378 por xmm7, xmm8
379 movdqa xmm8, xmm4
380 psrld xmm8, 7
381 pslld xmm4, 25
382 por xmm4, xmm8
383 paddd xmm0, xmmword ptr [rsp+20H]
384 paddd xmm1, xmmword ptr [rsp+30H]
385 paddd xmm2, xmmword ptr [rsp+70H]
386 paddd xmm3, xmmword ptr [rsp+40H]
387 paddd xmm0, xmm4
388 paddd xmm1, xmm5
389 paddd xmm2, xmm6
390 paddd xmm3, xmm7
391 pxor xmm12, xmm0
392 pxor xmm13, xmm1
393 pxor xmm14, xmm2
394 pxor xmm15, xmm3
395 pshuflw xmm12, xmm12, 0B1H
396 pshufhw xmm12, xmm12, 0B1H
397 pshuflw xmm13, xmm13, 0B1H
398 pshufhw xmm13, xmm13, 0B1H
399 pshuflw xmm14, xmm14, 0B1H
400 pshufhw xmm14, xmm14, 0B1H
401 pshuflw xmm15, xmm15, 0B1H
402 pshufhw xmm15, xmm15, 0B1H
403 movdqa xmm8, xmmword ptr [rsp+100H]
404 paddd xmm8, xmm12
405 paddd xmm9, xmm13
406 paddd xmm10, xmm14
407 paddd xmm11, xmm15
408 pxor xmm4, xmm8
409 pxor xmm5, xmm9
410 pxor xmm6, xmm10
411 pxor xmm7, xmm11
412 movdqa xmmword ptr [rsp+100H], xmm8
413 movdqa xmm8, xmm4
414 psrld xmm8, 12
415 pslld xmm4, 20
416 por xmm4, xmm8
417 movdqa xmm8, xmm5
418 psrld xmm8, 12
419 pslld xmm5, 20
420 por xmm5, xmm8
421 movdqa xmm8, xmm6
422 psrld xmm8, 12
423 pslld xmm6, 20
424 por xmm6, xmm8
425 movdqa xmm8, xmm7
426 psrld xmm8, 12
427 pslld xmm7, 20
428 por xmm7, xmm8
429 paddd xmm0, xmmword ptr [rsp+60H]
430 paddd xmm1, xmmword ptr [rsp+0A0H]
431 paddd xmm2, xmmword ptr [rsp]
432 paddd xmm3, xmmword ptr [rsp+0D0H]
433 paddd xmm0, xmm4
434 paddd xmm1, xmm5
435 paddd xmm2, xmm6
436 paddd xmm3, xmm7
437 pxor xmm12, xmm0
438 pxor xmm13, xmm1
439 pxor xmm14, xmm2
440 pxor xmm15, xmm3
441 movdqa xmm8, xmm12
442 psrld xmm12, 8
443 pslld xmm8, 24
444 pxor xmm12, xmm8
445 movdqa xmm8, xmm13
446 psrld xmm13, 8
447 pslld xmm8, 24
448 pxor xmm13, xmm8
449 movdqa xmm8, xmm14
450 psrld xmm14, 8
451 pslld xmm8, 24
452 pxor xmm14, xmm8
453 movdqa xmm8, xmm15
454 psrld xmm15, 8
455 pslld xmm8, 24
456 pxor xmm15, xmm8
457 movdqa xmm8, xmmword ptr [rsp+100H]
458 paddd xmm8, xmm12
459 paddd xmm9, xmm13
460 paddd xmm10, xmm14
461 paddd xmm11, xmm15
462 pxor xmm4, xmm8
463 pxor xmm5, xmm9
464 pxor xmm6, xmm10
465 pxor xmm7, xmm11
466 movdqa xmmword ptr [rsp+100H], xmm8
467 movdqa xmm8, xmm4
468 psrld xmm8, 7
469 pslld xmm4, 25
470 por xmm4, xmm8
471 movdqa xmm8, xmm5
472 psrld xmm8, 7
473 pslld xmm5, 25
474 por xmm5, xmm8
475 movdqa xmm8, xmm6
476 psrld xmm8, 7
477 pslld xmm6, 25
478 por xmm6, xmm8
479 movdqa xmm8, xmm7
480 psrld xmm8, 7
481 pslld xmm7, 25
482 por xmm7, xmm8
483 paddd xmm0, xmmword ptr [rsp+10H]
484 paddd xmm1, xmmword ptr [rsp+0C0H]
485 paddd xmm2, xmmword ptr [rsp+90H]
486 paddd xmm3, xmmword ptr [rsp+0F0H]
487 paddd xmm0, xmm5
488 paddd xmm1, xmm6
489 paddd xmm2, xmm7
490 paddd xmm3, xmm4
491 pxor xmm15, xmm0
492 pxor xmm12, xmm1
493 pxor xmm13, xmm2
494 pxor xmm14, xmm3
495 pshuflw xmm15, xmm15, 0B1H
496 pshufhw xmm15, xmm15, 0B1H
497 pshuflw xmm12, xmm12, 0B1H
498 pshufhw xmm12, xmm12, 0B1H
499 pshuflw xmm13, xmm13, 0B1H
500 pshufhw xmm13, xmm13, 0B1H
501 pshuflw xmm14, xmm14, 0B1H
502 pshufhw xmm14, xmm14, 0B1H
503 paddd xmm10, xmm15
504 paddd xmm11, xmm12
505 movdqa xmm8, xmmword ptr [rsp+100H]
506 paddd xmm8, xmm13
507 paddd xmm9, xmm14
508 pxor xmm5, xmm10
509 pxor xmm6, xmm11
510 pxor xmm7, xmm8
511 pxor xmm4, xmm9
512 movdqa xmmword ptr [rsp+100H], xmm8
513 movdqa xmm8, xmm5
514 psrld xmm8, 12
515 pslld xmm5, 20
516 por xmm5, xmm8
517 movdqa xmm8, xmm6
518 psrld xmm8, 12
519 pslld xmm6, 20
520 por xmm6, xmm8
521 movdqa xmm8, xmm7
522 psrld xmm8, 12
523 pslld xmm7, 20
524 por xmm7, xmm8
525 movdqa xmm8, xmm4
526 psrld xmm8, 12
527 pslld xmm4, 20
528 por xmm4, xmm8
529 paddd xmm0, xmmword ptr [rsp+0B0H]
530 paddd xmm1, xmmword ptr [rsp+50H]
531 paddd xmm2, xmmword ptr [rsp+0E0H]
532 paddd xmm3, xmmword ptr [rsp+80H]
533 paddd xmm0, xmm5
534 paddd xmm1, xmm6
535 paddd xmm2, xmm7
536 paddd xmm3, xmm4
537 pxor xmm15, xmm0
538 pxor xmm12, xmm1
539 pxor xmm13, xmm2
540 pxor xmm14, xmm3
541 movdqa xmm8, xmm15
542 psrld xmm15, 8
543 pslld xmm8, 24
544 pxor xmm15, xmm8
545 movdqa xmm8, xmm12
546 psrld xmm12, 8
547 pslld xmm8, 24
548 pxor xmm12, xmm8
549 movdqa xmm8, xmm13
550 psrld xmm13, 8
551 pslld xmm8, 24
552 pxor xmm13, xmm8
553 movdqa xmm8, xmm14
554 psrld xmm14, 8
555 pslld xmm8, 24
556 pxor xmm14, xmm8
557 paddd xmm10, xmm15
558 paddd xmm11, xmm12
559 movdqa xmm8, xmmword ptr [rsp+100H]
560 paddd xmm8, xmm13
561 paddd xmm9, xmm14
562 pxor xmm5, xmm10
563 pxor xmm6, xmm11
564 pxor xmm7, xmm8
565 pxor xmm4, xmm9
566 movdqa xmmword ptr [rsp+100H], xmm8
567 movdqa xmm8, xmm5
568 psrld xmm8, 7
569 pslld xmm5, 25
570 por xmm5, xmm8
571 movdqa xmm8, xmm6
572 psrld xmm8, 7
573 pslld xmm6, 25
574 por xmm6, xmm8
575 movdqa xmm8, xmm7
576 psrld xmm8, 7
577 pslld xmm7, 25
578 por xmm7, xmm8
579 movdqa xmm8, xmm4
580 psrld xmm8, 7
581 pslld xmm4, 25
582 por xmm4, xmm8
583 paddd xmm0, xmmword ptr [rsp+30H]
584 paddd xmm1, xmmword ptr [rsp+0A0H]
585 paddd xmm2, xmmword ptr [rsp+0D0H]
586 paddd xmm3, xmmword ptr [rsp+70H]
587 paddd xmm0, xmm4
588 paddd xmm1, xmm5
589 paddd xmm2, xmm6
590 paddd xmm3, xmm7
591 pxor xmm12, xmm0
592 pxor xmm13, xmm1
593 pxor xmm14, xmm2
594 pxor xmm15, xmm3
595 pshuflw xmm12, xmm12, 0B1H
596 pshufhw xmm12, xmm12, 0B1H
597 pshuflw xmm13, xmm13, 0B1H
598 pshufhw xmm13, xmm13, 0B1H
599 pshuflw xmm14, xmm14, 0B1H
600 pshufhw xmm14, xmm14, 0B1H
601 pshuflw xmm15, xmm15, 0B1H
602 pshufhw xmm15, xmm15, 0B1H
603 movdqa xmm8, xmmword ptr [rsp+100H]
604 paddd xmm8, xmm12
605 paddd xmm9, xmm13
606 paddd xmm10, xmm14
607 paddd xmm11, xmm15
608 pxor xmm4, xmm8
609 pxor xmm5, xmm9
610 pxor xmm6, xmm10
611 pxor xmm7, xmm11
612 movdqa xmmword ptr [rsp+100H], xmm8
613 movdqa xmm8, xmm4
614 psrld xmm8, 12
615 pslld xmm4, 20
616 por xmm4, xmm8
617 movdqa xmm8, xmm5
618 psrld xmm8, 12
619 pslld xmm5, 20
620 por xmm5, xmm8
621 movdqa xmm8, xmm6
622 psrld xmm8, 12
623 pslld xmm6, 20
624 por xmm6, xmm8
625 movdqa xmm8, xmm7
626 psrld xmm8, 12
627 pslld xmm7, 20
628 por xmm7, xmm8
629 paddd xmm0, xmmword ptr [rsp+40H]
630 paddd xmm1, xmmword ptr [rsp+0C0H]
631 paddd xmm2, xmmword ptr [rsp+20H]
632 paddd xmm3, xmmword ptr [rsp+0E0H]
633 paddd xmm0, xmm4
634 paddd xmm1, xmm5
635 paddd xmm2, xmm6
636 paddd xmm3, xmm7
637 pxor xmm12, xmm0
638 pxor xmm13, xmm1
639 pxor xmm14, xmm2
640 pxor xmm15, xmm3
641 movdqa xmm8, xmm12
642 psrld xmm12, 8
643 pslld xmm8, 24
644 pxor xmm12, xmm8
645 movdqa xmm8, xmm13
646 psrld xmm13, 8
647 pslld xmm8, 24
648 pxor xmm13, xmm8
649 movdqa xmm8, xmm14
650 psrld xmm14, 8
651 pslld xmm8, 24
652 pxor xmm14, xmm8
653 movdqa xmm8, xmm15
654 psrld xmm15, 8
655 pslld xmm8, 24
656 pxor xmm15, xmm8
657 movdqa xmm8, xmmword ptr [rsp+100H]
658 paddd xmm8, xmm12
659 paddd xmm9, xmm13
660 paddd xmm10, xmm14
661 paddd xmm11, xmm15
662 pxor xmm4, xmm8
663 pxor xmm5, xmm9
664 pxor xmm6, xmm10
665 pxor xmm7, xmm11
666 movdqa xmmword ptr [rsp+100H], xmm8
667 movdqa xmm8, xmm4
668 psrld xmm8, 7
669 pslld xmm4, 25
670 por xmm4, xmm8
671 movdqa xmm8, xmm5
672 psrld xmm8, 7
673 pslld xmm5, 25
674 por xmm5, xmm8
675 movdqa xmm8, xmm6
676 psrld xmm8, 7
677 pslld xmm6, 25
678 por xmm6, xmm8
679 movdqa xmm8, xmm7
680 psrld xmm8, 7
681 pslld xmm7, 25
682 por xmm7, xmm8
683 paddd xmm0, xmmword ptr [rsp+60H]
684 paddd xmm1, xmmword ptr [rsp+90H]
685 paddd xmm2, xmmword ptr [rsp+0B0H]
686 paddd xmm3, xmmword ptr [rsp+80H]
687 paddd xmm0, xmm5
688 paddd xmm1, xmm6
689 paddd xmm2, xmm7
690 paddd xmm3, xmm4
691 pxor xmm15, xmm0
692 pxor xmm12, xmm1
693 pxor xmm13, xmm2
694 pxor xmm14, xmm3
695 pshuflw xmm15, xmm15, 0B1H
696 pshufhw xmm15, xmm15, 0B1H
697 pshuflw xmm12, xmm12, 0B1H
698 pshufhw xmm12, xmm12, 0B1H
699 pshuflw xmm13, xmm13, 0B1H
700 pshufhw xmm13, xmm13, 0B1H
701 pshuflw xmm14, xmm14, 0B1H
702 pshufhw xmm14, xmm14, 0B1H
703 paddd xmm10, xmm15
704 paddd xmm11, xmm12
705 movdqa xmm8, xmmword ptr [rsp+100H]
706 paddd xmm8, xmm13
707 paddd xmm9, xmm14
708 pxor xmm5, xmm10
709 pxor xmm6, xmm11
710 pxor xmm7, xmm8
711 pxor xmm4, xmm9
712 movdqa xmmword ptr [rsp+100H], xmm8
713 movdqa xmm8, xmm5
714 psrld xmm8, 12
715 pslld xmm5, 20
716 por xmm5, xmm8
717 movdqa xmm8, xmm6
718 psrld xmm8, 12
719 pslld xmm6, 20
720 por xmm6, xmm8
721 movdqa xmm8, xmm7
722 psrld xmm8, 12
723 pslld xmm7, 20
724 por xmm7, xmm8
725 movdqa xmm8, xmm4
726 psrld xmm8, 12
727 pslld xmm4, 20
728 por xmm4, xmm8
729 paddd xmm0, xmmword ptr [rsp+50H]
730 paddd xmm1, xmmword ptr [rsp]
731 paddd xmm2, xmmword ptr [rsp+0F0H]
732 paddd xmm3, xmmword ptr [rsp+10H]
733 paddd xmm0, xmm5
734 paddd xmm1, xmm6
735 paddd xmm2, xmm7
736 paddd xmm3, xmm4
737 pxor xmm15, xmm0
738 pxor xmm12, xmm1
739 pxor xmm13, xmm2
740 pxor xmm14, xmm3
741 movdqa xmm8, xmm15
742 psrld xmm15, 8
743 pslld xmm8, 24
744 pxor xmm15, xmm8
745 movdqa xmm8, xmm12
746 psrld xmm12, 8
747 pslld xmm8, 24
748 pxor xmm12, xmm8
749 movdqa xmm8, xmm13
750 psrld xmm13, 8
751 pslld xmm8, 24
752 pxor xmm13, xmm8
753 movdqa xmm8, xmm14
754 psrld xmm14, 8
755 pslld xmm8, 24
756 pxor xmm14, xmm8
757 paddd xmm10, xmm15
758 paddd xmm11, xmm12
759 movdqa xmm8, xmmword ptr [rsp+100H]
760 paddd xmm8, xmm13
761 paddd xmm9, xmm14
762 pxor xmm5, xmm10
763 pxor xmm6, xmm11
764 pxor xmm7, xmm8
765 pxor xmm4, xmm9
766 movdqa xmmword ptr [rsp+100H], xmm8
767 movdqa xmm8, xmm5
768 psrld xmm8, 7
769 pslld xmm5, 25
770 por xmm5, xmm8
771 movdqa xmm8, xmm6
772 psrld xmm8, 7
773 pslld xmm6, 25
774 por xmm6, xmm8
775 movdqa xmm8, xmm7
776 psrld xmm8, 7
777 pslld xmm7, 25
778 por xmm7, xmm8
779 movdqa xmm8, xmm4
780 psrld xmm8, 7
781 pslld xmm4, 25
782 por xmm4, xmm8
783 paddd xmm0, xmmword ptr [rsp+0A0H]
784 paddd xmm1, xmmword ptr [rsp+0C0H]
785 paddd xmm2, xmmword ptr [rsp+0E0H]
786 paddd xmm3, xmmword ptr [rsp+0D0H]
787 paddd xmm0, xmm4
788 paddd xmm1, xmm5
789 paddd xmm2, xmm6
790 paddd xmm3, xmm7
791 pxor xmm12, xmm0
792 pxor xmm13, xmm1
793 pxor xmm14, xmm2
794 pxor xmm15, xmm3
795 pshuflw xmm12, xmm12, 0B1H
796 pshufhw xmm12, xmm12, 0B1H
797 pshuflw xmm13, xmm13, 0B1H
798 pshufhw xmm13, xmm13, 0B1H
799 pshuflw xmm14, xmm14, 0B1H
800 pshufhw xmm14, xmm14, 0B1H
801 pshuflw xmm15, xmm15, 0B1H
802 pshufhw xmm15, xmm15, 0B1H
803 movdqa xmm8, xmmword ptr [rsp+100H]
804 paddd xmm8, xmm12
805 paddd xmm9, xmm13
806 paddd xmm10, xmm14
807 paddd xmm11, xmm15
808 pxor xmm4, xmm8
809 pxor xmm5, xmm9
810 pxor xmm6, xmm10
811 pxor xmm7, xmm11
812 movdqa xmmword ptr [rsp+100H], xmm8
813 movdqa xmm8, xmm4
814 psrld xmm8, 12
815 pslld xmm4, 20
816 por xmm4, xmm8
817 movdqa xmm8, xmm5
818 psrld xmm8, 12
819 pslld xmm5, 20
820 por xmm5, xmm8
821 movdqa xmm8, xmm6
822 psrld xmm8, 12
823 pslld xmm6, 20
824 por xmm6, xmm8
825 movdqa xmm8, xmm7
826 psrld xmm8, 12
827 pslld xmm7, 20
828 por xmm7, xmm8
829 paddd xmm0, xmmword ptr [rsp+70H]
830 paddd xmm1, xmmword ptr [rsp+90H]
831 paddd xmm2, xmmword ptr [rsp+30H]
832 paddd xmm3, xmmword ptr [rsp+0F0H]
833 paddd xmm0, xmm4
834 paddd xmm1, xmm5
835 paddd xmm2, xmm6
836 paddd xmm3, xmm7
837 pxor xmm12, xmm0
838 pxor xmm13, xmm1
839 pxor xmm14, xmm2
840 pxor xmm15, xmm3
841 movdqa xmm8, xmm12
842 psrld xmm12, 8
843 pslld xmm8, 24
844 pxor xmm12, xmm8
845 movdqa xmm8, xmm13
846 psrld xmm13, 8
847 pslld xmm8, 24
848 pxor xmm13, xmm8
849 movdqa xmm8, xmm14
850 psrld xmm14, 8
851 pslld xmm8, 24
852 pxor xmm14, xmm8
853 movdqa xmm8, xmm15
854 psrld xmm15, 8
855 pslld xmm8, 24
856 pxor xmm15, xmm8
857 movdqa xmm8, xmmword ptr [rsp+100H]
858 paddd xmm8, xmm12
859 paddd xmm9, xmm13
860 paddd xmm10, xmm14
861 paddd xmm11, xmm15
862 pxor xmm4, xmm8
863 pxor xmm5, xmm9
864 pxor xmm6, xmm10
865 pxor xmm7, xmm11
866 movdqa xmmword ptr [rsp+100H], xmm8
867 movdqa xmm8, xmm4
868 psrld xmm8, 7
869 pslld xmm4, 25
870 por xmm4, xmm8
871 movdqa xmm8, xmm5
872 psrld xmm8, 7
873 pslld xmm5, 25
874 por xmm5, xmm8
875 movdqa xmm8, xmm6
876 psrld xmm8, 7
877 pslld xmm6, 25
878 por xmm6, xmm8
879 movdqa xmm8, xmm7
880 psrld xmm8, 7
881 pslld xmm7, 25
882 por xmm7, xmm8
883 paddd xmm0, xmmword ptr [rsp+40H]
884 paddd xmm1, xmmword ptr [rsp+0B0H]
885 paddd xmm2, xmmword ptr [rsp+50H]
886 paddd xmm3, xmmword ptr [rsp+10H]
887 paddd xmm0, xmm5
888 paddd xmm1, xmm6
889 paddd xmm2, xmm7
890 paddd xmm3, xmm4
891 pxor xmm15, xmm0
892 pxor xmm12, xmm1
893 pxor xmm13, xmm2
894 pxor xmm14, xmm3
895 pshuflw xmm15, xmm15, 0B1H
896 pshufhw xmm15, xmm15, 0B1H
897 pshuflw xmm12, xmm12, 0B1H
898 pshufhw xmm12, xmm12, 0B1H
899 pshuflw xmm13, xmm13, 0B1H
900 pshufhw xmm13, xmm13, 0B1H
901 pshuflw xmm14, xmm14, 0B1H
902 pshufhw xmm14, xmm14, 0B1H
903 paddd xmm10, xmm15
904 paddd xmm11, xmm12
905 movdqa xmm8, xmmword ptr [rsp+100H]
906 paddd xmm8, xmm13
907 paddd xmm9, xmm14
908 pxor xmm5, xmm10
909 pxor xmm6, xmm11
910 pxor xmm7, xmm8
911 pxor xmm4, xmm9
912 movdqa xmmword ptr [rsp+100H], xmm8
913 movdqa xmm8, xmm5
914 psrld xmm8, 12
915 pslld xmm5, 20
916 por xmm5, xmm8
917 movdqa xmm8, xmm6
918 psrld xmm8, 12
919 pslld xmm6, 20
920 por xmm6, xmm8
921 movdqa xmm8, xmm7
922 psrld xmm8, 12
923 pslld xmm7, 20
924 por xmm7, xmm8
925 movdqa xmm8, xmm4
926 psrld xmm8, 12
927 pslld xmm4, 20
928 por xmm4, xmm8
929 paddd xmm0, xmmword ptr [rsp]
930 paddd xmm1, xmmword ptr [rsp+20H]
931 paddd xmm2, xmmword ptr [rsp+80H]
932 paddd xmm3, xmmword ptr [rsp+60H]
933 paddd xmm0, xmm5
934 paddd xmm1, xmm6
935 paddd xmm2, xmm7
936 paddd xmm3, xmm4
937 pxor xmm15, xmm0
938 pxor xmm12, xmm1
939 pxor xmm13, xmm2
940 pxor xmm14, xmm3
941 movdqa xmm8, xmm15
942 psrld xmm15, 8
943 pslld xmm8, 24
944 pxor xmm15, xmm8
945 movdqa xmm8, xmm12
946 psrld xmm12, 8
947 pslld xmm8, 24
948 pxor xmm12, xmm8
949 movdqa xmm8, xmm13
950 psrld xmm13, 8
951 pslld xmm8, 24
952 pxor xmm13, xmm8
953 movdqa xmm8, xmm14
954 psrld xmm14, 8
955 pslld xmm8, 24
956 pxor xmm14, xmm8
957 paddd xmm10, xmm15
958 paddd xmm11, xmm12
959 movdqa xmm8, xmmword ptr [rsp+100H]
960 paddd xmm8, xmm13
961 paddd xmm9, xmm14
962 pxor xmm5, xmm10
963 pxor xmm6, xmm11
964 pxor xmm7, xmm8
965 pxor xmm4, xmm9
966 movdqa xmmword ptr [rsp+100H], xmm8
967 movdqa xmm8, xmm5
968 psrld xmm8, 7
969 pslld xmm5, 25
970 por xmm5, xmm8
971 movdqa xmm8, xmm6
972 psrld xmm8, 7
973 pslld xmm6, 25
974 por xmm6, xmm8
975 movdqa xmm8, xmm7
976 psrld xmm8, 7
977 pslld xmm7, 25
978 por xmm7, xmm8
979 movdqa xmm8, xmm4
980 psrld xmm8, 7
981 pslld xmm4, 25
982 por xmm4, xmm8
983 paddd xmm0, xmmword ptr [rsp+0C0H]
984 paddd xmm1, xmmword ptr [rsp+90H]
985 paddd xmm2, xmmword ptr [rsp+0F0H]
986 paddd xmm3, xmmword ptr [rsp+0E0H]
987 paddd xmm0, xmm4
988 paddd xmm1, xmm5
989 paddd xmm2, xmm6
990 paddd xmm3, xmm7
991 pxor xmm12, xmm0
992 pxor xmm13, xmm1
993 pxor xmm14, xmm2
994 pxor xmm15, xmm3
995 pshuflw xmm12, xmm12, 0B1H
996 pshufhw xmm12, xmm12, 0B1H
997 pshuflw xmm13, xmm13, 0B1H
998 pshufhw xmm13, xmm13, 0B1H
999 pshuflw xmm14, xmm14, 0B1H
1000 pshufhw xmm14, xmm14, 0B1H
1001 pshuflw xmm15, xmm15, 0B1H
1002 pshufhw xmm15, xmm15, 0B1H
1003 movdqa xmm8, xmmword ptr [rsp+100H]
1004 paddd xmm8, xmm12
1005 paddd xmm9, xmm13
1006 paddd xmm10, xmm14
1007 paddd xmm11, xmm15
1008 pxor xmm4, xmm8
1009 pxor xmm5, xmm9
1010 pxor xmm6, xmm10
1011 pxor xmm7, xmm11
1012 movdqa xmmword ptr [rsp+100H], xmm8
1013 movdqa xmm8, xmm4
1014 psrld xmm8, 12
1015 pslld xmm4, 20
1016 por xmm4, xmm8
1017 movdqa xmm8, xmm5
1018 psrld xmm8, 12
1019 pslld xmm5, 20
1020 por xmm5, xmm8
1021 movdqa xmm8, xmm6
1022 psrld xmm8, 12
1023 pslld xmm6, 20
1024 por xmm6, xmm8
1025 movdqa xmm8, xmm7
1026 psrld xmm8, 12
1027 pslld xmm7, 20
1028 por xmm7, xmm8
1029 paddd xmm0, xmmword ptr [rsp+0D0H]
1030 paddd xmm1, xmmword ptr [rsp+0B0H]
1031 paddd xmm2, xmmword ptr [rsp+0A0H]
1032 paddd xmm3, xmmword ptr [rsp+80H]
1033 paddd xmm0, xmm4
1034 paddd xmm1, xmm5
1035 paddd xmm2, xmm6
1036 paddd xmm3, xmm7
1037 pxor xmm12, xmm0
1038 pxor xmm13, xmm1
1039 pxor xmm14, xmm2
1040 pxor xmm15, xmm3
1041 movdqa xmm8, xmm12
1042 psrld xmm12, 8
1043 pslld xmm8, 24
1044 pxor xmm12, xmm8
1045 movdqa xmm8, xmm13
1046 psrld xmm13, 8
1047 pslld xmm8, 24
1048 pxor xmm13, xmm8
1049 movdqa xmm8, xmm14
1050 psrld xmm14, 8
1051 pslld xmm8, 24
1052 pxor xmm14, xmm8
1053 movdqa xmm8, xmm15
1054 psrld xmm15, 8
1055 pslld xmm8, 24
1056 pxor xmm15, xmm8
1057 movdqa xmm8, xmmword ptr [rsp+100H]
1058 paddd xmm8, xmm12
1059 paddd xmm9, xmm13
1060 paddd xmm10, xmm14
1061 paddd xmm11, xmm15
1062 pxor xmm4, xmm8
1063 pxor xmm5, xmm9
1064 pxor xmm6, xmm10
1065 pxor xmm7, xmm11
1066 movdqa xmmword ptr [rsp+100H], xmm8
1067 movdqa xmm8, xmm4
1068 psrld xmm8, 7
1069 pslld xmm4, 25
1070 por xmm4, xmm8
1071 movdqa xmm8, xmm5
1072 psrld xmm8, 7
1073 pslld xmm5, 25
1074 por xmm5, xmm8
1075 movdqa xmm8, xmm6
1076 psrld xmm8, 7
1077 pslld xmm6, 25
1078 por xmm6, xmm8
1079 movdqa xmm8, xmm7
1080 psrld xmm8, 7
1081 pslld xmm7, 25
1082 por xmm7, xmm8
1083 paddd xmm0, xmmword ptr [rsp+70H]
1084 paddd xmm1, xmmword ptr [rsp+50H]
1085 paddd xmm2, xmmword ptr [rsp]
1086 paddd xmm3, xmmword ptr [rsp+60H]
1087 paddd xmm0, xmm5
1088 paddd xmm1, xmm6
1089 paddd xmm2, xmm7
1090 paddd xmm3, xmm4
1091 pxor xmm15, xmm0
1092 pxor xmm12, xmm1
1093 pxor xmm13, xmm2
1094 pxor xmm14, xmm3
1095 pshuflw xmm15, xmm15, 0B1H
1096 pshufhw xmm15, xmm15, 0B1H
1097 pshuflw xmm12, xmm12, 0B1H
1098 pshufhw xmm12, xmm12, 0B1H
1099 pshuflw xmm13, xmm13, 0B1H
1100 pshufhw xmm13, xmm13, 0B1H
1101 pshuflw xmm14, xmm14, 0B1H
1102 pshufhw xmm14, xmm14, 0B1H
1103 paddd xmm10, xmm15
1104 paddd xmm11, xmm12
1105 movdqa xmm8, xmmword ptr [rsp+100H]
1106 paddd xmm8, xmm13
1107 paddd xmm9, xmm14
1108 pxor xmm5, xmm10
1109 pxor xmm6, xmm11
1110 pxor xmm7, xmm8
1111 pxor xmm4, xmm9
1112 movdqa xmmword ptr [rsp+100H], xmm8
1113 movdqa xmm8, xmm5
1114 psrld xmm8, 12
1115 pslld xmm5, 20
1116 por xmm5, xmm8
1117 movdqa xmm8, xmm6
1118 psrld xmm8, 12
1119 pslld xmm6, 20
1120 por xmm6, xmm8
1121 movdqa xmm8, xmm7
1122 psrld xmm8, 12
1123 pslld xmm7, 20
1124 por xmm7, xmm8
1125 movdqa xmm8, xmm4
1126 psrld xmm8, 12
1127 pslld xmm4, 20
1128 por xmm4, xmm8
1129 paddd xmm0, xmmword ptr [rsp+20H]
1130 paddd xmm1, xmmword ptr [rsp+30H]
1131 paddd xmm2, xmmword ptr [rsp+10H]
1132 paddd xmm3, xmmword ptr [rsp+40H]
1133 paddd xmm0, xmm5
1134 paddd xmm1, xmm6
1135 paddd xmm2, xmm7
1136 paddd xmm3, xmm4
1137 pxor xmm15, xmm0
1138 pxor xmm12, xmm1
1139 pxor xmm13, xmm2
1140 pxor xmm14, xmm3
1141 movdqa xmm8, xmm15
1142 psrld xmm15, 8
1143 pslld xmm8, 24
1144 pxor xmm15, xmm8
1145 movdqa xmm8, xmm12
1146 psrld xmm12, 8
1147 pslld xmm8, 24
1148 pxor xmm12, xmm8
1149 movdqa xmm8, xmm13
1150 psrld xmm13, 8
1151 pslld xmm8, 24
1152 pxor xmm13, xmm8
1153 movdqa xmm8, xmm14
1154 psrld xmm14, 8
1155 pslld xmm8, 24
1156 pxor xmm14, xmm8
1157 paddd xmm10, xmm15
1158 paddd xmm11, xmm12
1159 movdqa xmm8, xmmword ptr [rsp+100H]
1160 paddd xmm8, xmm13
1161 paddd xmm9, xmm14
1162 pxor xmm5, xmm10
1163 pxor xmm6, xmm11
1164 pxor xmm7, xmm8
1165 pxor xmm4, xmm9
1166 movdqa xmmword ptr [rsp+100H], xmm8
1167 movdqa xmm8, xmm5
1168 psrld xmm8, 7
1169 pslld xmm5, 25
1170 por xmm5, xmm8
1171 movdqa xmm8, xmm6
1172 psrld xmm8, 7
1173 pslld xmm6, 25
1174 por xmm6, xmm8
1175 movdqa xmm8, xmm7
1176 psrld xmm8, 7
1177 pslld xmm7, 25
1178 por xmm7, xmm8
1179 movdqa xmm8, xmm4
1180 psrld xmm8, 7
1181 pslld xmm4, 25
1182 por xmm4, xmm8
1183 paddd xmm0, xmmword ptr [rsp+90H]
1184 paddd xmm1, xmmword ptr [rsp+0B0H]
1185 paddd xmm2, xmmword ptr [rsp+80H]
1186 paddd xmm3, xmmword ptr [rsp+0F0H]
1187 paddd xmm0, xmm4
1188 paddd xmm1, xmm5
1189 paddd xmm2, xmm6
1190 paddd xmm3, xmm7
1191 pxor xmm12, xmm0
1192 pxor xmm13, xmm1
1193 pxor xmm14, xmm2
1194 pxor xmm15, xmm3
1195 pshuflw xmm12, xmm12, 0B1H
1196 pshufhw xmm12, xmm12, 0B1H
1197 pshuflw xmm13, xmm13, 0B1H
1198 pshufhw xmm13, xmm13, 0B1H
1199 pshuflw xmm14, xmm14, 0B1H
1200 pshufhw xmm14, xmm14, 0B1H
1201 pshuflw xmm15, xmm15, 0B1H
1202 pshufhw xmm15, xmm15, 0B1H
1203 movdqa xmm8, xmmword ptr [rsp+100H]
1204 paddd xmm8, xmm12
1205 paddd xmm9, xmm13
1206 paddd xmm10, xmm14
1207 paddd xmm11, xmm15
1208 pxor xmm4, xmm8
1209 pxor xmm5, xmm9
1210 pxor xmm6, xmm10
1211 pxor xmm7, xmm11
1212 movdqa xmmword ptr [rsp+100H], xmm8
1213 movdqa xmm8, xmm4
1214 psrld xmm8, 12
1215 pslld xmm4, 20
1216 por xmm4, xmm8
1217 movdqa xmm8, xmm5
1218 psrld xmm8, 12
1219 pslld xmm5, 20
1220 por xmm5, xmm8
1221 movdqa xmm8, xmm6
1222 psrld xmm8, 12
1223 pslld xmm6, 20
1224 por xmm6, xmm8
1225 movdqa xmm8, xmm7
1226 psrld xmm8, 12
1227 pslld xmm7, 20
1228 por xmm7, xmm8
1229 paddd xmm0, xmmword ptr [rsp+0E0H]
1230 paddd xmm1, xmmword ptr [rsp+50H]
1231 paddd xmm2, xmmword ptr [rsp+0C0H]
1232 paddd xmm3, xmmword ptr [rsp+10H]
1233 paddd xmm0, xmm4
1234 paddd xmm1, xmm5
1235 paddd xmm2, xmm6
1236 paddd xmm3, xmm7
1237 pxor xmm12, xmm0
1238 pxor xmm13, xmm1
1239 pxor xmm14, xmm2
1240 pxor xmm15, xmm3
1241 movdqa xmm8, xmm12
1242 psrld xmm12, 8
1243 pslld xmm8, 24
1244 pxor xmm12, xmm8
1245 movdqa xmm8, xmm13
1246 psrld xmm13, 8
1247 pslld xmm8, 24
1248 pxor xmm13, xmm8
1249 movdqa xmm8, xmm14
1250 psrld xmm14, 8
1251 pslld xmm8, 24
1252 pxor xmm14, xmm8
1253 movdqa xmm8, xmm15
1254 psrld xmm15, 8
1255 pslld xmm8, 24
1256 pxor xmm15, xmm8
1257 movdqa xmm8, xmmword ptr [rsp+100H]
1258 paddd xmm8, xmm12
1259 paddd xmm9, xmm13
1260 paddd xmm10, xmm14
1261 paddd xmm11, xmm15
1262 pxor xmm4, xmm8
1263 pxor xmm5, xmm9
1264 pxor xmm6, xmm10
1265 pxor xmm7, xmm11
1266 movdqa xmmword ptr [rsp+100H], xmm8
1267 movdqa xmm8, xmm4
1268 psrld xmm8, 7
1269 pslld xmm4, 25
1270 por xmm4, xmm8
1271 movdqa xmm8, xmm5
1272 psrld xmm8, 7
1273 pslld xmm5, 25
1274 por xmm5, xmm8
1275 movdqa xmm8, xmm6
1276 psrld xmm8, 7
1277 pslld xmm6, 25
1278 por xmm6, xmm8
1279 movdqa xmm8, xmm7
1280 psrld xmm8, 7
1281 pslld xmm7, 25
1282 por xmm7, xmm8
1283 paddd xmm0, xmmword ptr [rsp+0D0H]
1284 paddd xmm1, xmmword ptr [rsp]
1285 paddd xmm2, xmmword ptr [rsp+20H]
1286 paddd xmm3, xmmword ptr [rsp+40H]
1287 paddd xmm0, xmm5
1288 paddd xmm1, xmm6
1289 paddd xmm2, xmm7
1290 paddd xmm3, xmm4
1291 pxor xmm15, xmm0
1292 pxor xmm12, xmm1
1293 pxor xmm13, xmm2
1294 pxor xmm14, xmm3
1295 pshuflw xmm15, xmm15, 0B1H
1296 pshufhw xmm15, xmm15, 0B1H
1297 pshuflw xmm12, xmm12, 0B1H
1298 pshufhw xmm12, xmm12, 0B1H
1299 pshuflw xmm13, xmm13, 0B1H
1300 pshufhw xmm13, xmm13, 0B1H
1301 pshuflw xmm14, xmm14, 0B1H
1302 pshufhw xmm14, xmm14, 0B1H
1303 paddd xmm10, xmm15
1304 paddd xmm11, xmm12
1305 movdqa xmm8, xmmword ptr [rsp+100H]
1306 paddd xmm8, xmm13
1307 paddd xmm9, xmm14
1308 pxor xmm5, xmm10
1309 pxor xmm6, xmm11
1310 pxor xmm7, xmm8
1311 pxor xmm4, xmm9
1312 movdqa xmmword ptr [rsp+100H], xmm8
1313 movdqa xmm8, xmm5
1314 psrld xmm8, 12
1315 pslld xmm5, 20
1316 por xmm5, xmm8
1317 movdqa xmm8, xmm6
1318 psrld xmm8, 12
1319 pslld xmm6, 20
1320 por xmm6, xmm8
1321 movdqa xmm8, xmm7
1322 psrld xmm8, 12
1323 pslld xmm7, 20
1324 por xmm7, xmm8
1325 movdqa xmm8, xmm4
1326 psrld xmm8, 12
1327 pslld xmm4, 20
1328 por xmm4, xmm8
1329 paddd xmm0, xmmword ptr [rsp+30H]
1330 paddd xmm1, xmmword ptr [rsp+0A0H]
1331 paddd xmm2, xmmword ptr [rsp+60H]
1332 paddd xmm3, xmmword ptr [rsp+70H]
1333 paddd xmm0, xmm5
1334 paddd xmm1, xmm6
1335 paddd xmm2, xmm7
1336 paddd xmm3, xmm4
1337 pxor xmm15, xmm0
1338 pxor xmm12, xmm1
1339 pxor xmm13, xmm2
1340 pxor xmm14, xmm3
1341 movdqa xmm8, xmm15
1342 psrld xmm15, 8
1343 pslld xmm8, 24
1344 pxor xmm15, xmm8
1345 movdqa xmm8, xmm12
1346 psrld xmm12, 8
1347 pslld xmm8, 24
1348 pxor xmm12, xmm8
1349 movdqa xmm8, xmm13
1350 psrld xmm13, 8
1351 pslld xmm8, 24
1352 pxor xmm13, xmm8
1353 movdqa xmm8, xmm14
1354 psrld xmm14, 8
1355 pslld xmm8, 24
1356 pxor xmm14, xmm8
1357 paddd xmm10, xmm15
1358 paddd xmm11, xmm12
1359 movdqa xmm8, xmmword ptr [rsp+100H]
1360 paddd xmm8, xmm13
1361 paddd xmm9, xmm14
1362 pxor xmm5, xmm10
1363 pxor xmm6, xmm11
1364 pxor xmm7, xmm8
1365 pxor xmm4, xmm9
1366 movdqa xmmword ptr [rsp+100H], xmm8
1367 movdqa xmm8, xmm5
1368 psrld xmm8, 7
1369 pslld xmm5, 25
1370 por xmm5, xmm8
1371 movdqa xmm8, xmm6
1372 psrld xmm8, 7
1373 pslld xmm6, 25
1374 por xmm6, xmm8
1375 movdqa xmm8, xmm7
1376 psrld xmm8, 7
1377 pslld xmm7, 25
1378 por xmm7, xmm8
1379 movdqa xmm8, xmm4
1380 psrld xmm8, 7
1381 pslld xmm4, 25
1382 por xmm4, xmm8
1383 paddd xmm0, xmmword ptr [rsp+0B0H]
1384 paddd xmm1, xmmword ptr [rsp+50H]
1385 paddd xmm2, xmmword ptr [rsp+10H]
1386 paddd xmm3, xmmword ptr [rsp+80H]
1387 paddd xmm0, xmm4
1388 paddd xmm1, xmm5
1389 paddd xmm2, xmm6
1390 paddd xmm3, xmm7
1391 pxor xmm12, xmm0
1392 pxor xmm13, xmm1
1393 pxor xmm14, xmm2
1394 pxor xmm15, xmm3
1395 pshuflw xmm12, xmm12, 0B1H
1396 pshufhw xmm12, xmm12, 0B1H
1397 pshuflw xmm13, xmm13, 0B1H
1398 pshufhw xmm13, xmm13, 0B1H
1399 pshuflw xmm14, xmm14, 0B1H
1400 pshufhw xmm14, xmm14, 0B1H
1401 pshuflw xmm15, xmm15, 0B1H
1402 pshufhw xmm15, xmm15, 0B1H
1403 movdqa xmm8, xmmword ptr [rsp+100H]
1404 paddd xmm8, xmm12
1405 paddd xmm9, xmm13
1406 paddd xmm10, xmm14
1407 paddd xmm11, xmm15
1408 pxor xmm4, xmm8
1409 pxor xmm5, xmm9
1410 pxor xmm6, xmm10
1411 pxor xmm7, xmm11
1412 movdqa xmmword ptr [rsp+100H], xmm8
1413 movdqa xmm8, xmm4
1414 psrld xmm8, 12
1415 pslld xmm4, 20
1416 por xmm4, xmm8
1417 movdqa xmm8, xmm5
1418 psrld xmm8, 12
1419 pslld xmm5, 20
1420 por xmm5, xmm8
1421 movdqa xmm8, xmm6
1422 psrld xmm8, 12
1423 pslld xmm6, 20
1424 por xmm6, xmm8
1425 movdqa xmm8, xmm7
1426 psrld xmm8, 12
1427 pslld xmm7, 20
1428 por xmm7, xmm8
1429 paddd xmm0, xmmword ptr [rsp+0F0H]
1430 paddd xmm1, xmmword ptr [rsp]
1431 paddd xmm2, xmmword ptr [rsp+90H]
1432 paddd xmm3, xmmword ptr [rsp+60H]
1433 paddd xmm0, xmm4
1434 paddd xmm1, xmm5
1435 paddd xmm2, xmm6
1436 paddd xmm3, xmm7
1437 pxor xmm12, xmm0
1438 pxor xmm13, xmm1
1439 pxor xmm14, xmm2
1440 pxor xmm15, xmm3
1441 movdqa xmm8, xmm12
1442 psrld xmm12, 8
1443 pslld xmm8, 24
1444 pxor xmm12, xmm8
1445 movdqa xmm8, xmm13
1446 psrld xmm13, 8
1447 pslld xmm8, 24
1448 pxor xmm13, xmm8
1449 movdqa xmm8, xmm14
1450 psrld xmm14, 8
1451 pslld xmm8, 24
1452 pxor xmm14, xmm8
1453 movdqa xmm8, xmm15
1454 psrld xmm15, 8
1455 pslld xmm8, 24
1456 pxor xmm15, xmm8
1457 movdqa xmm8, xmmword ptr [rsp+100H]
1458 paddd xmm8, xmm12
1459 paddd xmm9, xmm13
1460 paddd xmm10, xmm14
1461 paddd xmm11, xmm15
1462 pxor xmm4, xmm8
1463 pxor xmm5, xmm9
1464 pxor xmm6, xmm10
1465 pxor xmm7, xmm11
1466 movdqa xmmword ptr [rsp+100H], xmm8
1467 movdqa xmm8, xmm4
1468 psrld xmm8, 7
1469 pslld xmm4, 25
1470 por xmm4, xmm8
1471 movdqa xmm8, xmm5
1472 psrld xmm8, 7
1473 pslld xmm5, 25
1474 por xmm5, xmm8
1475 movdqa xmm8, xmm6
1476 psrld xmm8, 7
1477 pslld xmm6, 25
1478 por xmm6, xmm8
1479 movdqa xmm8, xmm7
1480 psrld xmm8, 7
1481 pslld xmm7, 25
1482 por xmm7, xmm8
1483 paddd xmm0, xmmword ptr [rsp+0E0H]
1484 paddd xmm1, xmmword ptr [rsp+20H]
1485 paddd xmm2, xmmword ptr [rsp+30H]
1486 paddd xmm3, xmmword ptr [rsp+70H]
1487 paddd xmm0, xmm5
1488 paddd xmm1, xmm6
1489 paddd xmm2, xmm7
1490 paddd xmm3, xmm4
1491 pxor xmm15, xmm0
1492 pxor xmm12, xmm1
1493 pxor xmm13, xmm2
1494 pxor xmm14, xmm3
1495 pshuflw xmm15, xmm15, 0B1H
1496 pshufhw xmm15, xmm15, 0B1H
1497 pshuflw xmm12, xmm12, 0B1H
1498 pshufhw xmm12, xmm12, 0B1H
1499 pshuflw xmm13, xmm13, 0B1H
1500 pshufhw xmm13, xmm13, 0B1H
1501 pshuflw xmm14, xmm14, 0B1H
1502 pshufhw xmm14, xmm14, 0B1H
1503 paddd xmm10, xmm15
1504 paddd xmm11, xmm12
1505 movdqa xmm8, xmmword ptr [rsp+100H]
1506 paddd xmm8, xmm13
1507 paddd xmm9, xmm14
1508 pxor xmm5, xmm10
1509 pxor xmm6, xmm11
1510 pxor xmm7, xmm8
1511 pxor xmm4, xmm9
1512 movdqa xmmword ptr [rsp+100H], xmm8
1513 movdqa xmm8, xmm5
1514 psrld xmm8, 12
1515 pslld xmm5, 20
1516 por xmm5, xmm8
1517 movdqa xmm8, xmm6
1518 psrld xmm8, 12
1519 pslld xmm6, 20
1520 por xmm6, xmm8
1521 movdqa xmm8, xmm7
1522 psrld xmm8, 12
1523 pslld xmm7, 20
1524 por xmm7, xmm8
1525 movdqa xmm8, xmm4
1526 psrld xmm8, 12
1527 pslld xmm4, 20
1528 por xmm4, xmm8
1529 paddd xmm0, xmmword ptr [rsp+0A0H]
1530 paddd xmm1, xmmword ptr [rsp+0C0H]
1531 paddd xmm2, xmmword ptr [rsp+40H]
1532 paddd xmm3, xmmword ptr [rsp+0D0H]
1533 paddd xmm0, xmm5
1534 paddd xmm1, xmm6
1535 paddd xmm2, xmm7
1536 paddd xmm3, xmm4
1537 pxor xmm15, xmm0
1538 pxor xmm12, xmm1
1539 pxor xmm13, xmm2
1540 pxor xmm14, xmm3
1541 movdqa xmm8, xmm15
1542 psrld xmm15, 8
1543 pslld xmm8, 24
1544 pxor xmm15, xmm8
1545 movdqa xmm8, xmm12
1546 psrld xmm12, 8
1547 pslld xmm8, 24
1548 pxor xmm12, xmm8
1549 movdqa xmm8, xmm13
1550 psrld xmm13, 8
1551 pslld xmm8, 24
1552 pxor xmm13, xmm8
1553 movdqa xmm8, xmm14
1554 psrld xmm14, 8
1555 pslld xmm8, 24
1556 pxor xmm14, xmm8
1557 paddd xmm10, xmm15
1558 paddd xmm11, xmm12
1559 movdqa xmm8, xmmword ptr [rsp+100H]
1560 paddd xmm8, xmm13
1561 paddd xmm9, xmm14
1562 pxor xmm5, xmm10
1563 pxor xmm6, xmm11
1564 pxor xmm7, xmm8
1565 pxor xmm4, xmm9
1566 pxor xmm0, xmm8
1567 pxor xmm1, xmm9
1568 pxor xmm2, xmm10
1569 pxor xmm3, xmm11
1570 movdqa xmm8, xmm5
1571 psrld xmm8, 7
1572 pslld xmm5, 25
1573 por xmm5, xmm8
1574 movdqa xmm8, xmm6
1575 psrld xmm8, 7
1576 pslld xmm6, 25
1577 por xmm6, xmm8
1578 movdqa xmm8, xmm7
1579 psrld xmm8, 7
1580 pslld xmm7, 25
1581 por xmm7, xmm8
1582 movdqa xmm8, xmm4
1583 psrld xmm8, 7
1584 pslld xmm4, 25
1585 por xmm4, xmm8
1586 pxor xmm4, xmm12
1587 pxor xmm5, xmm13
1588 pxor xmm6, xmm14
1589 pxor xmm7, xmm15
1590 mov eax, r13d
1591 jne innerloop4
1592 movdqa xmm9, xmm0
1593 punpckldq xmm0, xmm1
1594 punpckhdq xmm9, xmm1
1595 movdqa xmm11, xmm2
1596 punpckldq xmm2, xmm3
1597 punpckhdq xmm11, xmm3
1598 movdqa xmm1, xmm0
1599 punpcklqdq xmm0, xmm2
1600 punpckhqdq xmm1, xmm2
1601 movdqa xmm3, xmm9
1602 punpcklqdq xmm9, xmm11
1603 punpckhqdq xmm3, xmm11
1604 movdqu xmmword ptr [rbx], xmm0
1605 movdqu xmmword ptr [rbx+20H], xmm1
1606 movdqu xmmword ptr [rbx+40H], xmm9
1607 movdqu xmmword ptr [rbx+60H], xmm3
1608 movdqa xmm9, xmm4
1609 punpckldq xmm4, xmm5
1610 punpckhdq xmm9, xmm5
1611 movdqa xmm11, xmm6
1612 punpckldq xmm6, xmm7
1613 punpckhdq xmm11, xmm7
1614 movdqa xmm5, xmm4
1615 punpcklqdq xmm4, xmm6
1616 punpckhqdq xmm5, xmm6
1617 movdqa xmm7, xmm9
1618 punpcklqdq xmm9, xmm11
1619 punpckhqdq xmm7, xmm11
1620 movdqu xmmword ptr [rbx+10H], xmm4
1621 movdqu xmmword ptr [rbx+30H], xmm5
1622 movdqu xmmword ptr [rbx+50H], xmm9
1623 movdqu xmmword ptr [rbx+70H], xmm7
1624 movdqa xmm1, xmmword ptr [rsp+110H]
1625 movdqa xmm0, xmm1
1626 paddd xmm1, xmmword ptr [rsp+150H]
1627 movdqa xmmword ptr [rsp+110H], xmm1
1628 pxor xmm0, xmmword ptr [CMP_MSB_MASK]
1629 pxor xmm1, xmmword ptr [CMP_MSB_MASK]
1630 pcmpgtd xmm0, xmm1
1631 movdqa xmm1, xmmword ptr [rsp+120H]
1632 psubd xmm1, xmm0
1633 movdqa xmmword ptr [rsp+120H], xmm1
1634 add rbx, 128
1635 add rdi, 32
1636 sub rsi, 4
1637 cmp rsi, 4
1638 jnc outerloop4
1639 test rsi, rsi
1640 jne final3blocks
1641 unwind:
1642 movdqa xmm6, xmmword ptr [rsp+170H]
1643 movdqa xmm7, xmmword ptr [rsp+180H]
1644 movdqa xmm8, xmmword ptr [rsp+190H]
1645 movdqa xmm9, xmmword ptr [rsp+1A0H]
1646 movdqa xmm10, xmmword ptr [rsp+1B0H]
1647 movdqa xmm11, xmmword ptr [rsp+1C0H]
1648 movdqa xmm12, xmmword ptr [rsp+1D0H]
1649 movdqa xmm13, xmmword ptr [rsp+1E0H]
1650 movdqa xmm14, xmmword ptr [rsp+1F0H]
1651 movdqa xmm15, xmmword ptr [rsp+200H]
1652 mov rsp, rbp
1653 pop rbp
1654 pop rbx
1655 pop rdi
1656 pop rsi
1657 pop r12
1658 pop r13
1659 pop r14
1660 pop r15
1662 ALIGN 16
1663 final3blocks:
1664 test esi, 2H
1665 je final1block
1666 movups xmm0, xmmword ptr [rcx]
1667 movups xmm1, xmmword ptr [rcx+10H]
1668 movaps xmm8, xmm0
1669 movaps xmm9, xmm1
1670 movd xmm13, dword ptr [rsp+110H]
1671 movd xmm14, dword ptr [rsp+120H]
1672 punpckldq xmm13, xmm14
1673 movaps xmmword ptr [rsp], xmm13
1674 movd xmm14, dword ptr [rsp+114H]
1675 movd xmm13, dword ptr [rsp+124H]
1676 punpckldq xmm14, xmm13
1677 movaps xmmword ptr [rsp+10H], xmm14
1678 mov r8, qword ptr [rdi]
1679 mov r9, qword ptr [rdi+8H]
1680 movzx eax, byte ptr [rbp+80H]
1681 or eax, r13d
1682 xor edx, edx
1683 innerloop2:
1684 mov r14d, eax
1685 or eax, r12d
1686 add rdx, 64
1687 cmp rdx, r15
1688 cmovne eax, r14d
1689 movaps xmm2, xmmword ptr [BLAKE3_IV]
1690 movaps xmm10, xmm2
1691 movups xmm4, xmmword ptr [r8+rdx-40H]
1692 movups xmm5, xmmword ptr [r8+rdx-30H]
1693 movaps xmm3, xmm4
1694 shufps xmm4, xmm5, 136
1695 shufps xmm3, xmm5, 221
1696 movaps xmm5, xmm3
1697 movups xmm6, xmmword ptr [r8+rdx-20H]
1698 movups xmm7, xmmword ptr [r8+rdx-10H]
1699 movaps xmm3, xmm6
1700 shufps xmm6, xmm7, 136
1701 pshufd xmm6, xmm6, 93H
1702 shufps xmm3, xmm7, 221
1703 pshufd xmm7, xmm3, 93H
1704 movups xmm12, xmmword ptr [r9+rdx-40H]
1705 movups xmm13, xmmword ptr [r9+rdx-30H]
1706 movaps xmm11, xmm12
1707 shufps xmm12, xmm13, 136
1708 shufps xmm11, xmm13, 221
1709 movaps xmm13, xmm11
1710 movups xmm14, xmmword ptr [r9+rdx-20H]
1711 movups xmm15, xmmword ptr [r9+rdx-10H]
1712 movaps xmm11, xmm14
1713 shufps xmm14, xmm15, 136
1714 pshufd xmm14, xmm14, 93H
1715 shufps xmm11, xmm15, 221
1716 pshufd xmm15, xmm11, 93H
1717 shl rax, 20H
1718 or rax, 40H
1719 movd xmm3, rax
1720 movdqa xmmword ptr [rsp+20H], xmm3
1721 movaps xmm3, xmmword ptr [rsp]
1722 movaps xmm11, xmmword ptr [rsp+10H]
1723 punpcklqdq xmm3, xmmword ptr [rsp+20H]
1724 punpcklqdq xmm11, xmmword ptr [rsp+20H]
1725 mov al, 7
1726 roundloop2:
1727 paddd xmm0, xmm4
1728 paddd xmm8, xmm12
1729 movaps xmmword ptr [rsp+20H], xmm4
1730 movaps xmmword ptr [rsp+30H], xmm12
1731 paddd xmm0, xmm1
1732 paddd xmm8, xmm9
1733 pxor xmm3, xmm0
1734 pxor xmm11, xmm8
1735 pshuflw xmm3, xmm3, 0B1H
1736 pshufhw xmm3, xmm3, 0B1H
1737 pshuflw xmm11, xmm11, 0B1H
1738 pshufhw xmm11, xmm11, 0B1H
1739 paddd xmm2, xmm3
1740 paddd xmm10, xmm11
1741 pxor xmm1, xmm2
1742 pxor xmm9, xmm10
1743 movdqa xmm4, xmm1
1744 pslld xmm1, 20
1745 psrld xmm4, 12
1746 por xmm1, xmm4
1747 movdqa xmm4, xmm9
1748 pslld xmm9, 20
1749 psrld xmm4, 12
1750 por xmm9, xmm4
1751 paddd xmm0, xmm5
1752 paddd xmm8, xmm13
1753 movaps xmmword ptr [rsp+40H], xmm5
1754 movaps xmmword ptr [rsp+50H], xmm13
1755 paddd xmm0, xmm1
1756 paddd xmm8, xmm9
1757 pxor xmm3, xmm0
1758 pxor xmm11, xmm8
1759 movdqa xmm13, xmm3
1760 psrld xmm3, 8
1761 pslld xmm13, 24
1762 pxor xmm3, xmm13
1763 movdqa xmm13, xmm11
1764 psrld xmm11, 8
1765 pslld xmm13, 24
1766 pxor xmm11, xmm13
1767 paddd xmm2, xmm3
1768 paddd xmm10, xmm11
1769 pxor xmm1, xmm2
1770 pxor xmm9, xmm10
1771 movdqa xmm4, xmm1
1772 pslld xmm1, 25
1773 psrld xmm4, 7
1774 por xmm1, xmm4
1775 movdqa xmm4, xmm9
1776 pslld xmm9, 25
1777 psrld xmm4, 7
1778 por xmm9, xmm4
1779 pshufd xmm0, xmm0, 93H
1780 pshufd xmm8, xmm8, 93H
1781 pshufd xmm3, xmm3, 4EH
1782 pshufd xmm11, xmm11, 4EH
1783 pshufd xmm2, xmm2, 39H
1784 pshufd xmm10, xmm10, 39H
1785 paddd xmm0, xmm6
1786 paddd xmm8, xmm14
1787 paddd xmm0, xmm1
1788 paddd xmm8, xmm9
1789 pxor xmm3, xmm0
1790 pxor xmm11, xmm8
1791 pshuflw xmm3, xmm3, 0B1H
1792 pshufhw xmm3, xmm3, 0B1H
1793 pshuflw xmm11, xmm11, 0B1H
1794 pshufhw xmm11, xmm11, 0B1H
1795 paddd xmm2, xmm3
1796 paddd xmm10, xmm11
1797 pxor xmm1, xmm2
1798 pxor xmm9, xmm10
1799 movdqa xmm4, xmm1
1800 pslld xmm1, 20
1801 psrld xmm4, 12
1802 por xmm1, xmm4
1803 movdqa xmm4, xmm9
1804 pslld xmm9, 20
1805 psrld xmm4, 12
1806 por xmm9, xmm4
1807 paddd xmm0, xmm7
1808 paddd xmm8, xmm15
1809 paddd xmm0, xmm1
1810 paddd xmm8, xmm9
1811 pxor xmm3, xmm0
1812 pxor xmm11, xmm8
1813 movdqa xmm13, xmm3
1814 psrld xmm3, 8
1815 pslld xmm13, 24
1816 pxor xmm3, xmm13
1817 movdqa xmm13, xmm11
1818 psrld xmm11, 8
1819 pslld xmm13, 24
1820 pxor xmm11, xmm13
1821 paddd xmm2, xmm3
1822 paddd xmm10, xmm11
1823 pxor xmm1, xmm2
1824 pxor xmm9, xmm10
1825 movdqa xmm4, xmm1
1826 pslld xmm1, 25
1827 psrld xmm4, 7
1828 por xmm1, xmm4
1829 movdqa xmm4, xmm9
1830 pslld xmm9, 25
1831 psrld xmm4, 7
1832 por xmm9, xmm4
1833 pshufd xmm0, xmm0, 39H
1834 pshufd xmm8, xmm8, 39H
1835 pshufd xmm3, xmm3, 4EH
1836 pshufd xmm11, xmm11, 4EH
1837 pshufd xmm2, xmm2, 93H
1838 pshufd xmm10, xmm10, 93H
1839 dec al
1840 je endroundloop2
1841 movdqa xmm12, xmmword ptr [rsp+20H]
1842 movdqa xmm5, xmmword ptr [rsp+40H]
1843 pshufd xmm13, xmm12, 0FH
1844 shufps xmm12, xmm5, 214
1845 pshufd xmm4, xmm12, 39H
1846 movdqa xmm12, xmm6
1847 shufps xmm12, xmm7, 250
1848 pand xmm13, xmmword ptr [PBLENDW_0x33_MASK]
1849 pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK]
1850 por xmm13, xmm12
1851 movdqa xmmword ptr [rsp+20H], xmm13
1852 movdqa xmm12, xmm7
1853 punpcklqdq xmm12, xmm5
1854 movdqa xmm13, xmm6
1855 pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK]
1856 pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK]
1857 por xmm12, xmm13
1858 pshufd xmm12, xmm12, 78H
1859 punpckhdq xmm5, xmm7
1860 punpckldq xmm6, xmm5
1861 pshufd xmm7, xmm6, 1EH
1862 movdqa xmmword ptr [rsp+40H], xmm12
1863 movdqa xmm5, xmmword ptr [rsp+30H]
1864 movdqa xmm13, xmmword ptr [rsp+50H]
1865 pshufd xmm6, xmm5, 0FH
1866 shufps xmm5, xmm13, 214
1867 pshufd xmm12, xmm5, 39H
1868 movdqa xmm5, xmm14
1869 shufps xmm5, xmm15, 250
1870 pand xmm6, xmmword ptr [PBLENDW_0x33_MASK]
1871 pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK]
1872 por xmm6, xmm5
1873 movdqa xmm5, xmm15
1874 punpcklqdq xmm5, xmm13
1875 movdqa xmmword ptr [rsp+30H], xmm2
1876 movdqa xmm2, xmm14
1877 pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK]
1878 pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK]
1879 por xmm5, xmm2
1880 movdqa xmm2, xmmword ptr [rsp+30H]
1881 pshufd xmm5, xmm5, 78H
1882 punpckhdq xmm13, xmm15
1883 punpckldq xmm14, xmm13
1884 pshufd xmm15, xmm14, 1EH
1885 movdqa xmm13, xmm6
1886 movdqa xmm14, xmm5
1887 movdqa xmm5, xmmword ptr [rsp+20H]
1888 movdqa xmm6, xmmword ptr [rsp+40H]
1889 jmp roundloop2
1890 endroundloop2:
1891 pxor xmm0, xmm2
1892 pxor xmm1, xmm3
1893 pxor xmm8, xmm10
1894 pxor xmm9, xmm11
1895 mov eax, r13d
1896 cmp rdx, r15
1897 jne innerloop2
1898 movups xmmword ptr [rbx], xmm0
1899 movups xmmword ptr [rbx+10H], xmm1
1900 movups xmmword ptr [rbx+20H], xmm8
1901 movups xmmword ptr [rbx+30H], xmm9
1902 mov eax, dword ptr [rsp+130H]
1903 neg eax
1904 mov r10d, dword ptr [rsp+110H+8*rax]
1905 mov r11d, dword ptr [rsp+120H+8*rax]
1906 mov dword ptr [rsp+110H], r10d
1907 mov dword ptr [rsp+120H], r11d
1908 add rdi, 16
1909 add rbx, 64
1910 sub rsi, 2
1911 final1block:
1912 test esi, 1H
1913 je unwind
1914 movups xmm0, xmmword ptr [rcx]
1915 movups xmm1, xmmword ptr [rcx+10H]
1916 movd xmm13, dword ptr [rsp+110H]
1917 movd xmm14, dword ptr [rsp+120H]
1918 punpckldq xmm13, xmm14
1919 mov r8, qword ptr [rdi]
1920 movzx eax, byte ptr [rbp+80H]
1921 or eax, r13d
1922 xor edx, edx
1923 innerloop1:
1924 mov r14d, eax
1925 or eax, r12d
1926 add rdx, 64
1927 cmp rdx, r15
1928 cmovne eax, r14d
1929 movaps xmm2, xmmword ptr [BLAKE3_IV]
1930 shl rax, 32
1931 or rax, 64
1932 movd xmm12, rax
1933 movdqa xmm3, xmm13
1934 punpcklqdq xmm3, xmm12
1935 movups xmm4, xmmword ptr [r8+rdx-40H]
1936 movups xmm5, xmmword ptr [r8+rdx-30H]
1937 movaps xmm8, xmm4
1938 shufps xmm4, xmm5, 136
1939 shufps xmm8, xmm5, 221
1940 movaps xmm5, xmm8
1941 movups xmm6, xmmword ptr [r8+rdx-20H]
1942 movups xmm7, xmmword ptr [r8+rdx-10H]
1943 movaps xmm8, xmm6
1944 shufps xmm6, xmm7, 136
1945 pshufd xmm6, xmm6, 93H
1946 shufps xmm8, xmm7, 221
1947 pshufd xmm7, xmm8, 93H
1948 mov al, 7
1949 roundloop1:
1950 paddd xmm0, xmm4
1951 paddd xmm0, xmm1
1952 pxor xmm3, xmm0
1953 pshuflw xmm3, xmm3, 0B1H
1954 pshufhw xmm3, xmm3, 0B1H
1955 paddd xmm2, xmm3
1956 pxor xmm1, xmm2
1957 movdqa xmm11, xmm1
1958 pslld xmm1, 20
1959 psrld xmm11, 12
1960 por xmm1, xmm11
1961 paddd xmm0, xmm5
1962 paddd xmm0, xmm1
1963 pxor xmm3, xmm0
1964 movdqa xmm14, xmm3
1965 psrld xmm3, 8
1966 pslld xmm14, 24
1967 pxor xmm3, xmm14
1968 paddd xmm2, xmm3
1969 pxor xmm1, xmm2
1970 movdqa xmm11, xmm1
1971 pslld xmm1, 25
1972 psrld xmm11, 7
1973 por xmm1, xmm11
1974 pshufd xmm0, xmm0, 93H
1975 pshufd xmm3, xmm3, 4EH
1976 pshufd xmm2, xmm2, 39H
1977 paddd xmm0, xmm6
1978 paddd xmm0, xmm1
1979 pxor xmm3, xmm0
1980 pshuflw xmm3, xmm3, 0B1H
1981 pshufhw xmm3, xmm3, 0B1H
1982 paddd xmm2, xmm3
1983 pxor xmm1, xmm2
1984 movdqa xmm11, xmm1
1985 pslld xmm1, 20
1986 psrld xmm11, 12
1987 por xmm1, xmm11
1988 paddd xmm0, xmm7
1989 paddd xmm0, xmm1
1990 pxor xmm3, xmm0
1991 movdqa xmm14, xmm3
1992 psrld xmm3, 8
1993 pslld xmm14, 24
1994 pxor xmm3, xmm14
1995 paddd xmm2, xmm3
1996 pxor xmm1, xmm2
1997 movdqa xmm11, xmm1
1998 pslld xmm1, 25
1999 psrld xmm11, 7
2000 por xmm1, xmm11
2001 pshufd xmm0, xmm0, 39H
2002 pshufd xmm3, xmm3, 4EH
2003 pshufd xmm2, xmm2, 93H
2004 dec al
2005 jz endroundloop1
2006 movdqa xmm8, xmm4
2007 shufps xmm8, xmm5, 214
2008 pshufd xmm9, xmm4, 0FH
2009 pshufd xmm4, xmm8, 39H
2010 movdqa xmm8, xmm6
2011 shufps xmm8, xmm7, 250
2012 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK]
2013 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
2014 por xmm9, xmm8
2015 movdqa xmm8, xmm7
2016 punpcklqdq xmm8, xmm5
2017 movdqa xmm10, xmm6
2018 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
2019 pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
2020 por xmm8, xmm10
2021 pshufd xmm8, xmm8, 78H
2022 punpckhdq xmm5, xmm7
2023 punpckldq xmm6, xmm5
2024 pshufd xmm7, xmm6, 1EH
2025 movdqa xmm5, xmm9
2026 movdqa xmm6, xmm8
2027 jmp roundloop1
2028 endroundloop1:
2029 pxor xmm0, xmm2
2030 pxor xmm1, xmm3
2031 mov eax, r13d
2032 cmp rdx, r15
2033 jne innerloop1
2034 movups xmmword ptr [rbx], xmm0
2035 movups xmmword ptr [rbx+10H], xmm1
2036 jmp unwind
2037 _llvm_blake3_hash_many_sse2 ENDP
2038 llvm_blake3_hash_many_sse2 ENDP
;-----------------------------------------------------------------------
; llvm_blake3_compress_in_place_sse2 / _llvm_blake3_compress_in_place_sse2
;
; Runs seven rounds of the compression function over one 64-byte message
; block and overwrites the 32-byte chaining value with the result.
;
; ABI:    Microsoft x64
; In:     rcx        -> 32-byte chaining value (read, then overwritten)
;         rdx        -> 64-byte message block (read with unaligned loads)
;         r8b        =  byte placed in state word 14 (presumably block_len)
;         r9         =  64-bit value placed in state words 12..13
;                       (presumably the counter)
;         [rsp+28H]  =  byte placed in state word 15 (presumably flags);
;                       read as [rsp+0A0H] once the 120-byte frame exists
; Out:    [rcx .. rcx+1FH] = (row0 xor row2), (row1 xor row3)
; Clobb:  rax, r8, xmm0-xmm5, flags
; Stack:  120-byte frame; entry rsp is 8 mod 16, so rsp is 16-byte
;         aligned after the sub and the movdqa spills are legal.
;
; Register roles inside the round loop:
;         xmm0..xmm3   state rows 0..3
;         xmm4..xmm7   message words in the order the round consumes them
;         xmm8, xmm9, xmm11, xmm14   scratch
;         al           rounds remaining
;
; NOTE(review): the PROC is declared without FRAME / .allocstack /
; .savexmm128, so no unwind data describes this stack frame.
;-----------------------------------------------------------------------
llvm_blake3_compress_in_place_sse2 PROC
_llvm_blake3_compress_in_place_sse2 PROC
        sub     rsp, 120
        ; Spill the callee-saved xmm registers. xmm15 is saved and restored
        ; although this routine never writes it.
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+10H], xmm7
        movdqa  xmmword ptr [rsp+20H], xmm8
        movdqa  xmmword ptr [rsp+30H], xmm9
        movdqa  xmmword ptr [rsp+40H], xmm11
        movdqa  xmmword ptr [rsp+50H], xmm14
        movdqa  xmmword ptr [rsp+60H], xmm15

        ; Build the 4x4 state: rows 0/1 from the chaining value, row 2 from
        ; BLAKE3_IV, row 3 = { r9 low, r9 high, r8b, stack byte }.
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+10H]
        movaps  xmm2, xmmword ptr [BLAKE3_IV]
        movzx   eax, byte ptr [rsp+0A0H]        ; 5th argument (stack byte)
        movzx   r8d, r8b
        shl     rax, 32
        add     r8, rax                         ; r8 = r8b | (stack byte << 32)
        movd    xmm3, r9
        movd    xmm4, r8
        punpcklqdq xmm3, xmm4

        ; Load the message and split it into even/odd words.
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+10H]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136                 ; xmm4 = m0, m2, m4, m6
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8                      ; xmm5 = m1, m3, m5, m7
        movups  xmm6, xmmword ptr [rdx+20H]
        movups  xmm7, xmmword ptr [rdx+30H]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 93H                 ; xmm6 = m14, m8, m10, m12
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 93H                 ; xmm7 = m15, m9, m11, m13

        mov     al, 7                           ; round counter
@@:
        ; ---- column step ----
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H                ; swap 16-bit halves of each
        pshufhw xmm3, xmm3, 0B1H                ; dword = rotate by 16
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     ; rotate right by 12
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3                     ; rotate right by 8
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     ; rotate right by 7
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11

        ; Rotate rows 0, 3, 2 so the diagonals line up as columns.
        pshufd  xmm0, xmm0, 93H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 39H

        ; ---- diagonal step ----
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H
        pshufhw xmm3, xmm3, 0B1H
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11

        ; Undo the row rotation.
        pshufd  xmm0, xmm0, 39H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 93H

        dec     al
        jz      @F                              ; all 7 rounds done

        ; ---- permute the message words for the next round ----
        ; The pand/pand/por triples select dwords from two sources using
        ; the PBLENDW_* masks.
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0FH
        pshufd  xmm4, xmm8, 39H
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm14, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand    xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
        por     xmm8, xmm14
        pshufd  xmm8, xmm8, 78H
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 1EH
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     @B
@@:
        ; Fold the lower state half into the upper half and store it.
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rcx], xmm0
        movups  xmmword ptr [rcx+10H], xmm1

        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+10H]
        movdqa  xmm8, xmmword ptr [rsp+20H]
        movdqa  xmm9, xmmword ptr [rsp+30H]
        movdqa  xmm11, xmmword ptr [rsp+40H]
        movdqa  xmm14, xmmword ptr [rsp+50H]
        movdqa  xmm15, xmmword ptr [rsp+60H]
        add     rsp, 120
        ret
_llvm_blake3_compress_in_place_sse2 ENDP
llvm_blake3_compress_in_place_sse2 ENDP
ALIGN   16
;-----------------------------------------------------------------------
; llvm_blake3_compress_xof_sse2 / _llvm_blake3_compress_xof_sse2
;
; Runs seven rounds of the compression function over one 64-byte message
; block and writes the full 64-byte result to a separate output buffer.
; The input chaining value is left unmodified.
;
; ABI:    Microsoft x64
; In:     rcx        -> 32-byte chaining value (read only)
;         rdx        -> 64-byte message block (read with unaligned loads)
;         r8b        =  byte placed in state word 14 (presumably block_len)
;         r9         =  64-bit value placed in state words 12..13
;                       (presumably the counter)
;         [rsp+28H]  =  byte placed in state word 15 (presumably flags);
;                       read as [rsp+0A0H] once the 120-byte frame exists
;         [rsp+30H]  -> 64-byte output buffer; read as [rsp+0A8H]
; Out:    out[00H..1FH] = (row0 xor row2), (row1 xor row3)
;         out[20H..3FH] = (row2 xor cv[0..3]), (row3 xor cv[4..7])
; Clobb:  rax, r8, r10, xmm0-xmm5, flags
; Stack:  120-byte frame; entry rsp is 8 mod 16, so rsp is 16-byte
;         aligned after the sub and the movdqa spills are legal.
;
; Register roles inside the round loop:
;         xmm0..xmm3   state rows 0..3
;         xmm4..xmm7   message words in the order the round consumes them
;         xmm8, xmm9, xmm11, xmm14   scratch
;         al           rounds remaining
;
; NOTE(review): the PROC is declared without FRAME / .allocstack /
; .savexmm128, so no unwind data describes this stack frame.
;-----------------------------------------------------------------------
llvm_blake3_compress_xof_sse2 PROC
_llvm_blake3_compress_xof_sse2 PROC
        sub     rsp, 120
        ; Spill the callee-saved xmm registers. xmm15 is saved and restored
        ; although this routine never writes it.
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+10H], xmm7
        movdqa  xmmword ptr [rsp+20H], xmm8
        movdqa  xmmword ptr [rsp+30H], xmm9
        movdqa  xmmword ptr [rsp+40H], xmm11
        movdqa  xmmword ptr [rsp+50H], xmm14
        movdqa  xmmword ptr [rsp+60H], xmm15

        ; Build the 4x4 state: rows 0/1 from the chaining value, row 2 from
        ; BLAKE3_IV, row 3 = { r9 low, r9 high, r8b, stack byte }.
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+10H]
        movaps  xmm2, xmmword ptr [BLAKE3_IV]
        movzx   eax, byte ptr [rsp+0A0H]        ; 5th argument (stack byte)
        movzx   r8d, r8b
        mov     r10, qword ptr [rsp+0A8H]       ; 6th argument (output pointer)
        shl     rax, 32
        add     r8, rax                         ; r8 = r8b | (stack byte << 32)
        movd    xmm3, r9
        movd    xmm4, r8
        punpcklqdq xmm3, xmm4

        ; Load the message and split it into even/odd words.
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+10H]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136                 ; xmm4 = m0, m2, m4, m6
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8                      ; xmm5 = m1, m3, m5, m7
        movups  xmm6, xmmword ptr [rdx+20H]
        movups  xmm7, xmmword ptr [rdx+30H]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 93H                 ; xmm6 = m14, m8, m10, m12
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 93H                 ; xmm7 = m15, m9, m11, m13

        mov     al, 7                           ; round counter
@@:
        ; ---- column step ----
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H                ; swap 16-bit halves of each
        pshufhw xmm3, xmm3, 0B1H                ; dword = rotate by 16
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     ; rotate right by 12
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3                     ; rotate right by 8
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     ; rotate right by 7
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11

        ; Rotate rows 0, 3, 2 so the diagonals line up as columns.
        pshufd  xmm0, xmm0, 93H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 39H

        ; ---- diagonal step ----
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H
        pshufhw xmm3, xmm3, 0B1H
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11

        ; Undo the row rotation.
        pshufd  xmm0, xmm0, 39H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 93H

        dec     al
        jz      @F                              ; all 7 rounds done

        ; ---- permute the message words for the next round ----
        ; The pand/pand/por triples select dwords from two sources using
        ; the PBLENDW_* masks.
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0FH
        pshufd  xmm4, xmm8, 39H
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm14, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand    xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
        por     xmm8, xmm14
        pshufd  xmm8, xmm8, 78H
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 1EH
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     @B
@@:
        ; Reload the untouched chaining value for the upper output half.
        movdqu  xmm4, xmmword ptr [rcx]
        movdqu  xmm5, xmmword ptr [rcx+10H]
        pxor    xmm0, xmm2                      ; out[00H] = row0 xor row2
        pxor    xmm1, xmm3                      ; out[10H] = row1 xor row3
        pxor    xmm2, xmm4                      ; out[20H] = row2 xor cv[0..3]
        pxor    xmm3, xmm5                      ; out[30H] = row3 xor cv[4..7]
        movups  xmmword ptr [r10], xmm0
        movups  xmmword ptr [r10+10H], xmm1
        movups  xmmword ptr [r10+20H], xmm2
        movups  xmmword ptr [r10+30H], xmm3

        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+10H]
        movdqa  xmm8, xmmword ptr [rsp+20H]
        movdqa  xmm9, xmmword ptr [rsp+30H]
        movdqa  xmm11, xmmword ptr [rsp+40H]
        movdqa  xmm14, xmmword ptr [rsp+50H]
        movdqa  xmm15, xmmword ptr [rsp+60H]
        add     rsp, 120
        ret
_llvm_blake3_compress_xof_sse2 ENDP
llvm_blake3_compress_xof_sse2 ENDP
2308 _TEXT ENDS
; Read-only constant pool for the SSE2 routines. The segment is 64-byte
; aligned and every label below lands on a 16-byte boundary, which the
; aligned memory operands (movaps / movdqa / pand / pxor) require.
2311 _RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
2312 ALIGN 64
; Four 32-bit words loaded as state row 2 at the start of each compression.
2313 BLAKE3_IV:
2314 dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
; Per-lane offsets 0..3; masked and added to the broadcast low counter word
; during hash_many setup.
2316 ADD0:
2317 dd 0, 1, 2, 3
; Value 4 in every lane; masked and stored at [rsp+150H], then added to the
; lane counters after each 4-way batch.
2319 ADD1:
2320 dd 4 dup (4)
; Each BLAKE3_IV word broadcast to all four lanes.
; NOTE(review): not referenced in this part of the file; presumably used by
; the 4-way loop - confirm.
2322 BLAKE3_IV_0:
2323 dd 4 dup (6A09E667H)
2325 BLAKE3_IV_1:
2326 dd 4 dup (0BB67AE85H)
2328 BLAKE3_IV_2:
2329 dd 4 dup (3C6EF372H)
2331 BLAKE3_IV_3:
2332 dd 4 dup (0A54FF53AH)
; The value 64 broadcast to all four lanes.
; NOTE(review): not referenced in this part of the file; presumably used by
; the 4-way loop - confirm.
2334 BLAKE3_BLOCK_LEN:
2335 dd 4 dup (64)
; Sign-bit mask. XORing both operands with it before pcmpgtd turns the
; signed compare into an unsigned one (used to detect counter carry).
; Eight dwords are defined; the xmm code here reads only the first four.
2337 CMP_MSB_MASK:
2338 dd 8 dup(80000000H)
; Dword-select masks for the pand/pand/por sequences in the message
; permutation. The hex suffix in each name matches the pblendw immediate
; that would make the same selection.
; 0x33: keep dwords 0 and 2.
2340 PBLENDW_0x33_MASK:
2341 dd 0FFFFFFFFH, 000000000H, 0FFFFFFFFH, 000000000H
; 0xCC: keep dwords 1 and 3.
2342 PBLENDW_0xCC_MASK:
2343 dd 000000000H, 0FFFFFFFFH, 000000000H, 0FFFFFFFFH
; 0x3F: keep dwords 0, 1 and 2.
2344 PBLENDW_0x3F_MASK:
2345 dd 0FFFFFFFFH, 0FFFFFFFFH, 0FFFFFFFFH, 000000000H
; 0xC0: keep dword 3.
2346 PBLENDW_0xC0_MASK:
2347 dd 000000000H, 000000000H, 000000000H, 0FFFFFFFFH
2349 _RDATA ENDS