freebsd: Use compiler.h from FreeBSD's base's linuxkpi
[zfs.git] / module / icp / asm-aarch64 / blake3 / b3_aarch64_sse2.S
blobfefebf08116e588173eb2c72e23c9b7aa7947676
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
22 /*
23  * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24  * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
25  * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
26  *
27  * This is converted assembly: SSE2 -> ARMv8-A
28  * Used tools: SIMDe https://github.com/simd-everywhere/simde
29  *
30  * Should work on FreeBSD, Linux and macOS
31  * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
32  */
34 #if defined(__aarch64__)
/*
 * CFI_NEGATE_RA_STATE expands to the .cfi_negate_ra_state directive when
 * LD_VERSION is undefined or is at least 233010000, and to nothing
 * otherwise.  It is placed right after each `hint #25` in the function
 * prologues of this file.
 * NOTE(review): LD_VERSION is not defined in this file; presumably the
 * build system provides it -- confirm.
 */
36 /* make gcc <= 9 happy */
37 #if !defined(LD_VERSION) || LD_VERSION >= 233010000
38 #define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
39 #else
40 #define CFI_NEGATE_RA_STATE
41 #endif
/*
 * GNU property note emitted into .note.gnu.property.
 * Layout of the words below: name size 4 (the NUL-terminated "GNU"),
 * descriptor size 16 (the four words after the name), note type 5,
 * then the descriptor: 3221225472 (= 0xc0000000), 4, 3, 0.
 * NOTE(review): per the AArch64 ELF ABI this reads as
 * GNU_PROPERTY_AARCH64_FEATURE_1_AND with a 4-byte value of 3 (BTI | PAC)
 * followed by padding -- confirm against the ABI document.
 */
43         .text
44         .section        .note.gnu.property,"a",@note
45         .p2align        3
46         .word   4
47         .word   16
48         .word   5
49         .asciz  "GNU"
50         .word   3221225472
51         .word   4
52         .word   3
53         .word   0
54 .Lsec_end0:
/*
 * zfs_blake3_compress_in_place_sse2
 *
 * Calls compress_pre into a 64-byte stack buffer, XORs the first 32 bytes
 * of that buffer with the last 32 bytes and stores the 32-byte result to
 * the address received in x0.
 *
 * In (as used by the code below):
 *   x0 = pointer to a 32-byte buffer; forwarded to compress_pre as x1 and
 *        overwritten here with the result
 *   x1 = forwarded to compress_pre as x2
 *   w2 = forwarded as w3
 *   x3 = forwarded as x4
 *   w4 = forwarded as w5
 * NOTE(review): presumably (cv, block, block_len, counter, flags) as in
 * the upstream BLAKE3 compress_in_place prototype -- confirm against the
 * C declaration.
 *
 * Frame (96 bytes): [sp, #0..#63] scratch buffer filled by compress_pre,
 * [sp, #64] x29/x30, [sp, #80] x19.
 */
55         .text
56         .globl  zfs_blake3_compress_in_place_sse2
57         .p2align        2
58         .type   zfs_blake3_compress_in_place_sse2,@function
59 zfs_blake3_compress_in_place_sse2:
60         .cfi_startproc
            /* hint #25 is the HINT encoding of PACIASP (sign the return address). */
61         hint    #25
62         CFI_NEGATE_RA_STATE
63         sub     sp, sp, #96
64         stp     x29, x30, [sp, #64]
65         add     x29, sp, #64
66         str     x19, [sp, #80]
            /* CFA = x29 + 32 = sp + 96, i.e. the value of sp on entry. */
67         .cfi_def_cfa w29, 32
68         .cfi_offset w19, -16
69         .cfi_offset w30, -24
70         .cfi_offset w29, -32
            /*
             * Keep the incoming x0 in callee-saved x19 across the call and
             * shift every argument up by one register so that x0 can carry
             * the address of the stack scratch buffer.
             */
71         mov     x19, x0
72         mov     w5, w4
73         mov     x4, x3
74         mov     w3, w2
75         mov     x2, x1
76         mov     x0, sp
77         mov     x1, x19
78         bl      compress_pre
            /*
             * q0..q3 = the four 16-byte rows written by compress_pre.
             * Result = (row0 ^ row2, row1 ^ row3), stored to [x19].
             */
79         ldp     q0, q1, [sp]
80         ldp     q2, q3, [sp, #32]
81         eor     v0.16b, v2.16b, v0.16b
82         eor     v1.16b, v3.16b, v1.16b
83         ldp     x29, x30, [sp, #64]
84         stp     q0, q1, [x19]
85         ldr     x19, [sp, #80]
86         add     sp, sp, #96
            /* hint #29 is the HINT encoding of AUTIASP (authenticate the return address). */
87         hint    #29
88         ret
89 .Lfunc_end0:
90         .size   zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2
91         .cfi_endproc
/*
 * .LCPI1_0: 16-byte constant loaded by compress_pre as one row of its
 * initial state.  As unsigned 64-bit values the two entries are
 * 0xBB67AE856A09E667 and 0xA54FF53A3C6EF372, i.e. the little-endian
 * 32-bit words 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A.
 * NOTE(review): these match the first four IV words of the BLAKE3
 * specification -- confirm.
 */
93         .section        .rodata.cst16,"aM",@progbits,16
94         .p2align        4
95 .LCPI1_0:
96         .xword  -4942790177982912921
97         .xword  -6534734903820487822
/*
 * compress_pre (file-local; no .globl directive)
 *
 * Leaf routine shared by zfs_blake3_compress_in_place_sse2 and
 * zfs_blake3_compress_xof_sse2.  It builds a 4x4 state of 32-bit words in
 * four vector registers, runs a long straight-line sequence of
 * add / xor / rotate steps mixed with permutations of the 64 input bytes
 * at x2, and stores the resulting 64-byte state to [x0].
 *
 * In (as used by the code below):
 *   x0 = destination, 64 bytes written
 *   x1 = source of 32 bytes (state rows 0 and 1)
 *   x2 = source of 64 bytes (message words)
 *   w3 = value masked to its low 8 bits, state row 3 lane 2
 *   x4 = 64-bit value, state row 3 lanes 0-1
 *   w5 = value masked to its low 8 bits, state row 3 lane 3
 * NOTE(review): presumably x1 = cv, x2 = block, w3 = block_len,
 * x4 = counter, w5 = flags as in upstream BLAKE3 -- confirm.
 *
 * Registers written in the body: x8, v0-v7, v16-v20.  No stack is used.
 */
98         .text
99         .p2align        2
100         .type   compress_pre,@function
101 compress_pre:
102         .cfi_startproc
            /* hint #34 is the HINT encoding of BTI c (indirect-call landing pad). */
103         hint    #34
            /*
             * Build the initial state rows:
             *   v2 = [x1], v5 = [x1, #16], v4 = .LCPI1_0,
             *   v3 = { x4 (64 bits), w3 & 0xff, w5 & 0xff }
             * (the movi/and pair keeps only the low byte of w3 and w5).
             * The rows are also stored to [x0]; those bytes are overwritten
             * by the final stp pair before returning.
             */
104         fmov    s1, w3
105         movi    d0, #0x0000ff000000ff
106         ldr     q2, [x1]
107         fmov    d3, x4
108         adrp    x8, .LCPI1_0
109         mov     v1.s[1], w5
110         str     q2, [x0]
111         ldr     q4, [x8, :lo12:.LCPI1_0]
112         add     x8, x2, #32
113         ldr     q5, [x1, #16]
114         and     v0.8b, v1.8b, v0.8b
115         stp     q5, q4, [x0, #16]
116         mov     v3.d[1], v0.d[0]
117         str     q3, [x0, #48]
118         ldp     q0, q6, [x2]
            /*
             * De-interleave the first 32 bytes at x2:
             * v1 = even-indexed 32-bit words, v0 = odd-indexed words.
             */
119         uzp1    v1.4s, v0.4s, v6.4s
120         uzp2    v0.4s, v0.4s, v6.4s
121         add     v2.4s, v2.4s, v1.4s
122         uzp1    v18.4s, v1.4s, v1.4s
123         add     v2.4s, v2.4s, v5.4s
124         eor     v3.16b, v2.16b, v3.16b
125         add     v2.4s, v2.4s, v0.4s
            /*
             * Rotation idioms used from here to the end of the function:
             *   rev32 on .8h              = rotate each 32-bit lane by 16
             *   ushr #12 / shl #20 / orr  = rotate right by 12
             *   ushr #8  / shl #24 / orr  = rotate right by 8
             *   ushr #7  / shl #25 / orr  = rotate right by 7
             * ext with both sources equal rotates the four 32-bit lanes of
             * a row by 1, 2 or 3 positions (#4, #8, #12).
             */
126         rev32   v3.8h, v3.8h
127         add     v4.4s, v3.4s, v4.4s
128         eor     v5.16b, v4.16b, v5.16b
129         ushr    v6.4s, v5.4s, #12
130         shl     v5.4s, v5.4s, #20
131         orr     v5.16b, v5.16b, v6.16b
132         add     v2.4s, v2.4s, v5.4s
133         eor     v3.16b, v2.16b, v3.16b
134         ushr    v6.4s, v3.4s, #8
135         shl     v3.4s, v3.4s, #24
136         orr     v3.16b, v3.16b, v6.16b
            /*
             * x8 = x2 + 32: ld2 de-interleaves the second 32 bytes at x2,
             * v6 = even-indexed words, v7 = odd-indexed words.
             */
137         ld2     { v6.4s, v7.4s }, [x8]
138         add     v4.4s, v3.4s, v4.4s
139         ext     v3.16b, v3.16b, v3.16b, #8
140         add     v2.4s, v2.4s, v6.4s
141         eor     v5.16b, v4.16b, v5.16b
142         ext     v4.16b, v4.16b, v4.16b, #4
143         ext     v6.16b, v6.16b, v6.16b, #12
144         ext     v2.16b, v2.16b, v2.16b, #12
145         ushr    v16.4s, v5.4s, #7
146         shl     v5.4s, v5.4s, #25
147         orr     v5.16b, v5.16b, v16.16b
148         ext     v16.16b, v7.16b, v7.16b, #12
149         add     v2.4s, v2.4s, v5.4s
150         mov     v7.16b, v16.16b
151         eor     v3.16b, v3.16b, v2.16b
152         add     v2.4s, v2.4s, v16.4s
153         mov     v7.s[1], v6.s[2]
154         rev32   v3.8h, v3.8h
155         add     v4.4s, v4.4s, v3.4s
156         eor     v5.16b, v4.16b, v5.16b
157         ushr    v17.4s, v5.4s, #12
158         shl     v5.4s, v5.4s, #20
159         orr     v5.16b, v5.16b, v17.16b
160         add     v2.4s, v2.4s, v5.4s
161         eor     v3.16b, v2.16b, v3.16b
162         ushr    v17.4s, v3.4s, #8
163         shl     v3.4s, v3.4s, #24
164         orr     v3.16b, v3.16b, v17.16b
165         ext     v17.16b, v18.16b, v1.16b, #8
166         add     v4.4s, v3.4s, v4.4s
167         uzp2    v17.4s, v17.4s, v0.4s
168         ext     v3.16b, v3.16b, v3.16b, #8
169         eor     v5.16b, v4.16b, v5.16b
170         add     v2.4s, v2.4s, v17.4s
171         ext     v4.16b, v4.16b, v4.16b, #12
172         ushr    v18.4s, v5.4s, #7
173         shl     v5.4s, v5.4s, #25
174         ext     v2.16b, v2.16b, v2.16b, #4
175         orr     v5.16b, v5.16b, v18.16b
176         ext     v18.16b, v1.16b, v1.16b, #12
177         add     v2.4s, v2.4s, v5.4s
178         ext     v1.16b, v1.16b, v18.16b, #12
179         zip1    v18.2d, v16.2d, v0.2d
180         zip2    v0.4s, v0.4s, v16.4s
181         eor     v3.16b, v3.16b, v2.16b
182         rev64   v1.4s, v1.4s
183         mov     v18.s[3], v6.s[3]
184         zip1    v16.4s, v0.4s, v6.4s
185         rev32   v3.8h, v3.8h
186         trn2    v1.4s, v1.4s, v7.4s
187         zip1    v0.4s, v6.4s, v0.4s
188         add     v4.4s, v4.4s, v3.4s
189         add     v2.4s, v2.4s, v1.4s
190         ext     v6.16b, v0.16b, v16.16b, #8
191         eor     v5.16b, v4.16b, v5.16b
192         ushr    v7.4s, v5.4s, #12
193         shl     v5.4s, v5.4s, #20
194         orr     v5.16b, v5.16b, v7.16b
195         add     v7.4s, v2.4s, v5.4s
196         eor     v2.16b, v7.16b, v3.16b
197         ext     v7.16b, v7.16b, v7.16b, #12
198         ushr    v3.4s, v2.4s, #8
199         shl     v2.4s, v2.4s, #24
200         orr     v3.16b, v2.16b, v3.16b
201         ext     v2.16b, v18.16b, v18.16b, #12
202         add     v4.4s, v3.4s, v4.4s
203         uzp1    v2.4s, v18.4s, v2.4s
204         ext     v3.16b, v3.16b, v3.16b, #8
205         eor     v5.16b, v4.16b, v5.16b
206         add     v7.4s, v7.4s, v2.4s
207         ext     v4.16b, v4.16b, v4.16b, #4
208         ushr    v18.4s, v5.4s, #7
209         shl     v5.4s, v5.4s, #25
210         orr     v5.16b, v5.16b, v18.16b
211         add     v7.4s, v7.4s, v5.4s
212         eor     v3.16b, v3.16b, v7.16b
213         add     v7.4s, v7.4s, v6.4s
214         rev32   v3.8h, v3.8h
215         add     v4.4s, v4.4s, v3.4s
216         eor     v5.16b, v4.16b, v5.16b
217         ushr    v0.4s, v5.4s, #12
218         shl     v5.4s, v5.4s, #20
219         orr     v0.16b, v5.16b, v0.16b
220         add     v5.4s, v7.4s, v0.4s
221         ext     v7.16b, v17.16b, v17.16b, #4
222         eor     v3.16b, v5.16b, v3.16b
223         uzp1    v17.4s, v7.4s, v7.4s
224         ushr    v16.4s, v3.4s, #8
225         shl     v3.4s, v3.4s, #24
226         orr     v3.16b, v3.16b, v16.16b
227         ext     v16.16b, v17.16b, v7.16b, #8
228         add     v4.4s, v3.4s, v4.4s
229         uzp2    v16.4s, v16.4s, v1.4s
230         ext     v3.16b, v3.16b, v3.16b, #8
231         eor     v0.16b, v4.16b, v0.16b
232         add     v5.4s, v5.4s, v16.4s
233         ext     v4.16b, v4.16b, v4.16b, #12
234         ushr    v17.4s, v0.4s, #7
235         shl     v0.4s, v0.4s, #25
236         ext     v5.16b, v5.16b, v5.16b, #4
237         orr     v0.16b, v0.16b, v17.16b
238         ext     v17.16b, v7.16b, v7.16b, #12
239         add     v5.4s, v5.4s, v0.4s
240         ext     v7.16b, v7.16b, v17.16b, #12
241         mov     v17.16b, v6.16b
242         eor     v3.16b, v3.16b, v5.16b
243         rev64   v7.4s, v7.4s
244         mov     v17.s[1], v2.s[2]
245         rev32   v3.8h, v3.8h
246         add     v4.4s, v4.4s, v3.4s
247         eor     v18.16b, v4.16b, v0.16b
248         trn2    v0.4s, v7.4s, v17.4s
249         ushr    v7.4s, v18.4s, #12
250         shl     v17.4s, v18.4s, #20
251         add     v5.4s, v5.4s, v0.4s
252         zip1    v18.2d, v6.2d, v1.2d
253         zip2    v1.4s, v1.4s, v6.4s
254         orr     v7.16b, v17.16b, v7.16b
255         mov     v18.s[3], v2.s[3]
256         zip1    v6.4s, v1.4s, v2.4s
257         add     v5.4s, v5.4s, v7.4s
258         zip1    v1.4s, v2.4s, v1.4s
259         eor     v3.16b, v5.16b, v3.16b
260         ext     v5.16b, v5.16b, v5.16b, #12
261         ext     v6.16b, v1.16b, v6.16b, #8
262         ushr    v17.4s, v3.4s, #8
263         shl     v3.4s, v3.4s, #24
264         orr     v17.16b, v3.16b, v17.16b
265         ext     v3.16b, v18.16b, v18.16b, #12
266         add     v4.4s, v17.4s, v4.4s
267         uzp1    v3.4s, v18.4s, v3.4s
268         ext     v17.16b, v17.16b, v17.16b, #8
269         eor     v7.16b, v4.16b, v7.16b
270         add     v5.4s, v5.4s, v3.4s
271         ext     v4.16b, v4.16b, v4.16b, #4
272         ushr    v18.4s, v7.4s, #7
273         shl     v7.4s, v7.4s, #25
274         orr     v7.16b, v7.16b, v18.16b
275         add     v5.4s, v5.4s, v7.4s
276         eor     v17.16b, v17.16b, v5.16b
277         add     v5.4s, v5.4s, v6.4s
278         rev32   v17.8h, v17.8h
279         add     v4.4s, v4.4s, v17.4s
280         eor     v2.16b, v4.16b, v7.16b
281         ext     v7.16b, v16.16b, v16.16b, #4
282         ushr    v1.4s, v2.4s, #12
283         shl     v2.4s, v2.4s, #20
284         orr     v1.16b, v2.16b, v1.16b
285         add     v2.4s, v5.4s, v1.4s
286         eor     v5.16b, v2.16b, v17.16b
287         uzp1    v17.4s, v7.4s, v7.4s
288         ushr    v16.4s, v5.4s, #8
289         shl     v5.4s, v5.4s, #24
290         orr     v5.16b, v5.16b, v16.16b
291         ext     v16.16b, v17.16b, v7.16b, #8
292         add     v4.4s, v5.4s, v4.4s
293         uzp2    v16.4s, v16.4s, v0.4s
294         ext     v5.16b, v5.16b, v5.16b, #8
295         eor     v1.16b, v4.16b, v1.16b
296         add     v2.4s, v2.4s, v16.4s
297         ext     v4.16b, v4.16b, v4.16b, #12
298         ushr    v17.4s, v1.4s, #7
299         shl     v1.4s, v1.4s, #25
300         ext     v2.16b, v2.16b, v2.16b, #4
301         orr     v1.16b, v1.16b, v17.16b
302         ext     v17.16b, v7.16b, v7.16b, #12
303         add     v2.4s, v2.4s, v1.4s
304         ext     v7.16b, v7.16b, v17.16b, #12
305         mov     v17.16b, v6.16b
306         eor     v5.16b, v5.16b, v2.16b
307         rev64   v7.4s, v7.4s
308         mov     v17.s[1], v3.s[2]
309         rev32   v5.8h, v5.8h
310         add     v4.4s, v4.4s, v5.4s
311         eor     v18.16b, v4.16b, v1.16b
312         trn2    v1.4s, v7.4s, v17.4s
313         ushr    v7.4s, v18.4s, #12
314         shl     v17.4s, v18.4s, #20
315         add     v2.4s, v2.4s, v1.4s
316         zip1    v18.2d, v6.2d, v0.2d
317         zip2    v0.4s, v0.4s, v6.4s
318         orr     v7.16b, v17.16b, v7.16b
319         mov     v18.s[3], v3.s[3]
320         add     v2.4s, v2.4s, v7.4s
321         eor     v5.16b, v2.16b, v5.16b
322         ext     v2.16b, v2.16b, v2.16b, #12
323         ushr    v17.4s, v5.4s, #8
324         shl     v5.4s, v5.4s, #24
325         orr     v5.16b, v5.16b, v17.16b
326         add     v17.4s, v5.4s, v4.4s
327         ext     v4.16b, v18.16b, v18.16b, #12
328         ext     v5.16b, v5.16b, v5.16b, #8
329         eor     v7.16b, v17.16b, v7.16b
330         uzp1    v4.4s, v18.4s, v4.4s
331         ext     v17.16b, v17.16b, v17.16b, #4
332         ushr    v18.4s, v7.4s, #7
333         shl     v7.4s, v7.4s, #25
334         add     v2.4s, v2.4s, v4.4s
335         orr     v7.16b, v7.16b, v18.16b
336         add     v2.4s, v2.4s, v7.4s
337         eor     v5.16b, v5.16b, v2.16b
338         rev32   v5.8h, v5.8h
339         add     v6.4s, v17.4s, v5.4s
340         zip1    v17.4s, v0.4s, v3.4s
341         zip1    v0.4s, v3.4s, v0.4s
342         eor     v3.16b, v6.16b, v7.16b
343         ext     v0.16b, v0.16b, v17.16b, #8
344         ushr    v7.4s, v3.4s, #12
345         shl     v3.4s, v3.4s, #20
346         add     v2.4s, v2.4s, v0.4s
347         orr     v3.16b, v3.16b, v7.16b
348         ext     v7.16b, v16.16b, v16.16b, #4
349         add     v2.4s, v2.4s, v3.4s
350         uzp1    v17.4s, v7.4s, v7.4s
351         eor     v5.16b, v2.16b, v5.16b
352         ushr    v16.4s, v5.4s, #8
353         shl     v5.4s, v5.4s, #24
354         orr     v5.16b, v5.16b, v16.16b
355         ext     v16.16b, v17.16b, v7.16b, #8
356         add     v6.4s, v5.4s, v6.4s
357         uzp2    v16.4s, v16.4s, v1.4s
358         ext     v5.16b, v5.16b, v5.16b, #8
359         eor     v3.16b, v6.16b, v3.16b
360         add     v2.4s, v2.4s, v16.4s
361         ext     v6.16b, v6.16b, v6.16b, #12
362         ushr    v17.4s, v3.4s, #7
363         shl     v3.4s, v3.4s, #25
364         ext     v2.16b, v2.16b, v2.16b, #4
365         orr     v3.16b, v3.16b, v17.16b
366         add     v17.4s, v2.4s, v3.4s
367         eor     v2.16b, v5.16b, v17.16b
368         ext     v5.16b, v7.16b, v7.16b, #12
369         rev32   v18.8h, v2.8h
370         ext     v2.16b, v7.16b, v5.16b, #12
371         mov     v5.16b, v0.16b
372         add     v6.4s, v6.4s, v18.4s
373         rev64   v2.4s, v2.4s
374         mov     v5.s[1], v4.s[2]
375         eor     v3.16b, v6.16b, v3.16b
376         trn2    v2.4s, v2.4s, v5.4s
377         ushr    v5.4s, v3.4s, #12
378         shl     v3.4s, v3.4s, #20
379         add     v7.4s, v17.4s, v2.4s
380         orr     v3.16b, v3.16b, v5.16b
381         add     v5.4s, v7.4s, v3.4s
382         eor     v7.16b, v5.16b, v18.16b
383         zip1    v18.2d, v0.2d, v1.2d
384         ext     v5.16b, v5.16b, v5.16b, #12
385         zip2    v0.4s, v1.4s, v0.4s
386         ushr    v17.4s, v7.4s, #8
387         shl     v7.4s, v7.4s, #24
388         mov     v18.s[3], v4.s[3]
389         orr     v7.16b, v7.16b, v17.16b
390         ext     v17.16b, v18.16b, v18.16b, #12
391         add     v6.4s, v7.4s, v6.4s
392         ext     v7.16b, v7.16b, v7.16b, #8
393         eor     v19.16b, v6.16b, v3.16b
394         uzp1    v3.4s, v18.4s, v17.4s
395         ext     v6.16b, v6.16b, v6.16b, #4
396         ushr    v17.4s, v19.4s, #7
397         shl     v18.4s, v19.4s, #25
398         add     v5.4s, v5.4s, v3.4s
399         orr     v17.16b, v18.16b, v17.16b
400         add     v5.4s, v5.4s, v17.4s
401         eor     v7.16b, v7.16b, v5.16b
402         rev32   v7.8h, v7.8h
403         add     v1.4s, v6.4s, v7.4s
404         zip1    v6.4s, v0.4s, v4.4s
405         zip1    v0.4s, v4.4s, v0.4s
406         eor     v4.16b, v1.16b, v17.16b
407         ext     v6.16b, v0.16b, v6.16b, #8
408         ushr    v0.4s, v4.4s, #12
409         shl     v4.4s, v4.4s, #20
410         add     v5.4s, v5.4s, v6.4s
411         zip1    v20.2d, v6.2d, v2.2d
412         orr     v0.16b, v4.16b, v0.16b
413         mov     v20.s[3], v3.s[3]
414         add     v4.4s, v5.4s, v0.4s
415         eor     v5.16b, v4.16b, v7.16b
416         ext     v7.16b, v16.16b, v16.16b, #4
417         ushr    v16.4s, v5.4s, #8
418         shl     v5.4s, v5.4s, #24
419         uzp1    v17.4s, v7.4s, v7.4s
420         orr     v5.16b, v5.16b, v16.16b
421         ext     v16.16b, v17.16b, v7.16b, #8
422         add     v1.4s, v5.4s, v1.4s
423         uzp2    v16.4s, v16.4s, v2.4s
424         zip2    v2.4s, v2.4s, v6.4s
425         eor     v0.16b, v1.16b, v0.16b
426         add     v4.4s, v4.4s, v16.4s
427         ext     v1.16b, v1.16b, v1.16b, #12
428         ext     v16.16b, v16.16b, v16.16b, #4
429         ushr    v17.4s, v0.4s, #7
430         shl     v0.4s, v0.4s, #25
431         ext     v4.16b, v4.16b, v4.16b, #4
432         orr     v17.16b, v0.16b, v17.16b
433         ext     v0.16b, v5.16b, v5.16b, #8
434         ext     v5.16b, v7.16b, v7.16b, #12
435         add     v4.4s, v4.4s, v17.4s
436         eor     v0.16b, v0.16b, v4.16b
437         rev32   v18.8h, v0.8h
438         ext     v0.16b, v7.16b, v5.16b, #12
439         mov     v5.16b, v6.16b
440         add     v7.4s, v1.4s, v18.4s
441         rev64   v1.4s, v0.4s
442         mov     v5.s[1], v3.s[2]
443         eor     v17.16b, v7.16b, v17.16b
444         trn2    v1.4s, v1.4s, v5.4s
445         ushr    v19.4s, v17.4s, #12
446         shl     v17.4s, v17.4s, #20
447         add     v4.4s, v4.4s, v1.4s
448         orr     v17.16b, v17.16b, v19.16b
449         add     v19.4s, v4.4s, v17.4s
450         eor     v4.16b, v19.16b, v18.16b
451         ext     v19.16b, v19.16b, v19.16b, #12
452         ushr    v18.4s, v4.4s, #8
453         shl     v4.4s, v4.4s, #24
454         orr     v18.16b, v4.16b, v18.16b
455         ext     v4.16b, v20.16b, v20.16b, #12
456         add     v7.4s, v18.4s, v7.4s
457         uzp1    v4.4s, v20.4s, v4.4s
458         ext     v18.16b, v18.16b, v18.16b, #8
459         eor     v17.16b, v7.16b, v17.16b
460         add     v19.4s, v19.4s, v4.4s
461         ext     v7.16b, v7.16b, v7.16b, #4
462         ushr    v20.4s, v17.4s, #7
463         shl     v17.4s, v17.4s, #25
464         orr     v17.16b, v17.16b, v20.16b
465         add     v19.4s, v19.4s, v17.4s
466         eor     v18.16b, v18.16b, v19.16b
467         rev32   v18.8h, v18.8h
468         add     v6.4s, v7.4s, v18.4s
469         zip1    v7.4s, v2.4s, v3.4s
470         zip1    v2.4s, v3.4s, v2.4s
471         eor     v3.16b, v6.16b, v17.16b
472         ext     v2.16b, v2.16b, v7.16b, #8
473         ushr    v7.4s, v3.4s, #12
474         shl     v3.4s, v3.4s, #20
475         add     v17.4s, v19.4s, v2.4s
476         zip1    v1.2d, v2.2d, v1.2d
477         zip2    v0.4s, v0.4s, v2.4s
478         orr     v3.16b, v3.16b, v7.16b
479         mov     v1.s[3], v4.s[3]
480         add     v7.4s, v17.4s, v3.4s
481         eor     v17.16b, v7.16b, v18.16b
482         ext     v7.16b, v7.16b, v7.16b, #4
483         ushr    v18.4s, v17.4s, #8
484         shl     v17.4s, v17.4s, #24
485         orr     v17.16b, v17.16b, v18.16b
486         ext     v18.16b, v16.16b, v16.16b, #8
487         add     v6.4s, v17.4s, v6.4s
488         uzp2    v5.4s, v18.4s, v5.4s
489         eor     v3.16b, v6.16b, v3.16b
490         ext     v5.16b, v5.16b, v18.16b, #4
491         ext     v6.16b, v6.16b, v6.16b, #12
492         ushr    v18.4s, v3.4s, #7
493         shl     v3.4s, v3.4s, #25
494         add     v5.4s, v7.4s, v5.4s
495         ext     v7.16b, v17.16b, v17.16b, #8
496         ext     v17.16b, v16.16b, v16.16b, #12
497         orr     v3.16b, v3.16b, v18.16b
498         ext     v16.16b, v16.16b, v17.16b, #12
499         add     v5.4s, v3.4s, v5.4s
500         mov     v17.16b, v2.16b
501         rev64   v16.4s, v16.4s
502         eor     v7.16b, v7.16b, v5.16b
503         mov     v17.s[1], v4.s[2]
504         rev32   v7.8h, v7.8h
505         trn2    v16.4s, v16.4s, v17.4s
506         add     v6.4s, v6.4s, v7.4s
507         add     v5.4s, v5.4s, v16.4s
508         eor     v3.16b, v6.16b, v3.16b
509         ushr    v17.4s, v3.4s, #12
510         shl     v3.4s, v3.4s, #20
511         orr     v3.16b, v3.16b, v17.16b
512         add     v5.4s, v5.4s, v3.4s
513         eor     v7.16b, v5.16b, v7.16b
514         ext     v5.16b, v5.16b, v5.16b, #12
515         ushr    v16.4s, v7.4s, #8
516         shl     v7.4s, v7.4s, #24
517         orr     v7.16b, v7.16b, v16.16b
518         ext     v16.16b, v1.16b, v1.16b, #12
519         add     v6.4s, v7.4s, v6.4s
520         uzp1    v1.4s, v1.4s, v16.4s
521         eor     v3.16b, v6.16b, v3.16b
522         add     v1.4s, v5.4s, v1.4s
523         ext     v5.16b, v7.16b, v7.16b, #8
524         ext     v6.16b, v6.16b, v6.16b, #4
525         ushr    v16.4s, v3.4s, #7
526         shl     v3.4s, v3.4s, #25
527         orr     v3.16b, v3.16b, v16.16b
528         add     v1.4s, v1.4s, v3.4s
529         eor     v5.16b, v5.16b, v1.16b
530         rev32   v5.8h, v5.8h
531         add     v2.4s, v6.4s, v5.4s
532         zip1    v6.4s, v0.4s, v4.4s
533         zip1    v0.4s, v4.4s, v0.4s
534         eor     v3.16b, v2.16b, v3.16b
535         ext     v0.16b, v0.16b, v6.16b, #8
536         ushr    v4.4s, v3.4s, #12
537         shl     v3.4s, v3.4s, #20
538         add     v0.4s, v1.4s, v0.4s
539         orr     v1.16b, v3.16b, v4.16b
540         add     v0.4s, v0.4s, v1.4s
541         eor     v3.16b, v0.16b, v5.16b
542         ext     v0.16b, v0.16b, v0.16b, #4
543         ushr    v4.4s, v3.4s, #8
544         shl     v3.4s, v3.4s, #24
545         orr     v3.16b, v3.16b, v4.16b
546         add     v2.4s, v3.4s, v2.4s
547         ext     v3.16b, v3.16b, v3.16b, #8
548         eor     v1.16b, v2.16b, v1.16b
549         ext     v2.16b, v2.16b, v2.16b, #12
550         ushr    v4.4s, v1.4s, #7
551         shl     v1.4s, v1.4s, #25
            /*
             * Store the final state: v2/v3 to [x0, #32] and, once the last
             * rotate of v1 is complete, v0/v1 to [x0].
             */
552         stp     q2, q3, [x0, #32]
553         orr     v1.16b, v1.16b, v4.16b
554         stp     q0, q1, [x0]
555         ret
556 .Lfunc_end1:
557         .size   compress_pre, .Lfunc_end1-compress_pre
558         .cfi_endproc
/*
 * zfs_blake3_compress_xof_sse2
 *
 * Calls compress_pre into a 64-byte stack buffer and writes a 64-byte
 * result to the address received in x5:
 *   out[ 0..31] = (row0 ^ row2, row1 ^ row3)
 *   out[32..47] = 16 bytes at [x0]      ^ row2
 *   out[48..63] = 16 bytes at [x0, #16] ^ row3
 * where row0..row3 are the four 16-byte rows produced by compress_pre.
 *
 * In (as used by the code below):
 *   x0 = pointer to 32 input bytes; forwarded to compress_pre as x1 and
 *        read again afterwards
 *   x1 = forwarded to compress_pre as x2
 *   w2 = forwarded as w3
 *   x3 = forwarded as x4
 *   w4 = forwarded as w5
 *   x5 = destination, 64 bytes written
 * NOTE(review): presumably (cv, block, block_len, counter, flags, out) as
 * in the upstream BLAKE3 compress_xof prototype -- confirm against the C
 * declaration.
 * NOTE(review): [x0] is re-read after the first 32 bytes of the output
 * have been stored, so the result depends on x0 and x5 not overlapping --
 * confirm callers never alias them.
 *
 * Frame (96 bytes): [sp, #0..#63] scratch buffer filled by compress_pre,
 * [sp, #64] x29/x30, [sp, #80] x20/x19.
 */
560         .globl  zfs_blake3_compress_xof_sse2
561         .p2align        2
562         .type   zfs_blake3_compress_xof_sse2,@function
563 zfs_blake3_compress_xof_sse2:
564         .cfi_startproc
            /* hint #25 is the HINT encoding of PACIASP (sign the return address). */
565         hint    #25
566         CFI_NEGATE_RA_STATE
567         sub     sp, sp, #96
568         stp     x29, x30, [sp, #64]
569         add     x29, sp, #64
570         stp     x20, x19, [sp, #80]
            /* CFA = x29 + 32 = sp + 96, i.e. the value of sp on entry. */
571         .cfi_def_cfa w29, 32
572         .cfi_offset w19, -8
573         .cfi_offset w20, -16
574         .cfi_offset w30, -24
575         .cfi_offset w29, -32
            /*
             * Keep the incoming x0 (x20) and x5 (x19) in callee-saved
             * registers across the call, then shift the remaining arguments
             * up by one register so that x0 can carry the scratch buffer.
             */
576         mov     x20, x0
577         mov     x19, x5
578         mov     w5, w4
579         mov     x4, x3
580         mov     w3, w2
581         mov     x2, x1
582         mov     x0, sp
583         mov     x1, x20
584         bl      compress_pre
            /* q0..q3 = the four rows written by compress_pre. */
585         ldp     q0, q1, [sp]
586         ldp     q2, q3, [sp, #32]
587         eor     v0.16b, v2.16b, v0.16b
588         eor     v1.16b, v3.16b, v1.16b
589         ldp     x29, x30, [sp, #64]
590         stp     q0, q1, [x19]
            /* v2 and v3 still hold row2 and row3; XOR them with the bytes at x20. */
591         ldr     q0, [x20]
592         eor     v0.16b, v0.16b, v2.16b
593         str     q0, [x19, #32]
594         ldr     q0, [x20, #16]
595         eor     v0.16b, v0.16b, v3.16b
596         str     q0, [x19, #48]
597         ldp     x20, x19, [sp, #80]
598         add     sp, sp, #96
            /* hint #29 is the HINT encoding of AUTIASP (authenticate the return address). */
599         hint    #29
600         ret
601 .Lfunc_end2:
602         .size   zfs_blake3_compress_xof_sse2, .Lfunc_end2-zfs_blake3_compress_xof_sse2
603         .cfi_endproc
/*
 * .LCPI3_0: the four 32-bit words 0, 1, 2, 3.
 * zfs_blake3_hash_many_sse2 loads this as one 16-byte vector and ANDs it
 * with a broadcast of the sign-extended bit 0 of w5, so the lanes are
 * either {0, 1, 2, 3} or all zero.
 * NOTE(review): presumably the per-lane counter increments used when the
 * increment_counter argument is set -- confirm.
 */
605         .section        .rodata.cst16,"aM",@progbits,16
606         .p2align        4
607 .LCPI3_0:
608         .word   0
609         .word   1
610         .word   2
611         .word   3
612         .text
613         .globl  zfs_blake3_hash_many_sse2
614         .p2align        2
615         .type   zfs_blake3_hash_many_sse2,@function
616 zfs_blake3_hash_many_sse2:
617         .cfi_startproc
618         hint    #25
619         CFI_NEGATE_RA_STATE
620         stp     d15, d14, [sp, #-160]!
621         stp     d13, d12, [sp, #16]
622         stp     d11, d10, [sp, #32]
623         stp     d9, d8, [sp, #48]
624         stp     x29, x30, [sp, #64]
625         add     x29, sp, #64
626         stp     x28, x27, [sp, #80]
627         stp     x26, x25, [sp, #96]
628         stp     x24, x23, [sp, #112]
629         stp     x22, x21, [sp, #128]
630         stp     x20, x19, [sp, #144]
631         sub     sp, sp, #464
632         .cfi_def_cfa w29, 96
633         .cfi_offset w19, -8
634         .cfi_offset w20, -16
635         .cfi_offset w21, -24
636         .cfi_offset w22, -32
637         .cfi_offset w23, -40
638         .cfi_offset w24, -48
639         .cfi_offset w25, -56
640         .cfi_offset w26, -64
641         .cfi_offset w27, -72
642         .cfi_offset w28, -80
643         .cfi_offset w30, -88
644         .cfi_offset w29, -96
645         .cfi_offset b8, -104
646         .cfi_offset b9, -112
647         .cfi_offset b10, -120
648         .cfi_offset b11, -128
649         .cfi_offset b12, -136
650         .cfi_offset b13, -144
651         .cfi_offset b14, -152
652         .cfi_offset b15, -160
653         mov     w19, w6
654         mov     x20, x4
655         mov     x24, x1
656         ldr     x26, [x29, #104]
657         ldrb    w27, [x29, #96]
658         cmp     x1, #4
659         str     x3, [sp, #40]
660         b.lo    .LBB3_6
661         adrp    x8, .LCPI3_0
662         sbfx    w9, w5, #0, #1
663         mov     w10, #44677
664         mov     w11, #62322
665         movk    w10, #47975, lsl #16
666         movk    w11, #15470, lsl #16
667         ldr     q0, [x8, :lo12:.LCPI3_0]
668         dup     v1.4s, w9
669         mov     w9, #58983
670         orr     w8, w7, w19
671         movk    w9, #27145, lsl #16
672         and     v0.16b, v1.16b, v0.16b
673         dup     v1.4s, w11
674         movi    v24.4s, #64
675         dup     v2.4s, w9
676         mov     w9, #62778
677         movk    w9, #42319, lsl #16
678         str     q0, [sp, #16]
679         orr     v0.4s, #128, lsl #24
680         stp     q2, q1, [sp, #48]
681         str     q0, [sp]
682         dup     v0.4s, w10
683         str     q0, [sp, #80]
684         b       .LBB3_3
685 .LBB3_2:
686         zip1    v0.4s, v12.4s, v31.4s
687         add     x10, x20, #4
688         zip1    v1.4s, v29.4s, v30.4s
689         tst     w5, #0x1
690         zip1    v2.4s, v28.4s, v23.4s
691         csel    x20, x10, x20, ne
692         zip1    v3.4s, v13.4s, v25.4s
693         add     x0, x0, #32
694         zip2    v6.4s, v12.4s, v31.4s
695         sub     x24, x24, #4
696         zip1    v4.2d, v0.2d, v1.2d
697         cmp     x24, #3
698         zip2    v7.4s, v29.4s, v30.4s
699         zip1    v5.2d, v2.2d, v3.2d
700         zip2    v0.2d, v0.2d, v1.2d
701         zip2    v1.2d, v2.2d, v3.2d
702         zip2    v2.4s, v28.4s, v23.4s
703         zip2    v3.4s, v13.4s, v25.4s
704         stp     q4, q5, [x26]
705         zip2    v4.2d, v6.2d, v7.2d
706         stp     q0, q1, [x26, #32]
707         zip1    v0.2d, v6.2d, v7.2d
708         zip1    v1.2d, v2.2d, v3.2d
709         zip2    v2.2d, v2.2d, v3.2d
710         stp     q0, q1, [x26, #64]
711         stp     q4, q2, [x26, #96]
712         add     x26, x26, #128
713         b.ls    .LBB3_6
714 .LBB3_3:
715         ldr     x14, [sp, #40]
716         mov     x10, x14
717         add     x11, x14, #8
718         add     x12, x14, #12
719         add     x13, x14, #16
720         ld1r    { v12.4s }, [x10], #4
721         ld1r    { v29.4s }, [x11]
722         add     x11, x14, #20
723         ld1r    { v30.4s }, [x12]
724         add     x12, x14, #24
725         ld1r    { v28.4s }, [x13]
726         ld1r    { v23.4s }, [x11]
727         add     x11, x14, #28
728         ld1r    { v13.4s }, [x12]
729         ld1r    { v31.4s }, [x10]
730         ld1r    { v25.4s }, [x11]
731         cbz     x2, .LBB3_2
732         ldr     q1, [sp, #16]
733         dup     v0.4s, w20
734         lsr     x12, x20, #32
735         mov     x10, xzr
736         ldp     x13, x14, [x0, #16]
737         add     v1.4s, v0.4s, v1.4s
738         mov     x15, x2
739         movi    v0.4s, #128, lsl #24
740         mov     w4, w8
741         str     q1, [sp, #112]
742         eor     v0.16b, v1.16b, v0.16b
743         ldr     q1, [sp]
744         cmgt    v0.4s, v1.4s, v0.4s
745         dup     v1.4s, w12
746         ldp     x11, x12, [x0]
747         sub     v0.4s, v1.4s, v0.4s
748         str     q0, [sp, #96]
749 .LBB3_5:
750         add     x17, x11, x10
751         add     x21, x12, x10
752         add     x16, x13, x10
753         add     x6, x14, x10
754         subs    x15, x15, #1
755         add     x10, x10, #64
756         ldp     q0, q1, [x17]
757         csel    w3, w27, wzr, eq
758         orr     w3, w3, w4
759         mov     w4, w19
760         and     w3, w3, #0xff
761         ldp     q3, q6, [x21]
762         dup     v2.4s, w3
763         zip1    v21.4s, v0.4s, v3.4s
764         zip2    v19.4s, v0.4s, v3.4s
765         ldp     q5, q7, [x16]
766         zip1    v17.4s, v1.4s, v6.4s
767         zip2    v22.4s, v1.4s, v6.4s
768         ldp     q16, q18, [x6]
769         zip1    v4.4s, v5.4s, v16.4s
770         zip2    v0.4s, v5.4s, v16.4s
771         ldp     q26, q27, [x17, #32]
772         zip1    v1.4s, v7.4s, v18.4s
773         zip2    v3.4s, v7.4s, v18.4s
774         zip2    v20.2d, v19.2d, v0.2d
775         mov     v19.d[1], v0.d[0]
776         dup     v18.4s, w9
777         ldp     q8, q9, [x21, #32]
778         stur    q19, [x29, #-208]
779         zip2    v7.4s, v26.4s, v8.4s
780         zip1    v10.4s, v26.4s, v8.4s
781         ldp     q11, q5, [x16, #32]
782         zip2    v26.2d, v17.2d, v1.2d
783         stp     q7, q26, [sp, #192]
784         mov     v17.d[1], v1.d[0]
785         add     v1.4s, v23.4s, v31.4s
786         ldp     q16, q6, [x6, #32]
787         stur    q17, [x29, #-256]
788         add     v1.4s, v1.4s, v19.4s
789         zip1    v8.4s, v11.4s, v16.4s
790         zip2    v7.4s, v11.4s, v16.4s
791         zip1    v11.4s, v27.4s, v9.4s
792         zip2    v9.4s, v27.4s, v9.4s
793         zip2    v27.2d, v21.2d, v4.2d
794         mov     v21.d[1], v4.d[0]
795         str     q7, [sp, #224]
796         add     v4.4s, v28.4s, v12.4s
797         zip1    v15.4s, v5.4s, v6.4s
798         zip2    v14.4s, v5.4s, v6.4s
799         stur    q27, [x29, #-192]
800         zip2    v16.2d, v22.2d, v3.2d
801         stp     q20, q21, [x29, #-240]
802         add     v0.4s, v4.4s, v21.4s
803         ldp     q6, q4, [sp, #96]
804         mov     v22.d[1], v3.d[0]
805         add     v5.4s, v25.4s, v30.4s
806         add     v3.4s, v13.4s, v29.4s
807         eor     v6.16b, v1.16b, v6.16b
808         add     v1.4s, v1.4s, v20.4s
809         str     q22, [sp, #256]
810         eor     v4.16b, v0.16b, v4.16b
811         add     v5.4s, v5.4s, v22.4s
812         add     v3.4s, v3.4s, v17.4s
813         ldr     q17, [sp, #48]
814         rev32   v6.8h, v6.8h
815         rev32   v4.8h, v4.8h
816         eor     v2.16b, v5.16b, v2.16b
817         eor     v7.16b, v3.16b, v24.16b
818         add     v0.4s, v0.4s, v27.4s
819         add     v21.4s, v4.4s, v17.4s
820         rev32   v31.8h, v2.8h
821         ldr     q2, [sp, #80]
822         rev32   v7.8h, v7.8h
823         mov     v27.16b, v16.16b
824         eor     v17.16b, v21.16b, v28.16b
825         add     v29.4s, v6.4s, v2.4s
826         ldr     q2, [sp, #64]
827         add     v24.4s, v31.4s, v18.4s
828         str     q27, [sp, #176]
829         ushr    v19.4s, v17.4s, #12
830         shl     v17.4s, v17.4s, #20
831         add     v30.4s, v7.4s, v2.4s
832         eor     v18.16b, v29.16b, v23.16b
833         orr     v12.16b, v17.16b, v19.16b
834         eor     v17.16b, v30.16b, v13.16b
835         eor     v19.16b, v24.16b, v25.16b
836         ushr    v23.4s, v18.4s, #12
837         shl     v18.4s, v18.4s, #20
838         ushr    v25.4s, v17.4s, #12
839         shl     v17.4s, v17.4s, #20
840         ushr    v28.4s, v19.4s, #12
841         shl     v19.4s, v19.4s, #20
842         orr     v13.16b, v18.16b, v23.16b
843         orr     v25.16b, v17.16b, v25.16b
844         orr     v2.16b, v19.16b, v28.16b
845         add     v28.4s, v0.4s, v12.4s
846         add     v0.4s, v3.4s, v26.4s
847         add     v18.4s, v1.4s, v13.4s
848         add     v3.4s, v5.4s, v16.4s
849         eor     v1.16b, v28.16b, v4.16b
850         add     v17.4s, v0.4s, v25.4s
851         eor     v0.16b, v18.16b, v6.16b
852         add     v19.4s, v3.4s, v2.4s
853         ushr    v16.4s, v1.4s, #8
854         shl     v3.4s, v1.4s, #24
855         eor     v4.16b, v17.16b, v7.16b
856         ushr    v6.4s, v0.4s, #8
857         shl     v1.4s, v0.4s, #24
858         eor     v5.16b, v19.16b, v31.16b
859         ushr    v23.4s, v4.4s, #8
860         shl     v4.4s, v4.4s, #24
861         orr     v7.16b, v3.16b, v16.16b
862         orr     v6.16b, v1.16b, v6.16b
863         ushr    v31.4s, v5.4s, #8
864         shl     v0.4s, v5.4s, #24
865         orr     v5.16b, v4.16b, v23.16b
866         add     v4.4s, v7.4s, v21.4s
867         ldr     q21, [sp, #192]
868         add     v3.4s, v6.4s, v29.4s
869         orr     v31.16b, v0.16b, v31.16b
870         add     v23.4s, v5.4s, v30.4s
871         eor     v0.16b, v4.16b, v12.16b
872         eor     v1.16b, v3.16b, v13.16b
873         add     v16.4s, v31.4s, v24.4s
874         eor     v20.16b, v23.16b, v25.16b
875         ushr    v24.4s, v0.4s, #7
876         shl     v0.4s, v0.4s, #25
877         ushr    v29.4s, v1.4s, #7
878         shl     v1.4s, v1.4s, #25
879         ushr    v30.4s, v20.4s, #7
880         shl     v20.4s, v20.4s, #25
881         orr     v25.16b, v0.16b, v24.16b
882         orr     v0.16b, v1.16b, v29.16b
883         mov     v29.16b, v10.16b
884         orr     v1.16b, v20.16b, v30.16b
885         mov     v20.16b, v10.16b
886         mov     v24.16b, v21.16b
887         ldr     q20, [sp, #224]
888         mov     v29.d[1], v8.d[0]
889         mov     v13.16b, v9.16b
890         zip2    v30.2d, v10.2d, v8.2d
891         zip2    v8.2d, v21.2d, v20.2d
892         mov     v26.16b, v11.16b
893         mov     v24.d[1], v20.d[0]
894         add     v20.4s, v28.4s, v29.4s
895         mov     v13.d[1], v14.d[0]
896         str     q8, [sp, #128]
897         eor     v2.16b, v16.16b, v2.16b
898         mov     v26.d[1], v15.d[0]
899         str     q24, [sp, #192]
900         add     v20.4s, v20.4s, v0.4s
901         add     v19.4s, v19.4s, v13.4s
902         ushr    v12.4s, v2.4s, #7
903         shl     v2.4s, v2.4s, #25
904         zip2    v10.2d, v9.2d, v14.2d
905         add     v18.4s, v18.4s, v24.4s
906         add     v17.4s, v17.4s, v26.4s
907         mov     v14.16b, v26.16b
908         eor     v26.16b, v20.16b, v31.16b
909         stp     q10, q30, [sp, #224]
910         add     v19.4s, v19.4s, v25.4s
911         orr     v2.16b, v2.16b, v12.16b
912         add     v18.4s, v18.4s, v1.4s
913         rev32   v26.8h, v26.8h
914         eor     v5.16b, v19.16b, v5.16b
915         add     v17.4s, v17.4s, v2.4s
916         eor     v7.16b, v18.16b, v7.16b
917         add     v23.4s, v23.4s, v26.4s
918         rev32   v5.8h, v5.8h
919         eor     v6.16b, v17.16b, v6.16b
920         rev32   v7.8h, v7.8h
921         eor     v0.16b, v23.16b, v0.16b
922         add     v3.4s, v3.4s, v5.4s
923         rev32   v6.8h, v6.8h
924         add     v16.4s, v16.4s, v7.4s
925         ushr    v31.4s, v0.4s, #12
926         shl     v0.4s, v0.4s, #20
927         eor     v25.16b, v3.16b, v25.16b
928         add     v4.4s, v4.4s, v6.4s
929         eor     v1.16b, v16.16b, v1.16b
930         orr     v0.16b, v0.16b, v31.16b
931         ushr    v31.4s, v25.4s, #12
932         shl     v25.4s, v25.4s, #20
933         add     v20.4s, v20.4s, v30.4s
934         zip2    v21.2d, v11.2d, v15.2d
935         ushr    v11.4s, v1.4s, #12
936         shl     v1.4s, v1.4s, #20
937         eor     v2.16b, v4.16b, v2.16b
938         orr     v25.16b, v25.16b, v31.16b
939         add     v19.4s, v19.4s, v10.4s
940         add     v20.4s, v20.4s, v0.4s
941         orr     v1.16b, v1.16b, v11.16b
942         ushr    v11.4s, v2.4s, #12
943         shl     v2.4s, v2.4s, #20
944         add     v18.4s, v18.4s, v8.4s
945         add     v19.4s, v19.4s, v25.4s
946         eor     v26.16b, v20.16b, v26.16b
947         orr     v2.16b, v2.16b, v11.16b
948         add     v17.4s, v17.4s, v21.4s
949         add     v18.4s, v18.4s, v1.4s
950         eor     v5.16b, v19.16b, v5.16b
951         ushr    v31.4s, v26.4s, #8
952         shl     v26.4s, v26.4s, #24
953         add     v17.4s, v17.4s, v2.4s
954         ushr    v11.4s, v5.4s, #8
955         shl     v5.4s, v5.4s, #24
956         eor     v7.16b, v18.16b, v7.16b
957         orr     v26.16b, v26.16b, v31.16b
958         eor     v6.16b, v17.16b, v6.16b
959         orr     v5.16b, v5.16b, v11.16b
960         ushr    v31.4s, v7.4s, #8
961         shl     v7.4s, v7.4s, #24
962         add     v23.4s, v26.4s, v23.4s
963         ushr    v11.4s, v6.4s, #8
964         shl     v6.4s, v6.4s, #24
965         orr     v7.16b, v7.16b, v31.16b
966         add     v3.4s, v5.4s, v3.4s
967         eor     v0.16b, v23.16b, v0.16b
968         ldp     q28, q12, [x29, #-256]
969         orr     v6.16b, v6.16b, v11.16b
970         add     v16.4s, v7.4s, v16.4s
971         eor     v25.16b, v3.16b, v25.16b
972         ushr    v31.4s, v0.4s, #7
973         shl     v0.4s, v0.4s, #25
974         add     v4.4s, v6.4s, v4.4s
975         ushr    v11.4s, v25.4s, #7
976         shl     v25.4s, v25.4s, #25
977         eor     v1.16b, v16.16b, v1.16b
978         orr     v0.16b, v0.16b, v31.16b
979         add     v18.4s, v18.4s, v12.4s
980         mov     v15.16b, v29.16b
981         ldur    q29, [x29, #-208]
982         eor     v2.16b, v4.16b, v2.16b
983         orr     v25.16b, v25.16b, v11.16b
984         ushr    v31.4s, v1.4s, #7
985         shl     v1.4s, v1.4s, #25
986         str     q15, [sp, #160]
987         add     v20.4s, v20.4s, v29.4s
988         add     v18.4s, v18.4s, v0.4s
989         ushr    v11.4s, v2.4s, #7
990         shl     v2.4s, v2.4s, #25
991         orr     v1.16b, v1.16b, v31.16b
992         add     v20.4s, v20.4s, v25.4s
993         add     v17.4s, v17.4s, v27.4s
994         eor     v6.16b, v6.16b, v18.16b
995         orr     v2.16b, v2.16b, v11.16b
996         add     v19.4s, v19.4s, v28.4s
997         eor     v7.16b, v7.16b, v20.16b
998         add     v17.4s, v17.4s, v1.4s
999         rev32   v6.8h, v6.8h
1000         add     v19.4s, v19.4s, v2.4s
1001         rev32   v7.8h, v7.8h
1002         eor     v5.16b, v17.16b, v5.16b
1003         add     v3.4s, v3.4s, v6.4s
1004         eor     v26.16b, v19.16b, v26.16b
1005         add     v4.4s, v4.4s, v7.4s
1006         rev32   v5.8h, v5.8h
1007         eor     v0.16b, v3.16b, v0.16b
1008         rev32   v26.8h, v26.8h
1009         eor     v25.16b, v4.16b, v25.16b
1010         add     v23.4s, v23.4s, v5.4s
1011         ushr    v11.4s, v0.4s, #12
1012         shl     v0.4s, v0.4s, #20
1013         add     v16.4s, v16.4s, v26.4s
1014         ushr    v31.4s, v25.4s, #12
1015         shl     v25.4s, v25.4s, #20
1016         eor     v1.16b, v23.16b, v1.16b
1017         orr     v0.16b, v0.16b, v11.16b
1018         add     v18.4s, v18.4s, v24.4s
1019         orr     v25.16b, v25.16b, v31.16b
1020         eor     v2.16b, v16.16b, v2.16b
1021         ushr    v31.4s, v1.4s, #12
1022         shl     v1.4s, v1.4s, #20
1023         add     v20.4s, v20.4s, v22.4s
1024         add     v18.4s, v18.4s, v0.4s
1025         mov     v9.16b, v30.16b
1026         mov     v30.16b, v21.16b
1027         ldur    q21, [x29, #-224]
1028         ushr    v11.4s, v2.4s, #12
1029         shl     v2.4s, v2.4s, #20
1030         orr     v1.16b, v1.16b, v31.16b
1031         add     v20.4s, v20.4s, v25.4s
1032         str     q30, [sp, #144]
1033         add     v17.4s, v17.4s, v21.4s
1034         ldur    q21, [x29, #-192]
1035         eor     v6.16b, v18.16b, v6.16b
1036         orr     v2.16b, v2.16b, v11.16b
1037         add     v19.4s, v19.4s, v30.4s
1038         eor     v7.16b, v20.16b, v7.16b
1039         add     v17.4s, v17.4s, v1.4s
1040         ushr    v11.4s, v6.4s, #8
1041         shl     v6.4s, v6.4s, #24
1042         add     v19.4s, v19.4s, v2.4s
1043         ushr    v31.4s, v7.4s, #8
1044         shl     v7.4s, v7.4s, #24
1045         eor     v5.16b, v17.16b, v5.16b
1046         orr     v6.16b, v6.16b, v11.16b
1047         eor     v26.16b, v19.16b, v26.16b
1048         orr     v7.16b, v7.16b, v31.16b
1049         ushr    v31.4s, v5.4s, #8
1050         shl     v5.4s, v5.4s, #24
1051         add     v3.4s, v6.4s, v3.4s
1052         ushr    v11.4s, v26.4s, #8
1053         shl     v26.4s, v26.4s, #24
1054         add     v4.4s, v7.4s, v4.4s
1055         orr     v5.16b, v5.16b, v31.16b
1056         eor     v0.16b, v3.16b, v0.16b
1057         orr     v26.16b, v26.16b, v11.16b
1058         eor     v25.16b, v4.16b, v25.16b
1059         add     v23.4s, v5.4s, v23.4s
1060         ushr    v11.4s, v0.4s, #7
1061         shl     v0.4s, v0.4s, #25
1062         add     v16.4s, v26.4s, v16.4s
1063         ushr    v31.4s, v25.4s, #7
1064         shl     v25.4s, v25.4s, #25
1065         eor     v1.16b, v23.16b, v1.16b
1066         orr     v0.16b, v0.16b, v11.16b
1067         add     v20.4s, v20.4s, v21.4s
1068         orr     v25.16b, v25.16b, v31.16b
1069         eor     v2.16b, v16.16b, v2.16b
1070         ushr    v31.4s, v1.4s, #7
1071         shl     v1.4s, v1.4s, #25
1072         add     v20.4s, v20.4s, v0.4s
1073         add     v19.4s, v19.4s, v10.4s
1074         ushr    v11.4s, v2.4s, #7
1075         shl     v2.4s, v2.4s, #25
1076         orr     v1.16b, v1.16b, v31.16b
1077         add     v18.4s, v18.4s, v14.4s
1078         eor     v26.16b, v20.16b, v26.16b
1079         add     v19.4s, v19.4s, v25.4s
1080         orr     v2.16b, v2.16b, v11.16b
1081         add     v17.4s, v17.4s, v9.4s
1082         ldr     q9, [sp, #208]
1083         add     v18.4s, v18.4s, v1.4s
1084         rev32   v26.8h, v26.8h
1085         eor     v5.16b, v19.16b, v5.16b
1086         add     v17.4s, v17.4s, v2.4s
1087         eor     v7.16b, v18.16b, v7.16b
1088         add     v23.4s, v23.4s, v26.4s
1089         rev32   v5.8h, v5.8h
1090         eor     v6.16b, v17.16b, v6.16b
1091         rev32   v7.8h, v7.8h
1092         eor     v0.16b, v23.16b, v0.16b
1093         add     v3.4s, v3.4s, v5.4s
1094         rev32   v6.8h, v6.8h
1095         add     v16.4s, v16.4s, v7.4s
1096         ushr    v31.4s, v0.4s, #12
1097         shl     v0.4s, v0.4s, #20
1098         eor     v25.16b, v3.16b, v25.16b
1099         add     v4.4s, v4.4s, v6.4s
1100         eor     v1.16b, v16.16b, v1.16b
1101         orr     v0.16b, v0.16b, v31.16b
1102         ushr    v31.4s, v25.4s, #12
1103         shl     v25.4s, v25.4s, #20
1104         add     v20.4s, v20.4s, v8.4s
1105         ushr    v11.4s, v1.4s, #12
1106         shl     v1.4s, v1.4s, #20
1107         eor     v2.16b, v4.16b, v2.16b
1108         orr     v25.16b, v25.16b, v31.16b
1109         add     v19.4s, v19.4s, v15.4s
1110         add     v20.4s, v20.4s, v0.4s
1111         orr     v1.16b, v1.16b, v11.16b
1112         ushr    v11.4s, v2.4s, #12
1113         shl     v2.4s, v2.4s, #20
1114         add     v18.4s, v18.4s, v9.4s
1115         add     v19.4s, v19.4s, v25.4s
1116         eor     v26.16b, v20.16b, v26.16b
1117         orr     v2.16b, v2.16b, v11.16b
1118         add     v17.4s, v17.4s, v13.4s
1119         add     v18.4s, v18.4s, v1.4s
1120         eor     v5.16b, v19.16b, v5.16b
1121         ushr    v31.4s, v26.4s, #8
1122         shl     v26.4s, v26.4s, #24
1123         add     v17.4s, v17.4s, v2.4s
1124         ushr    v11.4s, v5.4s, #8
1125         shl     v5.4s, v5.4s, #24
1126         eor     v7.16b, v18.16b, v7.16b
1127         orr     v26.16b, v26.16b, v31.16b
1128         eor     v6.16b, v17.16b, v6.16b
1129         orr     v5.16b, v5.16b, v11.16b
1130         ushr    v31.4s, v7.4s, #8
1131         shl     v7.4s, v7.4s, #24
1132         add     v23.4s, v26.4s, v23.4s
1133         ushr    v11.4s, v6.4s, #8
1134         shl     v6.4s, v6.4s, #24
1135         orr     v7.16b, v7.16b, v31.16b
1136         add     v3.4s, v5.4s, v3.4s
1137         eor     v0.16b, v23.16b, v0.16b
1138         orr     v6.16b, v6.16b, v11.16b
1139         add     v16.4s, v7.4s, v16.4s
1140         eor     v25.16b, v3.16b, v25.16b
1141         ushr    v31.4s, v0.4s, #7
1142         shl     v0.4s, v0.4s, #25
1143         add     v4.4s, v6.4s, v4.4s
1144         ushr    v11.4s, v25.4s, #7
1145         shl     v25.4s, v25.4s, #25
1146         eor     v1.16b, v16.16b, v1.16b
1147         orr     v0.16b, v0.16b, v31.16b
1148         add     v18.4s, v18.4s, v24.4s
1149         eor     v2.16b, v4.16b, v2.16b
1150         orr     v25.16b, v25.16b, v11.16b
1151         ushr    v31.4s, v1.4s, #7
1152         shl     v1.4s, v1.4s, #25
1153         add     v20.4s, v20.4s, v12.4s
1154         add     v18.4s, v18.4s, v0.4s
1155         ushr    v11.4s, v2.4s, #7
1156         shl     v2.4s, v2.4s, #25
1157         orr     v1.16b, v1.16b, v31.16b
1158         add     v20.4s, v20.4s, v25.4s
1159         add     v17.4s, v17.4s, v30.4s
1160         eor     v6.16b, v6.16b, v18.16b
1161         orr     v2.16b, v2.16b, v11.16b
1162         add     v19.4s, v19.4s, v27.4s
1163         eor     v7.16b, v7.16b, v20.16b
1164         add     v17.4s, v17.4s, v1.4s
1165         rev32   v6.8h, v6.8h
1166         add     v19.4s, v19.4s, v2.4s
1167         rev32   v7.8h, v7.8h
1168         eor     v5.16b, v17.16b, v5.16b
1169         add     v3.4s, v3.4s, v6.4s
1170         eor     v26.16b, v19.16b, v26.16b
1171         add     v4.4s, v4.4s, v7.4s
1172         rev32   v5.8h, v5.8h
1173         eor     v0.16b, v3.16b, v0.16b
1174         rev32   v26.8h, v26.8h
1175         eor     v25.16b, v4.16b, v25.16b
1176         add     v23.4s, v23.4s, v5.4s
1177         ushr    v11.4s, v0.4s, #12
1178         shl     v0.4s, v0.4s, #20
1179         add     v16.4s, v16.4s, v26.4s
1180         ushr    v31.4s, v25.4s, #12
1181         shl     v25.4s, v25.4s, #20
1182         eor     v1.16b, v23.16b, v1.16b
1183         orr     v0.16b, v0.16b, v11.16b
1184         add     v18.4s, v18.4s, v14.4s
1185         orr     v25.16b, v25.16b, v31.16b
1186         eor     v2.16b, v16.16b, v2.16b
1187         ushr    v31.4s, v1.4s, #12
1188         shl     v1.4s, v1.4s, #20
1189         add     v20.4s, v20.4s, v28.4s
1190         add     v18.4s, v18.4s, v0.4s
1191         mov     v10.16b, v13.16b
1192         ushr    v11.4s, v2.4s, #12
1193         shl     v2.4s, v2.4s, #20
1194         orr     v1.16b, v1.16b, v31.16b
1195         add     v20.4s, v20.4s, v25.4s
1196         add     v17.4s, v17.4s, v29.4s
1197         eor     v6.16b, v18.16b, v6.16b
1198         orr     v2.16b, v2.16b, v11.16b
1199         add     v19.4s, v19.4s, v10.4s
1200         eor     v7.16b, v20.16b, v7.16b
1201         add     v17.4s, v17.4s, v1.4s
1202         ushr    v11.4s, v6.4s, #8
1203         shl     v6.4s, v6.4s, #24
1204         add     v19.4s, v19.4s, v2.4s
1205         ushr    v31.4s, v7.4s, #8
1206         shl     v7.4s, v7.4s, #24
1207         eor     v5.16b, v17.16b, v5.16b
1208         orr     v6.16b, v6.16b, v11.16b
1209         eor     v26.16b, v19.16b, v26.16b
1210         orr     v7.16b, v7.16b, v31.16b
1211         ushr    v31.4s, v5.4s, #8
1212         shl     v5.4s, v5.4s, #24
1213         add     v3.4s, v6.4s, v3.4s
1214         ushr    v11.4s, v26.4s, #8
1215         shl     v26.4s, v26.4s, #24
1216         add     v4.4s, v7.4s, v4.4s
1217         orr     v5.16b, v5.16b, v31.16b
1218         eor     v0.16b, v3.16b, v0.16b
1219         mov     v22.16b, v8.16b
1220         ldp     q8, q28, [sp, #240]
1221         orr     v26.16b, v26.16b, v11.16b
1222         eor     v25.16b, v4.16b, v25.16b
1223         add     v23.4s, v5.4s, v23.4s
1224         ushr    v11.4s, v0.4s, #7
1225         shl     v0.4s, v0.4s, #25
1226         add     v16.4s, v26.4s, v16.4s
1227         ushr    v31.4s, v25.4s, #7
1228         shl     v25.4s, v25.4s, #25
1229         eor     v1.16b, v23.16b, v1.16b
1230         orr     v0.16b, v0.16b, v11.16b
1231         add     v20.4s, v20.4s, v28.4s
1232         orr     v25.16b, v25.16b, v31.16b
1233         eor     v2.16b, v16.16b, v2.16b
1234         ushr    v31.4s, v1.4s, #7
1235         shl     v1.4s, v1.4s, #25
1236         add     v20.4s, v20.4s, v0.4s
1237         add     v19.4s, v19.4s, v15.4s
1238         ushr    v11.4s, v2.4s, #7
1239         shl     v2.4s, v2.4s, #25
1240         orr     v1.16b, v1.16b, v31.16b
1241         add     v18.4s, v18.4s, v8.4s
1242         eor     v26.16b, v20.16b, v26.16b
1243         add     v19.4s, v19.4s, v25.4s
1244         orr     v2.16b, v2.16b, v11.16b
1245         add     v17.4s, v17.4s, v22.4s
1246         ldur    q22, [x29, #-256]
1247         add     v18.4s, v18.4s, v1.4s
1248         rev32   v26.8h, v26.8h
1249         eor     v5.16b, v19.16b, v5.16b
1250         add     v17.4s, v17.4s, v2.4s
1251         eor     v7.16b, v18.16b, v7.16b
1252         add     v23.4s, v23.4s, v26.4s
1253         rev32   v5.8h, v5.8h
1254         eor     v6.16b, v17.16b, v6.16b
1255         rev32   v7.8h, v7.8h
1256         eor     v0.16b, v23.16b, v0.16b
1257         add     v3.4s, v3.4s, v5.4s
1258         rev32   v6.8h, v6.8h
1259         add     v16.4s, v16.4s, v7.4s
1260         ushr    v31.4s, v0.4s, #12
1261         shl     v0.4s, v0.4s, #20
1262         eor     v25.16b, v3.16b, v25.16b
1263         add     v4.4s, v4.4s, v6.4s
1264         eor     v1.16b, v16.16b, v1.16b
1265         orr     v0.16b, v0.16b, v31.16b
1266         ushr    v31.4s, v25.4s, #12
1267         shl     v25.4s, v25.4s, #20
1268         add     v20.4s, v20.4s, v9.4s
1269         mov     v13.16b, v12.16b
1270         mov     v12.16b, v27.16b
1271         mov     v27.16b, v9.16b
1272         ldur    q9, [x29, #-192]
1273         mov     v21.16b, v15.16b
1274         ldr     q15, [sp, #224]
1275         ushr    v11.4s, v1.4s, #12
1276         ldur    q21, [x29, #-224]
1277         shl     v1.4s, v1.4s, #20
1278         eor     v2.16b, v4.16b, v2.16b
1279         orr     v25.16b, v25.16b, v31.16b
1280         add     v19.4s, v19.4s, v9.4s
1281         add     v20.4s, v20.4s, v0.4s
1282         orr     v1.16b, v1.16b, v11.16b
1283         ushr    v11.4s, v2.4s, #12
1284         shl     v2.4s, v2.4s, #20
1285         add     v18.4s, v18.4s, v21.4s
1286         add     v19.4s, v19.4s, v25.4s
1287         eor     v26.16b, v20.16b, v26.16b
1288         orr     v2.16b, v2.16b, v11.16b
1289         add     v17.4s, v17.4s, v15.4s
1290         add     v18.4s, v18.4s, v1.4s
1291         eor     v5.16b, v19.16b, v5.16b
1292         ushr    v31.4s, v26.4s, #8
1293         shl     v26.4s, v26.4s, #24
1294         add     v17.4s, v17.4s, v2.4s
1295         ushr    v11.4s, v5.4s, #8
1296         shl     v5.4s, v5.4s, #24
1297         eor     v7.16b, v18.16b, v7.16b
1298         orr     v26.16b, v26.16b, v31.16b
1299         eor     v6.16b, v17.16b, v6.16b
1300         orr     v5.16b, v5.16b, v11.16b
1301         ushr    v31.4s, v7.4s, #8
1302         shl     v7.4s, v7.4s, #24
1303         add     v23.4s, v26.4s, v23.4s
1304         ushr    v11.4s, v6.4s, #8
1305         shl     v6.4s, v6.4s, #24
1306         orr     v7.16b, v7.16b, v31.16b
1307         add     v3.4s, v5.4s, v3.4s
1308         eor     v0.16b, v23.16b, v0.16b
1309         orr     v6.16b, v6.16b, v11.16b
1310         add     v16.4s, v7.4s, v16.4s
1311         eor     v25.16b, v3.16b, v25.16b
1312         ushr    v31.4s, v0.4s, #7
1313         shl     v0.4s, v0.4s, #25
1314         add     v4.4s, v6.4s, v4.4s
1315         ushr    v11.4s, v25.4s, #7
1316         shl     v25.4s, v25.4s, #25
1317         eor     v1.16b, v16.16b, v1.16b
1318         orr     v0.16b, v0.16b, v31.16b
1319         add     v18.4s, v18.4s, v14.4s
1320         eor     v2.16b, v4.16b, v2.16b
1321         orr     v25.16b, v25.16b, v11.16b
1322         ushr    v31.4s, v1.4s, #7
1323         shl     v1.4s, v1.4s, #25
1324         add     v20.4s, v20.4s, v24.4s
1325         add     v18.4s, v18.4s, v0.4s
1326         ushr    v11.4s, v2.4s, #7
1327         shl     v2.4s, v2.4s, #25
1328         orr     v1.16b, v1.16b, v31.16b
1329         add     v20.4s, v20.4s, v25.4s
1330         add     v17.4s, v17.4s, v10.4s
1331         eor     v6.16b, v6.16b, v18.16b
1332         orr     v2.16b, v2.16b, v11.16b
1333         add     v19.4s, v19.4s, v30.4s
1334         eor     v7.16b, v7.16b, v20.16b
1335         add     v17.4s, v17.4s, v1.4s
1336         rev32   v6.8h, v6.8h
1337         add     v19.4s, v19.4s, v2.4s
1338         rev32   v7.8h, v7.8h
1339         eor     v5.16b, v17.16b, v5.16b
1340         add     v3.4s, v3.4s, v6.4s
1341         eor     v26.16b, v19.16b, v26.16b
1342         add     v4.4s, v4.4s, v7.4s
1343         rev32   v5.8h, v5.8h
1344         eor     v0.16b, v3.16b, v0.16b
1345         rev32   v26.8h, v26.8h
1346         eor     v25.16b, v4.16b, v25.16b
1347         add     v23.4s, v23.4s, v5.4s
1348         ushr    v11.4s, v0.4s, #12
1349         shl     v0.4s, v0.4s, #20
1350         add     v16.4s, v16.4s, v26.4s
1351         ushr    v31.4s, v25.4s, #12
1352         shl     v25.4s, v25.4s, #20
1353         eor     v1.16b, v23.16b, v1.16b
1354         orr     v0.16b, v0.16b, v11.16b
1355         add     v18.4s, v18.4s, v8.4s
1356         orr     v25.16b, v25.16b, v31.16b
1357         eor     v2.16b, v16.16b, v2.16b
1358         ushr    v31.4s, v1.4s, #12
1359         shl     v1.4s, v1.4s, #20
1360         add     v20.4s, v20.4s, v12.4s
1361         add     v18.4s, v18.4s, v0.4s
1362         ushr    v11.4s, v2.4s, #12
1363         shl     v2.4s, v2.4s, #20
1364         orr     v1.16b, v1.16b, v31.16b
1365         add     v20.4s, v20.4s, v25.4s
1366         add     v17.4s, v17.4s, v13.4s
1367         ldr     q13, [sp, #160]
1368         eor     v6.16b, v18.16b, v6.16b
1369         orr     v2.16b, v2.16b, v11.16b
1370         add     v19.4s, v19.4s, v15.4s
1371         eor     v7.16b, v20.16b, v7.16b
1372         add     v17.4s, v17.4s, v1.4s
1373         ushr    v11.4s, v6.4s, #8
1374         shl     v6.4s, v6.4s, #24
1375         add     v19.4s, v19.4s, v2.4s
1376         ushr    v31.4s, v7.4s, #8
1377         shl     v7.4s, v7.4s, #24
1378         eor     v5.16b, v17.16b, v5.16b
1379         orr     v6.16b, v6.16b, v11.16b
1380         eor     v26.16b, v19.16b, v26.16b
1381         orr     v7.16b, v7.16b, v31.16b
1382         ushr    v31.4s, v5.4s, #8
1383         shl     v5.4s, v5.4s, #24
1384         add     v3.4s, v6.4s, v3.4s
1385         ushr    v11.4s, v26.4s, #8
1386         shl     v26.4s, v26.4s, #24
1387         add     v4.4s, v7.4s, v4.4s
1388         orr     v5.16b, v5.16b, v31.16b
1389         eor     v0.16b, v3.16b, v0.16b
1390         orr     v26.16b, v26.16b, v11.16b
1391         eor     v25.16b, v4.16b, v25.16b
1392         add     v23.4s, v5.4s, v23.4s
1393         ushr    v11.4s, v0.4s, #7
1394         shl     v0.4s, v0.4s, #25
1395         add     v16.4s, v26.4s, v16.4s
1396         ushr    v31.4s, v25.4s, #7
1397         shl     v25.4s, v25.4s, #25
1398         eor     v1.16b, v23.16b, v1.16b
1399         orr     v0.16b, v0.16b, v11.16b
1400         add     v20.4s, v20.4s, v22.4s
1401         orr     v25.16b, v25.16b, v31.16b
1402         eor     v2.16b, v16.16b, v2.16b
1403         ushr    v31.4s, v1.4s, #7
1404         shl     v1.4s, v1.4s, #25
1405         add     v20.4s, v20.4s, v0.4s
1406         add     v19.4s, v19.4s, v9.4s
1407         mov     v29.16b, v14.16b
1408         ldr     q14, [sp, #128]
1409         ushr    v11.4s, v2.4s, #7
1410         shl     v2.4s, v2.4s, #25
1411         orr     v1.16b, v1.16b, v31.16b
1412         add     v18.4s, v18.4s, v14.4s
1413         eor     v26.16b, v20.16b, v26.16b
1414         add     v19.4s, v19.4s, v25.4s
1415         orr     v2.16b, v2.16b, v11.16b
1416         add     v17.4s, v17.4s, v27.4s
1417         add     v18.4s, v18.4s, v1.4s
1418         rev32   v26.8h, v26.8h
1419         eor     v5.16b, v19.16b, v5.16b
1420         add     v17.4s, v17.4s, v2.4s
1421         eor     v7.16b, v18.16b, v7.16b
1422         add     v23.4s, v23.4s, v26.4s
1423         rev32   v5.8h, v5.8h
1424         eor     v6.16b, v17.16b, v6.16b
1425         rev32   v7.8h, v7.8h
1426         eor     v0.16b, v23.16b, v0.16b
1427         add     v3.4s, v3.4s, v5.4s
1428         rev32   v6.8h, v6.8h
1429         add     v16.4s, v16.4s, v7.4s
1430         ushr    v31.4s, v0.4s, #12
1431         shl     v0.4s, v0.4s, #20
1432         eor     v25.16b, v3.16b, v25.16b
1433         add     v4.4s, v4.4s, v6.4s
1434         eor     v1.16b, v16.16b, v1.16b
1435         orr     v0.16b, v0.16b, v31.16b
1436         ushr    v31.4s, v25.4s, #12
1437         shl     v25.4s, v25.4s, #20
1438         add     v20.4s, v20.4s, v21.4s
1439         ushr    v11.4s, v1.4s, #12
1440         shl     v1.4s, v1.4s, #20
1441         eor     v2.16b, v4.16b, v2.16b
1442         orr     v25.16b, v25.16b, v31.16b
1443         add     v19.4s, v19.4s, v28.4s
1444         add     v20.4s, v20.4s, v0.4s
1445         mov     v12.16b, v27.16b
1446         ldur    q27, [x29, #-208]
1447         orr     v1.16b, v1.16b, v11.16b
1448         ushr    v11.4s, v2.4s, #12
1449         shl     v2.4s, v2.4s, #20
1450         add     v18.4s, v18.4s, v27.4s
1451         add     v19.4s, v19.4s, v25.4s
1452         eor     v26.16b, v20.16b, v26.16b
1453         orr     v2.16b, v2.16b, v11.16b
1454         add     v17.4s, v17.4s, v13.4s
1455         add     v18.4s, v18.4s, v1.4s
1456         eor     v5.16b, v19.16b, v5.16b
1457         ushr    v31.4s, v26.4s, #8
1458         shl     v26.4s, v26.4s, #24
1459         add     v17.4s, v17.4s, v2.4s
1460         ushr    v11.4s, v5.4s, #8
1461         shl     v5.4s, v5.4s, #24
1462         eor     v7.16b, v18.16b, v7.16b
1463         orr     v26.16b, v26.16b, v31.16b
1464         eor     v6.16b, v17.16b, v6.16b
1465         orr     v5.16b, v5.16b, v11.16b
1466         ushr    v31.4s, v7.4s, #8
1467         shl     v7.4s, v7.4s, #24
1468         add     v23.4s, v26.4s, v23.4s
1469         ushr    v11.4s, v6.4s, #8
1470         shl     v6.4s, v6.4s, #24
1471         orr     v7.16b, v7.16b, v31.16b
1472         add     v3.4s, v5.4s, v3.4s
1473         eor     v0.16b, v23.16b, v0.16b
1474         orr     v6.16b, v6.16b, v11.16b
1475         add     v16.4s, v7.4s, v16.4s
1476         eor     v25.16b, v3.16b, v25.16b
1477         ushr    v31.4s, v0.4s, #7
1478         shl     v0.4s, v0.4s, #25
1479         add     v4.4s, v6.4s, v4.4s
1480         ushr    v11.4s, v25.4s, #7
1481         shl     v25.4s, v25.4s, #25
1482         eor     v1.16b, v16.16b, v1.16b
1483         orr     v0.16b, v0.16b, v31.16b
1484         add     v18.4s, v18.4s, v8.4s
1485         eor     v2.16b, v4.16b, v2.16b
1486         orr     v25.16b, v25.16b, v11.16b
1487         ushr    v31.4s, v1.4s, #7
1488         shl     v1.4s, v1.4s, #25
1489         add     v20.4s, v20.4s, v29.4s
1490         add     v18.4s, v18.4s, v0.4s
1491         ushr    v11.4s, v2.4s, #7
1492         shl     v2.4s, v2.4s, #25
1493         orr     v1.16b, v1.16b, v31.16b
1494         add     v20.4s, v20.4s, v25.4s
1495         add     v17.4s, v17.4s, v15.4s
1496         eor     v6.16b, v6.16b, v18.16b
1497         orr     v2.16b, v2.16b, v11.16b
1498         add     v19.4s, v19.4s, v10.4s
1499         eor     v7.16b, v7.16b, v20.16b
1500         add     v17.4s, v17.4s, v1.4s
1501         rev32   v6.8h, v6.8h
1502         add     v19.4s, v19.4s, v2.4s
1503         rev32   v7.8h, v7.8h
1504         eor     v5.16b, v17.16b, v5.16b
1505         add     v3.4s, v3.4s, v6.4s
1506         eor     v26.16b, v19.16b, v26.16b
1507         add     v4.4s, v4.4s, v7.4s
1508         rev32   v5.8h, v5.8h
1509         eor     v0.16b, v3.16b, v0.16b
1510         rev32   v26.8h, v26.8h
1511         eor     v25.16b, v4.16b, v25.16b
1512         add     v23.4s, v23.4s, v5.4s
1513         ushr    v11.4s, v0.4s, #12
1514         shl     v0.4s, v0.4s, #20
1515         add     v16.4s, v16.4s, v26.4s
1516         ushr    v31.4s, v25.4s, #12
1517         shl     v25.4s, v25.4s, #20
1518         eor     v1.16b, v23.16b, v1.16b
1519         orr     v0.16b, v0.16b, v11.16b
1520         add     v18.4s, v18.4s, v14.4s
1521         mov     v30.16b, v29.16b
1522         mov     v29.16b, v15.16b
1523         ldr     q15, [sp, #144]
1524         orr     v25.16b, v25.16b, v31.16b
1525         eor     v2.16b, v16.16b, v2.16b
1526         ushr    v31.4s, v1.4s, #12
1527         shl     v1.4s, v1.4s, #20
1528         add     v20.4s, v20.4s, v15.4s
1529         add     v18.4s, v18.4s, v0.4s
1530         ushr    v11.4s, v2.4s, #12
1531         shl     v2.4s, v2.4s, #20
1532         orr     v1.16b, v1.16b, v31.16b
1533         add     v20.4s, v20.4s, v25.4s
1534         add     v17.4s, v17.4s, v24.4s
1535         eor     v6.16b, v18.16b, v6.16b
1536         orr     v2.16b, v2.16b, v11.16b
1537         add     v19.4s, v19.4s, v13.4s
1538         eor     v7.16b, v20.16b, v7.16b
1539         add     v17.4s, v17.4s, v1.4s
1540         ushr    v11.4s, v6.4s, #8
1541         shl     v6.4s, v6.4s, #24
1542         add     v19.4s, v19.4s, v2.4s
1543         ushr    v31.4s, v7.4s, #8
1544         shl     v7.4s, v7.4s, #24
1545         eor     v5.16b, v17.16b, v5.16b
1546         orr     v6.16b, v6.16b, v11.16b
1547         eor     v26.16b, v19.16b, v26.16b
1548         orr     v7.16b, v7.16b, v31.16b
1549         ushr    v31.4s, v5.4s, #8
1550         shl     v5.4s, v5.4s, #24
1551         add     v3.4s, v6.4s, v3.4s
1552         ushr    v11.4s, v26.4s, #8
1553         shl     v26.4s, v26.4s, #24
1554         add     v4.4s, v7.4s, v4.4s
1555         orr     v5.16b, v5.16b, v31.16b
1556         eor     v0.16b, v3.16b, v0.16b
1557         orr     v26.16b, v26.16b, v11.16b
1558         eor     v25.16b, v4.16b, v25.16b
1559         add     v23.4s, v5.4s, v23.4s
1560         ushr    v11.4s, v0.4s, #7
1561         shl     v0.4s, v0.4s, #25
1562         mov     v9.16b, v28.16b
1563         mov     v28.16b, v10.16b
1564         ldr     q10, [sp, #176]
1565         add     v16.4s, v26.4s, v16.4s
1566         ushr    v31.4s, v25.4s, #7
1567         shl     v25.4s, v25.4s, #25
1568         eor     v1.16b, v23.16b, v1.16b
1569         orr     v0.16b, v0.16b, v11.16b
1570         add     v20.4s, v20.4s, v10.4s
1571         orr     v25.16b, v25.16b, v31.16b
1572         eor     v2.16b, v16.16b, v2.16b
1573         ushr    v31.4s, v1.4s, #7
1574         shl     v1.4s, v1.4s, #25
1575         add     v20.4s, v20.4s, v0.4s
1576         add     v19.4s, v19.4s, v9.4s
1577         ushr    v11.4s, v2.4s, #7
1578         shl     v2.4s, v2.4s, #25
1579         orr     v1.16b, v1.16b, v31.16b
1580         add     v18.4s, v18.4s, v12.4s
1581         eor     v26.16b, v20.16b, v26.16b
1582         add     v19.4s, v19.4s, v25.4s
1583         orr     v2.16b, v2.16b, v11.16b
1584         add     v17.4s, v17.4s, v21.4s
1585         add     v18.4s, v18.4s, v1.4s
1586         rev32   v26.8h, v26.8h
1587         eor     v5.16b, v19.16b, v5.16b
1588         add     v17.4s, v17.4s, v2.4s
1589         eor     v7.16b, v18.16b, v7.16b
1590         add     v23.4s, v23.4s, v26.4s
1591         rev32   v5.8h, v5.8h
1592         eor     v6.16b, v17.16b, v6.16b
1593         rev32   v7.8h, v7.8h
1594         eor     v0.16b, v23.16b, v0.16b
1595         add     v3.4s, v3.4s, v5.4s
1596         rev32   v6.8h, v6.8h
1597         add     v16.4s, v16.4s, v7.4s
1598         ushr    v31.4s, v0.4s, #12
1599         shl     v0.4s, v0.4s, #20
1600         eor     v25.16b, v3.16b, v25.16b
1601         add     v4.4s, v4.4s, v6.4s
1602         eor     v1.16b, v16.16b, v1.16b
1603         orr     v0.16b, v0.16b, v31.16b
1604         ushr    v31.4s, v25.4s, #12
1605         shl     v25.4s, v25.4s, #20
1606         ushr    v11.4s, v1.4s, #12
1607         shl     v1.4s, v1.4s, #20
1608         eor     v2.16b, v4.16b, v2.16b
1609         add     v20.4s, v20.4s, v27.4s
1610         orr     v25.16b, v25.16b, v31.16b
1611         add     v19.4s, v19.4s, v22.4s
1612         mov     v9.16b, v22.16b
1613         ldur    q22, [x29, #-240]
1614         orr     v1.16b, v1.16b, v11.16b
1615         ushr    v11.4s, v2.4s, #12
1616         shl     v2.4s, v2.4s, #20
1617         add     v20.4s, v20.4s, v0.4s
1618         add     v18.4s, v18.4s, v22.4s
1619         add     v19.4s, v19.4s, v25.4s
1620         mov     v24.16b, v21.16b
1621         ldur    q21, [x29, #-192]
1622         orr     v2.16b, v2.16b, v11.16b
1623         eor     v26.16b, v20.16b, v26.16b
1624         add     v17.4s, v17.4s, v21.4s
1625         add     v18.4s, v18.4s, v1.4s
1626         eor     v5.16b, v19.16b, v5.16b
1627         ushr    v31.4s, v26.4s, #8
1628         add     v17.4s, v17.4s, v2.4s
1629         shl     v26.4s, v26.4s, #24
1630         ushr    v11.4s, v5.4s, #8
1631         shl     v5.4s, v5.4s, #24
1632         eor     v7.16b, v18.16b, v7.16b
1633         orr     v26.16b, v26.16b, v31.16b
1634         eor     v6.16b, v17.16b, v6.16b
1635         orr     v5.16b, v5.16b, v11.16b
1636         ushr    v31.4s, v7.4s, #8
1637         shl     v7.4s, v7.4s, #24
1638         ushr    v11.4s, v6.4s, #8
1639         shl     v6.4s, v6.4s, #24
1640         add     v23.4s, v26.4s, v23.4s
1641         orr     v7.16b, v7.16b, v31.16b
1642         add     v3.4s, v5.4s, v3.4s
1643         orr     v6.16b, v6.16b, v11.16b
1644         eor     v0.16b, v23.16b, v0.16b
1645         add     v16.4s, v7.4s, v16.4s
1646         eor     v25.16b, v3.16b, v25.16b
1647         add     v4.4s, v6.4s, v4.4s
1648         ushr    v31.4s, v0.4s, #7
1649         shl     v0.4s, v0.4s, #25
1650         ushr    v11.4s, v25.4s, #7
1651         shl     v25.4s, v25.4s, #25
1652         eor     v1.16b, v16.16b, v1.16b
1653         orr     v0.16b, v0.16b, v31.16b
1654         eor     v2.16b, v4.16b, v2.16b
1655         orr     v25.16b, v25.16b, v11.16b
1656         ushr    v31.4s, v1.4s, #7
1657         shl     v1.4s, v1.4s, #25
1658         add     v20.4s, v20.4s, v8.4s
1659         add     v18.4s, v18.4s, v14.4s
1660         ushr    v11.4s, v2.4s, #7
1661         shl     v2.4s, v2.4s, #25
1662         orr     v1.16b, v1.16b, v31.16b
1663         add     v20.4s, v20.4s, v25.4s
1664         add     v17.4s, v17.4s, v13.4s
1665         add     v18.4s, v18.4s, v0.4s
1666         orr     v2.16b, v2.16b, v11.16b
1667         add     v19.4s, v19.4s, v29.4s
1668         eor     v7.16b, v7.16b, v20.16b
1669         add     v17.4s, v17.4s, v1.4s
1670         eor     v6.16b, v6.16b, v18.16b
1671         add     v19.4s, v19.4s, v2.4s
1672         rev32   v7.8h, v7.8h
1673         eor     v5.16b, v17.16b, v5.16b
1674         rev32   v6.8h, v6.8h
1675         eor     v26.16b, v19.16b, v26.16b
1676         add     v4.4s, v4.4s, v7.4s
1677         rev32   v5.8h, v5.8h
1678         add     v3.4s, v3.4s, v6.4s
1679         rev32   v26.8h, v26.8h
1680         eor     v25.16b, v4.16b, v25.16b
1681         add     v23.4s, v23.4s, v5.4s
1682         eor     v0.16b, v3.16b, v0.16b
1683         add     v16.4s, v16.4s, v26.4s
1684         ushr    v31.4s, v25.4s, #12
1685         shl     v25.4s, v25.4s, #20
1686         ushr    v11.4s, v0.4s, #12
1687         shl     v0.4s, v0.4s, #20
1688         eor     v1.16b, v23.16b, v1.16b
1689         orr     v25.16b, v25.16b, v31.16b
1690         eor     v2.16b, v16.16b, v2.16b
1691         orr     v0.16b, v0.16b, v11.16b
1692         ushr    v31.4s, v1.4s, #12
1693         shl     v1.4s, v1.4s, #20
1694         add     v20.4s, v20.4s, v28.4s
1695         add     v18.4s, v18.4s, v12.4s
1696         ushr    v11.4s, v2.4s, #12
1697         shl     v2.4s, v2.4s, #20
1698         orr     v1.16b, v1.16b, v31.16b
1699         add     v20.4s, v20.4s, v25.4s
1700         add     v17.4s, v17.4s, v30.4s
1701         add     v18.4s, v18.4s, v0.4s
1702         orr     v2.16b, v2.16b, v11.16b
1703         add     v19.4s, v19.4s, v21.4s
1704         eor     v7.16b, v20.16b, v7.16b
1705         add     v17.4s, v17.4s, v1.4s
1706         eor     v6.16b, v18.16b, v6.16b
1707         add     v19.4s, v19.4s, v2.4s
1708         ushr    v31.4s, v7.4s, #8
1709         shl     v7.4s, v7.4s, #24
1710         ushr    v11.4s, v6.4s, #8
1711         shl     v6.4s, v6.4s, #24
1712         eor     v5.16b, v17.16b, v5.16b
1713         orr     v7.16b, v7.16b, v31.16b
1714         eor     v26.16b, v19.16b, v26.16b
1715         orr     v6.16b, v6.16b, v11.16b
1716         ushr    v31.4s, v5.4s, #8
1717         shl     v5.4s, v5.4s, #24
1718         ushr    v11.4s, v26.4s, #8
1719         shl     v26.4s, v26.4s, #24
1720         add     v4.4s, v7.4s, v4.4s
1721         orr     v5.16b, v5.16b, v31.16b
1722         add     v3.4s, v6.4s, v3.4s
1723         orr     v26.16b, v26.16b, v11.16b
1724         eor     v25.16b, v4.16b, v25.16b
1725         add     v23.4s, v5.4s, v23.4s
1726         eor     v0.16b, v3.16b, v0.16b
1727         add     v16.4s, v26.4s, v16.4s
1728         ushr    v31.4s, v25.4s, #7
1729         shl     v25.4s, v25.4s, #25
1730         ushr    v11.4s, v0.4s, #7
1731         shl     v0.4s, v0.4s, #25
1732         eor     v1.16b, v23.16b, v1.16b
1733         orr     v25.16b, v25.16b, v31.16b
1734         eor     v2.16b, v16.16b, v2.16b
1735         orr     v0.16b, v0.16b, v11.16b
1736         ushr    v31.4s, v1.4s, #7
1737         shl     v1.4s, v1.4s, #25
1738         add     v20.4s, v20.4s, v15.4s
1739         ushr    v11.4s, v2.4s, #7
1740         shl     v2.4s, v2.4s, #25
1741         orr     v1.16b, v1.16b, v31.16b
1742         add     v18.4s, v18.4s, v24.4s
1743         add     v20.4s, v20.4s, v0.4s
1744         add     v19.4s, v19.4s, v9.4s
1745         mov     v8.16b, v13.16b
1746         ldur    q13, [x29, #-208]
1747         orr     v2.16b, v2.16b, v11.16b
1748         add     v18.4s, v18.4s, v1.4s
1749         add     v17.4s, v17.4s, v13.4s
1750         eor     v26.16b, v20.16b, v26.16b
1751         add     v19.4s, v19.4s, v25.4s
1752         eor     v7.16b, v18.16b, v7.16b
1753         add     v17.4s, v17.4s, v2.4s
1754         rev32   v26.8h, v26.8h
1755         eor     v5.16b, v19.16b, v5.16b
1756         rev32   v7.8h, v7.8h
1757         eor     v6.16b, v17.16b, v6.16b
1758         add     v23.4s, v23.4s, v26.4s
1759         rev32   v5.8h, v5.8h
1760         add     v16.4s, v16.4s, v7.4s
1761         rev32   v6.8h, v6.8h
1762         eor     v0.16b, v23.16b, v0.16b
1763         add     v3.4s, v3.4s, v5.4s
1764         eor     v1.16b, v16.16b, v1.16b
1765         add     v4.4s, v4.4s, v6.4s
1766         ushr    v31.4s, v0.4s, #12
1767         shl     v0.4s, v0.4s, #20
1768         eor     v25.16b, v3.16b, v25.16b
1769         ushr    v11.4s, v1.4s, #12
1770         shl     v1.4s, v1.4s, #20
1771         orr     v0.16b, v0.16b, v31.16b
1772         eor     v2.16b, v4.16b, v2.16b
1773         ushr    v31.4s, v25.4s, #12
1774         shl     v25.4s, v25.4s, #20
1775         orr     v1.16b, v1.16b, v11.16b
1776         ushr    v11.4s, v2.4s, #12
1777         shl     v2.4s, v2.4s, #20
1778         add     v20.4s, v20.4s, v22.4s
1779         orr     v25.16b, v25.16b, v31.16b
1780         add     v19.4s, v19.4s, v10.4s
1781         mov     v27.16b, v12.16b
1782         mov     v12.16b, v30.16b
1783         mov     v29.16b, v21.16b
1784         mov     v21.16b, v24.16b
1785         ldr     q24, [sp, #192]
1786         mov     v30.16b, v22.16b
1787         ldr     q22, [sp, #256]
1788         orr     v2.16b, v2.16b, v11.16b
1789         add     v20.4s, v20.4s, v0.4s
1790         add     v18.4s, v18.4s, v24.4s
1791         add     v19.4s, v19.4s, v25.4s
1792         add     v17.4s, v17.4s, v22.4s
1793         eor     v26.16b, v20.16b, v26.16b
1794         add     v18.4s, v18.4s, v1.4s
1795         eor     v5.16b, v19.16b, v5.16b
1796         add     v17.4s, v17.4s, v2.4s
1797         ushr    v31.4s, v26.4s, #8
1798         shl     v26.4s, v26.4s, #24
1799         ushr    v11.4s, v5.4s, #8
1800         shl     v5.4s, v5.4s, #24
1801         eor     v7.16b, v18.16b, v7.16b
1802         eor     v6.16b, v17.16b, v6.16b
1803         orr     v26.16b, v26.16b, v31.16b
1804         orr     v5.16b, v5.16b, v11.16b
1805         ushr    v31.4s, v7.4s, #8
1806         shl     v7.4s, v7.4s, #24
1807         ushr    v11.4s, v6.4s, #8
1808         shl     v6.4s, v6.4s, #24
1809         add     v23.4s, v26.4s, v23.4s
1810         orr     v7.16b, v7.16b, v31.16b
1811         add     v3.4s, v5.4s, v3.4s
1812         orr     v6.16b, v6.16b, v11.16b
1813         eor     v0.16b, v23.16b, v0.16b
1814         add     v16.4s, v7.4s, v16.4s
1815         eor     v25.16b, v3.16b, v25.16b
1816         add     v4.4s, v6.4s, v4.4s
1817         ushr    v31.4s, v0.4s, #7
1818         shl     v0.4s, v0.4s, #25
1819         ushr    v11.4s, v25.4s, #7
1820         shl     v25.4s, v25.4s, #25
1821         eor     v1.16b, v16.16b, v1.16b
1822         eor     v2.16b, v4.16b, v2.16b
1823         orr     v0.16b, v0.16b, v31.16b
1824         orr     v25.16b, v25.16b, v11.16b
1825         ushr    v31.4s, v1.4s, #7
1826         shl     v1.4s, v1.4s, #25
1827         ushr    v11.4s, v2.4s, #7
1828         shl     v2.4s, v2.4s, #25
1829         add     v20.4s, v20.4s, v14.4s
1830         add     v18.4s, v18.4s, v27.4s
1831         ldr     q27, [sp, #224]
1832         orr     v1.16b, v1.16b, v31.16b
1833         orr     v2.16b, v2.16b, v11.16b
1834         add     v20.4s, v20.4s, v25.4s
1835         add     v17.4s, v17.4s, v29.4s
1836         add     v18.4s, v18.4s, v0.4s
1837         add     v19.4s, v19.4s, v8.4s
1838         eor     v7.16b, v7.16b, v20.16b
1839         add     v17.4s, v17.4s, v1.4s
1840         eor     v6.16b, v6.16b, v18.16b
1841         add     v19.4s, v19.4s, v2.4s
1842         rev32   v7.8h, v7.8h
1843         eor     v5.16b, v17.16b, v5.16b
1844         rev32   v6.8h, v6.8h
1845         eor     v26.16b, v19.16b, v26.16b
1846         add     v4.4s, v4.4s, v7.4s
1847         rev32   v5.8h, v5.8h
1848         add     v3.4s, v3.4s, v6.4s
1849         rev32   v26.8h, v26.8h
1850         eor     v25.16b, v4.16b, v25.16b
1851         add     v23.4s, v23.4s, v5.4s
1852         eor     v0.16b, v3.16b, v0.16b
1853         add     v16.4s, v16.4s, v26.4s
1854         ushr    v29.4s, v25.4s, #12
1855         shl     v25.4s, v25.4s, #20
1856         ushr    v31.4s, v0.4s, #12
1857         shl     v0.4s, v0.4s, #20
1858         eor     v1.16b, v23.16b, v1.16b
1859         eor     v2.16b, v16.16b, v2.16b
1860         orr     v25.16b, v25.16b, v29.16b
1861         orr     v0.16b, v0.16b, v31.16b
1862         ushr    v29.4s, v1.4s, #12
1863         shl     v1.4s, v1.4s, #20
1864         ushr    v31.4s, v2.4s, #12
1865         shl     v2.4s, v2.4s, #20
1866         add     v18.4s, v18.4s, v21.4s
1867         ldr     q21, [sp, #240]
1868         add     v20.4s, v20.4s, v27.4s
1869         prfm    pldl1keep, [x17, #256]
1870         orr     v1.16b, v1.16b, v29.16b
1871         prfm    pldl1keep, [x21, #256]
1872         orr     v2.16b, v2.16b, v31.16b
1873         prfm    pldl1keep, [x16, #256]
1874         add     v18.4s, v18.4s, v0.4s
1875         prfm    pldl1keep, [x6, #256]
1876         add     v17.4s, v17.4s, v21.4s
1877         add     v19.4s, v19.4s, v22.4s
1878         add     v20.4s, v20.4s, v25.4s
1879         eor     v6.16b, v18.16b, v6.16b
1880         add     v17.4s, v17.4s, v1.4s
1881         add     v19.4s, v19.4s, v2.4s
1882         eor     v7.16b, v20.16b, v7.16b
1883         ushr    v22.4s, v6.4s, #8
1884         shl     v6.4s, v6.4s, #24
1885         eor     v5.16b, v17.16b, v5.16b
1886         eor     v26.16b, v19.16b, v26.16b
1887         ushr    v21.4s, v7.4s, #8
1888         shl     v7.4s, v7.4s, #24
1889         orr     v6.16b, v6.16b, v22.16b
1890         ushr    v22.4s, v5.4s, #8
1891         shl     v5.4s, v5.4s, #24
1892         ushr    v29.4s, v26.4s, #8
1893         shl     v26.4s, v26.4s, #24
1894         orr     v7.16b, v7.16b, v21.16b
1895         orr     v5.16b, v5.16b, v22.16b
1896         add     v3.4s, v6.4s, v3.4s
1897         orr     v21.16b, v26.16b, v29.16b
1898         add     v4.4s, v7.4s, v4.4s
1899         add     v22.4s, v5.4s, v23.4s
1900         eor     v0.16b, v3.16b, v0.16b
1901         add     v16.4s, v21.4s, v16.4s
1902         eor     v23.16b, v4.16b, v25.16b
1903         eor     v1.16b, v22.16b, v1.16b
1904         ushr    v25.4s, v0.4s, #7
1905         shl     v0.4s, v0.4s, #25
1906         eor     v2.16b, v16.16b, v2.16b
1907         ushr    v26.4s, v23.4s, #7
1908         shl     v23.4s, v23.4s, #25
1909         orr     v0.16b, v0.16b, v25.16b
1910         ushr    v25.4s, v1.4s, #7
1911         shl     v1.4s, v1.4s, #25
1912         ushr    v29.4s, v2.4s, #7
1913         shl     v2.4s, v2.4s, #25
1914         add     v20.4s, v20.4s, v28.4s
1915         orr     v23.16b, v23.16b, v26.16b
1916         orr     v1.16b, v1.16b, v25.16b
1917         orr     v2.16b, v2.16b, v29.16b
1918         add     v20.4s, v20.4s, v0.4s
1919         add     v18.4s, v18.4s, v13.4s
1920         add     v17.4s, v17.4s, v30.4s
1921         add     v19.4s, v19.4s, v10.4s
1922         eor     v21.16b, v20.16b, v21.16b
1923         add     v18.4s, v18.4s, v1.4s
1924         add     v17.4s, v17.4s, v2.4s
1925         add     v19.4s, v19.4s, v23.4s
1926         rev32   v21.8h, v21.8h
1927         eor     v7.16b, v18.16b, v7.16b
1928         eor     v6.16b, v17.16b, v6.16b
1929         eor     v5.16b, v19.16b, v5.16b
1930         add     v22.4s, v22.4s, v21.4s
1931         rev32   v7.8h, v7.8h
1932         rev32   v6.8h, v6.8h
1933         rev32   v5.8h, v5.8h
1934         eor     v0.16b, v22.16b, v0.16b
1935         add     v16.4s, v16.4s, v7.4s
1936         add     v4.4s, v4.4s, v6.4s
1937         add     v3.4s, v3.4s, v5.4s
1938         ushr    v25.4s, v0.4s, #12
1939         shl     v0.4s, v0.4s, #20
1940         eor     v1.16b, v16.16b, v1.16b
1941         eor     v2.16b, v4.16b, v2.16b
1942         eor     v23.16b, v3.16b, v23.16b
1943         orr     v0.16b, v0.16b, v25.16b
1944         ushr    v25.4s, v1.4s, #12
1945         shl     v1.4s, v1.4s, #20
1946         ushr    v26.4s, v2.4s, #12
1947         shl     v2.4s, v2.4s, #20
1948         ushr    v27.4s, v23.4s, #12
1949         shl     v23.4s, v23.4s, #20
1950         orr     v1.16b, v1.16b, v25.16b
1951         add     v20.4s, v20.4s, v24.4s
1952         orr     v2.16b, v2.16b, v26.16b
1953         orr     v23.16b, v23.16b, v27.16b
1954         add     v18.4s, v18.4s, v12.4s
1955         add     v17.4s, v17.4s, v9.4s
1956         add     v19.4s, v19.4s, v15.4s
1957         add     v20.4s, v20.4s, v0.4s
1958         add     v18.4s, v18.4s, v1.4s
1959         add     v17.4s, v17.4s, v2.4s
1960         add     v19.4s, v19.4s, v23.4s
1961         eor     v21.16b, v20.16b, v21.16b
1962         eor     v7.16b, v18.16b, v7.16b
1963         eor     v6.16b, v17.16b, v6.16b
1964         eor     v5.16b, v19.16b, v5.16b
1965         ushr    v24.4s, v21.4s, #8
1966         shl     v21.4s, v21.4s, #24
1967         ushr    v25.4s, v7.4s, #8
1968         shl     v7.4s, v7.4s, #24
1969         ushr    v26.4s, v6.4s, #8
1970         shl     v6.4s, v6.4s, #24
1971         ushr    v27.4s, v5.4s, #8
1972         shl     v5.4s, v5.4s, #24
1973         orr     v21.16b, v21.16b, v24.16b
1974         orr     v7.16b, v7.16b, v25.16b
1975         orr     v6.16b, v6.16b, v26.16b
1976         orr     v5.16b, v5.16b, v27.16b
1977         add     v22.4s, v21.4s, v22.4s
1978         add     v16.4s, v7.4s, v16.4s
1979         add     v4.4s, v6.4s, v4.4s
1980         add     v3.4s, v5.4s, v3.4s
1981         eor     v0.16b, v22.16b, v0.16b
1982         eor     v1.16b, v16.16b, v1.16b
1983         eor     v2.16b, v4.16b, v2.16b
1984         eor     v23.16b, v3.16b, v23.16b
1985         ushr    v24.4s, v0.4s, #7
1986         shl     v0.4s, v0.4s, #25
1987         ushr    v25.4s, v1.4s, #7
1988         shl     v1.4s, v1.4s, #25
1989         ushr    v26.4s, v2.4s, #7
1990         shl     v2.4s, v2.4s, #25
1991         ushr    v27.4s, v23.4s, #7
1992         shl     v23.4s, v23.4s, #25
1993         orr     v0.16b, v0.16b, v24.16b
1994         orr     v1.16b, v1.16b, v25.16b
1995         orr     v2.16b, v2.16b, v26.16b
1996         orr     v23.16b, v23.16b, v27.16b
1997         movi    v24.4s, #64
1998         eor     v12.16b, v4.16b, v20.16b
1999         eor     v31.16b, v18.16b, v3.16b
2000         eor     v29.16b, v17.16b, v22.16b
2001         eor     v30.16b, v16.16b, v19.16b
2002         eor     v28.16b, v7.16b, v23.16b
2003         eor     v23.16b, v6.16b, v0.16b
2004         eor     v13.16b, v1.16b, v5.16b
2005         eor     v25.16b, v2.16b, v21.16b
2006         cbnz    x15, .LBB3_5
2007         b       .LBB3_2
2008 .LBB3_6:
2009         cbz     x24, .LBB3_14
2010         orr     w8, w7, w19
2011         and     x22, x5, #0x1
2012         stur    w8, [x29, #-192]
2013 .LBB3_8:
2014         ldr     x8, [sp, #40]
2015         mov     x28, x0
2016         ldr     x25, [x0]
2017         mov     x23, x2
2018         ldur    w5, [x29, #-192]
2019         ldp     q0, q1, [x8]
2020         mov     x8, x2
2021         b       .LBB3_11
2022 .LBB3_9:
2023         orr     w5, w5, w27
2024 .LBB3_10:
2025         sub     x0, x29, #144
2026         sub     x1, x29, #176
2027         mov     x2, x25
2028         mov     w3, #64
2029         mov     x4, x20
2030         bl      compress_pre
2031         ldp     q0, q1, [x29, #-144]
2032         add     x25, x25, #64
2033         mov     x8, x21
2034         mov     w5, w19
2035         ldp     q2, q3, [x29, #-112]
2036         eor     v0.16b, v2.16b, v0.16b
2037         eor     v1.16b, v3.16b, v1.16b
2038 .LBB3_11:
2039         subs    x21, x8, #1
2040         stp     q0, q1, [x29, #-176]
2041         b.eq    .LBB3_9
2042         cbnz    x8, .LBB3_10
2043         ldp     q1, q0, [x29, #-176]
2044         mov     x0, x28
2045         add     x20, x20, x22
2046         add     x0, x28, #8
2047         subs    x24, x24, #1
2048         mov     x2, x23
2049         stp     q1, q0, [x26], #32
2050         b.ne    .LBB3_8
2051 .LBB3_14:
2052         add     sp, sp, #464
2053         ldp     x20, x19, [sp, #144]
2054         ldp     x22, x21, [sp, #128]
2055         ldp     x24, x23, [sp, #112]
2056         ldp     x26, x25, [sp, #96]
2057         ldp     x28, x27, [sp, #80]
2058         ldp     x29, x30, [sp, #64]
2059         ldp     d9, d8, [sp, #48]
2060         ldp     d11, d10, [sp, #32]
2061         ldp     d13, d12, [sp, #16]
2062         ldp     d15, d14, [sp], #160
2063         hint    #29
2064         ret
2065 .Lfunc_end3:
2066         .size   zfs_blake3_hash_many_sse2, .Lfunc_end3-zfs_blake3_hash_many_sse2
2067         .cfi_endproc
2068         .section        ".note.GNU-stack","",@progbits
2069 #endif