4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves
25 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
27 * This is converted assembly: SSE4.1 -> ARMv8-A
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
30 * Should work on FreeBSD, Linux and macOS
31 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
34 #if defined(__aarch64__)
36 .section .note.gnu.property,"a",@note
/*
 * zfs_blake3_compress_in_place_sse41 -- exported (.globl) function symbol.
 *
 * NOTE(review): only a fragment of the body is visible in this listing.
 * The stack adjustment, argument handling, any call, the result stores
 * and the return are on lines not shown here, so the comments below
 * describe the visible instructions only.
 */
48 .globl zfs_blake3_compress_in_place_sse41
50 .type zfs_blake3_compress_in_place_sse41,@function
51 zfs_blake3_compress_in_place_sse41:
/* Save frame pointer (x29) and link register (x30) at sp+64. */
56 stp x29, x30, [sp, #64]
/* v0 = v2 ^ v0 and v1 = v3 ^ v1 (full 128-bit XOR of register pairs). */
73 eor v0.16b, v2.16b, v0.16b
74 eor v1.16b, v3.16b, v1.16b
/* Reload x29/x30 from the same slot they were saved to above. */
75 ldp x29, x30, [sp, #64]
/* ELF symbol size: distance from the entry label to .Lfunc_end0. */
82 .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
/*
 * 16-byte mergeable read-only constant ("aM", entity size 16).
 * The two signed 64-bit values below are, as unsigned hex:
 *   0xBB67AE856A09E667  and  0xA54FF53A3C6EF372
 * i.e. the little-endian 32-bit words 0x6A09E667, 0xBB67AE85,
 * 0x3C6EF372, 0xA54FF53A, which match the first four BLAKE3 IV words.
 * NOTE(review): the label attached to this constant is not visible in
 * this listing -- presumably one of the .LCPI1_* labels; confirm.
 */
85 .section .rodata.cst16,"aM",@progbits,16
88 .xword -4942790177982912921
89 .xword -6534734903820487822
/*
 * compress_pre -- helper function symbol (no .globl for it is visible in
 * this listing, so it appears to be file-local).
 *
 * NOTE(review): this listing has gaps. The entry label, the address
 * setup for the .LCPI1_* constants, the loads of the input registers
 * and the return are on lines not shown here; comments below describe
 * the visible instructions only.
 *
 * The visible body repeats one pattern on vectors of four 32-bit lanes:
 *   add, eor, then either a byte shuffle (tbl) or a shift-pair rotate
 *   (ushr/shl/orr by 12 or by 7), followed by lane rotations (ext).
 * This looks like the BLAKE3 G function (rotations 16, 12, 8, 7), with
 * the 16- and 8-bit rotations presumably done by the tbl index tables
 * .LCPI1_1 and .LCPI1_2 -- table contents are not visible; confirm.
 *
 * Visible stores: q5,q4 to [x0, #16] early on and q1,q0 to [x0, #32]
 * at the end (the second store overwrites the 16 bytes at x0+32).
 */
126 .type compress_pre,@function
/* d0 = 0x000000ff000000ff: selects the low byte of each 32-bit lane. */
131 movi d0, #0x0000ff000000ff
/* q4 = 16-byte constant .LCPI1_0 (x8 is set up on a line not shown). */
136 ldr q4, [x8, :lo12:.LCPI1_0]
/* Keep only the masked bytes of the low 64 bits of v1. */
139 and v0.8b, v1.8b, v0.8b
/* Store q5 at x0+16 and q4 at x0+32. */
141 stp q5, q4, [x0, #16]
/* v3 = even-indexed 32-bit words of v6:v7, v2 (below) = odd-indexed. */
145 uzp1 v3.4s, v6.4s, v7.4s
146 add v0.4s, v2.4s, v3.4s
147 uzp2 v2.4s, v6.4s, v7.4s
148 add v16.4s, v0.4s, v5.4s
/* q0 = tbl index table .LCPI1_1, applied after the first XOR of each step. */
149 ldr q0, [x8, :lo12:.LCPI1_1]
151 eor v1.16b, v16.16b, v1.16b
152 add v7.4s, v16.4s, v2.4s
153 tbl v1.16b, { v1.16b }, v0.16b
154 add v4.4s, v1.4s, v4.4s
155 eor v5.16b, v4.16b, v5.16b
/* Rotate each 32-bit lane right by 12: (x >> 12) | (x << 20). */
156 ushr v6.4s, v5.4s, #12
157 shl v5.4s, v5.4s, #20
158 orr v5.16b, v5.16b, v6.16b
159 add v6.4s, v7.4s, v5.4s
160 eor v7.16b, v1.16b, v6.16b
/* q1 = tbl index table .LCPI1_2, applied after the second XOR of each step. */
161 ldr q1, [x8, :lo12:.LCPI1_2]
163 tbl v7.16b, { v7.16b }, v1.16b
/*
 * De-interleaving load of eight 32-bit words from [x8]: even-indexed
 * words go to v16, odd-indexed words to v17. x8's value here is set on
 * a line not shown in this listing.
 */
164 ld2 { v16.4s, v17.4s }, [x8]
165 add v4.4s, v4.4s, v7.4s
/*
 * ext of a register with itself by #4/#8/#12 rotates its four 32-bit
 * lanes by one/two/three positions.
 */
166 ext v7.16b, v7.16b, v7.16b, #8
167 add v6.4s, v6.4s, v16.4s
168 eor v5.16b, v4.16b, v5.16b
169 ext v4.16b, v4.16b, v4.16b, #4
170 ext v16.16b, v16.16b, v16.16b, #12
171 ext v6.16b, v6.16b, v6.16b, #12
/* Rotate each 32-bit lane right by 7: (x >> 7) | (x << 25). */
172 ushr v18.4s, v5.4s, #7
173 shl v5.4s, v5.4s, #25
174 orr v5.16b, v5.16b, v18.16b
175 ext v18.16b, v17.16b, v17.16b, #12
176 add v6.4s, v6.4s, v5.4s
178 eor v7.16b, v7.16b, v6.16b
179 add v6.4s, v6.4s, v18.4s
180 mov v17.s[1], v16.s[2]
181 tbl v7.16b, { v7.16b }, v0.16b
182 add v4.4s, v4.4s, v7.4s
183 eor v5.16b, v4.16b, v5.16b
184 ushr v19.4s, v5.4s, #12
185 shl v5.4s, v5.4s, #20
186 orr v5.16b, v5.16b, v19.16b
/*
 * From here on, uzp/zip/trn/ext/mov sequences rearrange 32-bit words
 * among v2, v3 and v16-v22 between the add/eor/rotate steps.
 * NOTE(review): presumably the BLAKE3 message-word permutation for the
 * following rounds, interleaved by the compiler's scheduler -- confirm.
 */
187 uzp1 v19.4s, v3.4s, v3.4s
188 add v6.4s, v6.4s, v5.4s
189 ext v19.16b, v19.16b, v3.16b, #8
190 eor v7.16b, v7.16b, v6.16b
191 uzp2 v19.4s, v19.4s, v2.4s
192 tbl v7.16b, { v7.16b }, v1.16b
193 add v6.4s, v6.4s, v19.4s
194 add v4.4s, v4.4s, v7.4s
195 ext v6.16b, v6.16b, v6.16b, #4
196 ext v7.16b, v7.16b, v7.16b, #8
197 eor v5.16b, v4.16b, v5.16b
198 ext v4.16b, v4.16b, v4.16b, #12
199 ushr v20.4s, v5.4s, #7
200 shl v5.4s, v5.4s, #25
201 orr v5.16b, v5.16b, v20.16b
202 ext v20.16b, v3.16b, v3.16b, #12
203 add v6.4s, v6.4s, v5.4s
204 ext v3.16b, v3.16b, v20.16b, #12
205 eor v7.16b, v7.16b, v6.16b
207 tbl v7.16b, { v7.16b }, v0.16b
208 trn2 v3.4s, v3.4s, v17.4s
209 add v4.4s, v4.4s, v7.4s
210 add v6.4s, v6.4s, v3.4s
211 eor v5.16b, v4.16b, v5.16b
212 ushr v17.4s, v5.4s, #12
213 shl v5.4s, v5.4s, #20
214 orr v5.16b, v5.16b, v17.16b
215 zip1 v17.2d, v18.2d, v2.2d
216 zip2 v2.4s, v2.4s, v18.4s
217 add v6.4s, v6.4s, v5.4s
218 mov v17.s[3], v16.s[3]
219 zip1 v18.4s, v2.4s, v16.4s
220 zip1 v2.4s, v16.4s, v2.4s
221 eor v7.16b, v7.16b, v6.16b
222 ext v6.16b, v6.16b, v6.16b, #12
223 ext v16.16b, v2.16b, v18.16b, #8
224 tbl v7.16b, { v7.16b }, v1.16b
225 add v20.4s, v4.4s, v7.4s
226 ext v4.16b, v17.16b, v17.16b, #12
227 ext v7.16b, v7.16b, v7.16b, #8
228 eor v5.16b, v20.16b, v5.16b
229 uzp1 v4.4s, v17.4s, v4.4s
230 ushr v17.4s, v5.4s, #7
231 shl v5.4s, v5.4s, #25
232 add v6.4s, v6.4s, v4.4s
233 orr v5.16b, v5.16b, v17.16b
234 ext v17.16b, v20.16b, v20.16b, #4
235 add v6.4s, v6.4s, v5.4s
236 eor v7.16b, v7.16b, v6.16b
237 add v6.4s, v6.4s, v16.4s
238 tbl v7.16b, { v7.16b }, v0.16b
239 add v17.4s, v17.4s, v7.4s
240 eor v5.16b, v17.16b, v5.16b
241 ushr v2.4s, v5.4s, #12
242 shl v5.4s, v5.4s, #20
243 orr v2.16b, v5.16b, v2.16b
244 add v5.4s, v6.4s, v2.4s
245 ext v6.16b, v19.16b, v19.16b, #4
246 eor v7.16b, v7.16b, v5.16b
247 uzp1 v18.4s, v6.4s, v6.4s
248 tbl v7.16b, { v7.16b }, v1.16b
249 ext v18.16b, v18.16b, v6.16b, #8
250 add v17.4s, v17.4s, v7.4s
251 uzp2 v18.4s, v18.4s, v3.4s
252 ext v7.16b, v7.16b, v7.16b, #8
253 eor v2.16b, v17.16b, v2.16b
254 add v5.4s, v5.4s, v18.4s
255 ext v17.16b, v17.16b, v17.16b, #12
256 ushr v19.4s, v2.4s, #7
257 shl v2.4s, v2.4s, #25
258 ext v5.16b, v5.16b, v5.16b, #4
259 orr v2.16b, v2.16b, v19.16b
260 ext v19.16b, v6.16b, v6.16b, #12
261 add v5.4s, v5.4s, v2.4s
262 ext v6.16b, v6.16b, v19.16b, #12
264 eor v7.16b, v7.16b, v5.16b
266 mov v19.s[1], v4.s[2]
267 tbl v7.16b, { v7.16b }, v0.16b
268 add v17.4s, v17.4s, v7.4s
269 eor v20.16b, v17.16b, v2.16b
270 trn2 v2.4s, v6.4s, v19.4s
271 ushr v6.4s, v20.4s, #12
272 shl v19.4s, v20.4s, #20
273 add v5.4s, v5.4s, v2.4s
274 orr v6.16b, v19.16b, v6.16b
275 add v19.4s, v5.4s, v6.4s
276 eor v5.16b, v7.16b, v19.16b
277 zip1 v7.2d, v16.2d, v3.2d
278 zip2 v3.4s, v3.4s, v16.4s
279 tbl v20.16b, { v5.16b }, v1.16b
281 add v17.4s, v17.4s, v20.4s
282 ext v5.16b, v7.16b, v7.16b, #12
283 eor v6.16b, v17.16b, v6.16b
284 uzp1 v5.4s, v7.4s, v5.4s
285 ext v7.16b, v19.16b, v19.16b, #12
286 ext v17.16b, v17.16b, v17.16b, #4
287 ushr v19.4s, v6.4s, #7
288 shl v6.4s, v6.4s, #25
289 add v7.4s, v7.4s, v5.4s
290 orr v6.16b, v6.16b, v19.16b
291 ext v19.16b, v20.16b, v20.16b, #8
292 add v7.4s, v7.4s, v6.4s
293 eor v19.16b, v19.16b, v7.16b
294 tbl v19.16b, { v19.16b }, v0.16b
295 add v16.4s, v17.4s, v19.4s
296 zip1 v17.4s, v3.4s, v4.4s
297 zip1 v3.4s, v4.4s, v3.4s
298 eor v4.16b, v16.16b, v6.16b
299 ext v17.16b, v3.16b, v17.16b, #8
300 ushr v3.4s, v4.4s, #12
301 shl v4.4s, v4.4s, #20
302 add v6.4s, v7.4s, v17.4s
303 orr v3.16b, v4.16b, v3.16b
304 add v4.4s, v6.4s, v3.4s
305 ext v6.16b, v18.16b, v18.16b, #4
306 eor v7.16b, v19.16b, v4.16b
307 uzp1 v18.4s, v6.4s, v6.4s
308 tbl v7.16b, { v7.16b }, v1.16b
309 ext v18.16b, v18.16b, v6.16b, #8
310 add v16.4s, v16.4s, v7.4s
311 uzp2 v18.4s, v18.4s, v2.4s
312 ext v7.16b, v7.16b, v7.16b, #8
313 eor v3.16b, v16.16b, v3.16b
314 add v4.4s, v4.4s, v18.4s
315 ext v16.16b, v16.16b, v16.16b, #12
316 ushr v19.4s, v3.4s, #7
317 shl v3.4s, v3.4s, #25
318 ext v4.16b, v4.16b, v4.16b, #4
319 orr v3.16b, v3.16b, v19.16b
320 ext v19.16b, v6.16b, v6.16b, #12
321 add v4.4s, v4.4s, v3.4s
322 ext v6.16b, v6.16b, v19.16b, #12
324 eor v7.16b, v7.16b, v4.16b
326 mov v19.s[1], v5.s[2]
327 tbl v7.16b, { v7.16b }, v0.16b
328 add v16.4s, v16.4s, v7.4s
329 eor v20.16b, v16.16b, v3.16b
330 trn2 v3.4s, v6.4s, v19.4s
331 ushr v6.4s, v20.4s, #12
332 shl v19.4s, v20.4s, #20
333 add v4.4s, v4.4s, v3.4s
334 orr v6.16b, v19.16b, v6.16b
335 zip1 v19.2d, v17.2d, v2.2d
336 zip2 v2.4s, v2.4s, v17.4s
337 add v4.4s, v4.4s, v6.4s
338 mov v19.s[3], v5.s[3]
339 zip1 v17.4s, v2.4s, v5.4s
340 zip1 v2.4s, v5.4s, v2.4s
341 eor v7.16b, v7.16b, v4.16b
342 ext v20.16b, v19.16b, v19.16b, #12
343 ext v4.16b, v4.16b, v4.16b, #12
344 ext v2.16b, v2.16b, v17.16b, #8
345 tbl v7.16b, { v7.16b }, v1.16b
346 add v16.4s, v16.4s, v7.4s
347 ext v7.16b, v7.16b, v7.16b, #8
348 eor v21.16b, v16.16b, v6.16b
349 uzp1 v6.4s, v19.4s, v20.4s
350 ext v16.16b, v16.16b, v16.16b, #4
351 ushr v19.4s, v21.4s, #7
352 shl v20.4s, v21.4s, #25
353 add v4.4s, v4.4s, v6.4s
354 orr v19.16b, v20.16b, v19.16b
355 add v4.4s, v4.4s, v19.4s
356 eor v7.16b, v7.16b, v4.16b
357 add v4.4s, v4.4s, v2.4s
358 tbl v7.16b, { v7.16b }, v0.16b
359 add v16.4s, v16.4s, v7.4s
360 eor v5.16b, v16.16b, v19.16b
361 ushr v17.4s, v5.4s, #12
362 shl v5.4s, v5.4s, #20
363 orr v5.16b, v5.16b, v17.16b
364 ext v17.16b, v18.16b, v18.16b, #4
365 add v4.4s, v4.4s, v5.4s
366 uzp1 v18.4s, v17.4s, v17.4s
367 eor v7.16b, v7.16b, v4.16b
368 ext v18.16b, v18.16b, v17.16b, #8
369 tbl v7.16b, { v7.16b }, v1.16b
370 uzp2 v18.4s, v18.4s, v3.4s
371 add v16.4s, v16.4s, v7.4s
372 add v4.4s, v4.4s, v18.4s
373 ext v7.16b, v7.16b, v7.16b, #8
374 eor v5.16b, v16.16b, v5.16b
375 ext v4.16b, v4.16b, v4.16b, #4
376 ext v16.16b, v16.16b, v16.16b, #12
377 ushr v19.4s, v5.4s, #7
378 shl v5.4s, v5.4s, #25
379 orr v5.16b, v5.16b, v19.16b
380 add v19.4s, v4.4s, v5.4s
381 eor v4.16b, v7.16b, v19.16b
382 ext v7.16b, v17.16b, v17.16b, #12
383 tbl v20.16b, { v4.16b }, v0.16b
384 ext v4.16b, v17.16b, v7.16b, #12
386 add v16.4s, v16.4s, v20.4s
389 eor v5.16b, v16.16b, v5.16b
390 trn2 v4.4s, v4.4s, v7.4s
391 ushr v7.4s, v5.4s, #12
392 shl v5.4s, v5.4s, #20
393 add v17.4s, v19.4s, v4.4s
394 zip1 v19.2d, v2.2d, v3.2d
395 zip2 v2.4s, v3.4s, v2.4s
396 orr v5.16b, v5.16b, v7.16b
397 mov v19.s[3], v6.s[3]
398 add v7.4s, v17.4s, v5.4s
399 eor v17.16b, v20.16b, v7.16b
400 ext v20.16b, v19.16b, v19.16b, #12
401 ext v7.16b, v7.16b, v7.16b, #12
402 tbl v17.16b, { v17.16b }, v1.16b
403 add v16.4s, v16.4s, v17.4s
404 ext v17.16b, v17.16b, v17.16b, #8
405 eor v21.16b, v16.16b, v5.16b
406 uzp1 v5.4s, v19.4s, v20.4s
407 ext v16.16b, v16.16b, v16.16b, #4
408 ushr v19.4s, v21.4s, #7
409 shl v20.4s, v21.4s, #25
410 add v7.4s, v7.4s, v5.4s
411 orr v19.16b, v20.16b, v19.16b
412 add v7.4s, v7.4s, v19.4s
413 eor v17.16b, v17.16b, v7.16b
414 tbl v17.16b, { v17.16b }, v0.16b
415 add v3.4s, v16.4s, v17.4s
416 zip1 v16.4s, v2.4s, v6.4s
417 zip1 v2.4s, v6.4s, v2.4s
418 eor v6.16b, v3.16b, v19.16b
419 ext v16.16b, v2.16b, v16.16b, #8
420 ushr v2.4s, v6.4s, #12
421 shl v6.4s, v6.4s, #20
422 add v7.4s, v7.4s, v16.4s
423 orr v2.16b, v6.16b, v2.16b
424 add v6.4s, v7.4s, v2.4s
425 ext v7.16b, v18.16b, v18.16b, #4
426 eor v17.16b, v17.16b, v6.16b
427 uzp1 v18.4s, v7.4s, v7.4s
428 tbl v17.16b, { v17.16b }, v1.16b
429 ext v18.16b, v18.16b, v7.16b, #8
430 add v3.4s, v3.4s, v17.4s
431 uzp2 v18.4s, v18.4s, v4.4s
432 eor v2.16b, v3.16b, v2.16b
433 add v6.4s, v6.4s, v18.4s
434 ext v3.16b, v3.16b, v3.16b, #12
435 ext v18.16b, v18.16b, v18.16b, #4
436 ushr v19.4s, v2.4s, #7
437 shl v2.4s, v2.4s, #25
438 ext v6.16b, v6.16b, v6.16b, #4
439 orr v19.16b, v2.16b, v19.16b
440 ext v2.16b, v17.16b, v17.16b, #8
441 ext v17.16b, v7.16b, v7.16b, #12
442 add v6.4s, v6.4s, v19.4s
443 eor v2.16b, v2.16b, v6.16b
444 tbl v20.16b, { v2.16b }, v0.16b
445 ext v2.16b, v7.16b, v17.16b, #12
447 add v17.4s, v3.4s, v20.4s
450 eor v19.16b, v17.16b, v19.16b
451 trn2 v3.4s, v3.4s, v7.4s
452 ushr v21.4s, v19.4s, #12
453 shl v19.4s, v19.4s, #20
454 add v6.4s, v6.4s, v3.4s
455 orr v19.16b, v19.16b, v21.16b
456 add v21.4s, v6.4s, v19.4s
457 eor v6.16b, v20.16b, v21.16b
458 zip1 v20.2d, v16.2d, v4.2d
459 zip2 v4.4s, v4.4s, v16.4s
460 tbl v22.16b, { v6.16b }, v1.16b
461 mov v20.s[3], v5.s[3]
462 add v17.4s, v17.4s, v22.4s
463 ext v6.16b, v20.16b, v20.16b, #12
464 eor v19.16b, v17.16b, v19.16b
465 uzp1 v6.4s, v20.4s, v6.4s
466 ext v20.16b, v21.16b, v21.16b, #12
467 ext v17.16b, v17.16b, v17.16b, #4
468 ushr v21.4s, v19.4s, #7
469 shl v19.4s, v19.4s, #25
470 add v20.4s, v20.4s, v6.4s
471 orr v19.16b, v19.16b, v21.16b
472 ext v21.16b, v22.16b, v22.16b, #8
473 add v20.4s, v20.4s, v19.4s
474 eor v21.16b, v21.16b, v20.16b
475 tbl v21.16b, { v21.16b }, v0.16b
476 add v16.4s, v17.4s, v21.4s
477 zip1 v17.4s, v4.4s, v5.4s
478 zip1 v4.4s, v5.4s, v4.4s
479 eor v5.16b, v16.16b, v19.16b
480 ext v4.16b, v4.16b, v17.16b, #8
481 ushr v17.4s, v5.4s, #12
482 shl v5.4s, v5.4s, #20
483 add v19.4s, v20.4s, v4.4s
484 ext v20.16b, v18.16b, v18.16b, #8
485 zip1 v3.2d, v4.2d, v3.2d
486 orr v5.16b, v5.16b, v17.16b
487 zip2 v2.4s, v2.4s, v4.4s
488 uzp2 v7.4s, v20.4s, v7.4s
490 add v17.4s, v19.4s, v5.4s
491 ext v7.16b, v7.16b, v20.16b, #4
492 eor v19.16b, v21.16b, v17.16b
493 ext v17.16b, v17.16b, v17.16b, #4
494 tbl v19.16b, { v19.16b }, v1.16b
495 add v7.4s, v17.4s, v7.4s
496 add v16.4s, v16.4s, v19.4s
497 ext v17.16b, v19.16b, v19.16b, #8
498 ext v19.16b, v18.16b, v18.16b, #12
499 eor v5.16b, v16.16b, v5.16b
500 ext v16.16b, v16.16b, v16.16b, #12
501 ext v18.16b, v18.16b, v19.16b, #12
503 ushr v20.4s, v5.4s, #7
504 shl v5.4s, v5.4s, #25
506 mov v19.s[1], v6.s[2]
507 orr v5.16b, v5.16b, v20.16b
508 trn2 v18.4s, v18.4s, v19.4s
509 add v7.4s, v5.4s, v7.4s
510 eor v17.16b, v17.16b, v7.16b
511 add v7.4s, v7.4s, v18.4s
512 ext v18.16b, v3.16b, v3.16b, #12
513 tbl v17.16b, { v17.16b }, v0.16b
514 uzp1 v3.4s, v3.4s, v18.4s
515 add v16.4s, v16.4s, v17.4s
516 eor v5.16b, v16.16b, v5.16b
517 ushr v19.4s, v5.4s, #12
518 shl v5.4s, v5.4s, #20
519 orr v5.16b, v5.16b, v19.16b
520 add v7.4s, v7.4s, v5.4s
521 eor v17.16b, v17.16b, v7.16b
522 ext v7.16b, v7.16b, v7.16b, #12
523 tbl v17.16b, { v17.16b }, v1.16b
524 add v3.4s, v7.4s, v3.4s
525 add v16.4s, v16.4s, v17.4s
526 ext v7.16b, v17.16b, v17.16b, #8
527 eor v5.16b, v16.16b, v5.16b
528 ext v16.16b, v16.16b, v16.16b, #4
529 ushr v18.4s, v5.4s, #7
530 shl v5.4s, v5.4s, #25
531 orr v5.16b, v5.16b, v18.16b
532 add v3.4s, v3.4s, v5.4s
533 eor v7.16b, v7.16b, v3.16b
/*
 * Last use of the .LCPI1_1 index table: the destination is v0 itself,
 * so the table is overwritten by the shuffled data from here on.
 */
534 tbl v0.16b, { v7.16b }, v0.16b
535 zip1 v7.4s, v2.4s, v6.4s
536 zip1 v2.4s, v6.4s, v2.4s
537 add v4.4s, v16.4s, v0.4s
538 ext v2.16b, v2.16b, v7.16b, #8
539 eor v5.16b, v4.16b, v5.16b
540 add v2.4s, v3.4s, v2.4s
541 ushr v6.4s, v5.4s, #12
542 shl v5.4s, v5.4s, #20
543 orr v3.16b, v5.16b, v6.16b
544 add v2.4s, v2.4s, v3.4s
545 eor v0.16b, v0.16b, v2.16b
546 ext v2.16b, v2.16b, v2.16b, #4
547 tbl v0.16b, { v0.16b }, v1.16b
/* v1 (the .LCPI1_2 index table) is overwritten here with a data row. */
548 add v1.4s, v4.4s, v0.4s
549 ext v0.16b, v0.16b, v0.16b, #8
550 eor v3.16b, v1.16b, v3.16b
551 ext v1.16b, v1.16b, v1.16b, #12
552 ushr v4.4s, v3.4s, #7
553 shl v3.4s, v3.4s, #25
/* Store q1 at x0+32 and q0 at x0+48. */
554 stp q1, q0, [x0, #32]
555 orr v3.16b, v3.16b, v4.16b
/* ELF symbol size: distance from the entry label to .Lfunc_end1. */
559 .size compress_pre, .Lfunc_end1-compress_pre
/*
 * zfs_blake3_compress_xof_sse41 -- exported (.globl) function symbol.
 *
 * NOTE(review): only a fragment of the body is visible in this listing.
 * The stack adjustment, argument handling, any call, the loads that
 * refresh v2/v3 between the XORs, the result stores and the return are
 * on lines not shown here; comments describe the visible lines only.
 */
562 .globl zfs_blake3_compress_xof_sse41
564 .type zfs_blake3_compress_xof_sse41,@function
565 zfs_blake3_compress_xof_sse41:
/* Save frame pointer / link register at sp+64 and x20/x19 at sp+80. */
570 stp x29, x30, [sp, #64]
572 stp x20, x19, [sp, #80]
/* Load two 16-byte values from the stack (sp+32 and sp+48) into q2, q3. */
588 ldp q2, q3, [sp, #32]
/* v0 = v2 ^ v0 and v1 = v3 ^ v1. */
589 eor v0.16b, v2.16b, v0.16b
590 eor v1.16b, v3.16b, v1.16b
/* Reload x29/x30 from the slot they were saved to above. */
591 ldp x29, x30, [sp, #64]
/* Further XORs of v0 with v2 and with v3. */
594 eor v0.16b, v0.16b, v2.16b
597 eor v0.16b, v0.16b, v3.16b
/* Reload the saved x20/x19 pair. */
599 ldp x20, x19, [sp, #80]
/* ELF symbol size: distance from the entry label to .Lfunc_end2. */
604 .size zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
607 .section .rodata.cst16,"aM",@progbits,16
654 .globl zfs_blake3_hash_many_sse41
656 .type zfs_blake3_hash_many_sse41,@function
657 zfs_blake3_hash_many_sse41:
660 stp d15, d14, [sp, #-144]!
661 stp d13, d12, [sp, #16]
662 stp d11, d10, [sp, #32]
663 stp d9, d8, [sp, #48]
664 stp x29, x27, [sp, #64]
665 stp x26, x25, [sp, #80]
666 stp x24, x23, [sp, #96]
667 stp x22, x21, [sp, #112]
668 stp x20, x19, [sp, #128]
670 .cfi_def_cfa_offset 512
683 .cfi_offset b10, -104
684 .cfi_offset b11, -112
685 .cfi_offset b12, -120
686 .cfi_offset b13, -128
687 .cfi_offset b14, -136
688 .cfi_offset b15, -144
699 movk w15, #27145, lsl #16
700 movk w16, #47975, lsl #16
701 ldr q0, [x12, :lo12:.LCPI3_0]
707 and v0.16b, v1.16b, v0.16b
708 ldr q1, [x11, :lo12:.LCPI3_1]
709 movk w13, #15470, lsl #16
710 movk w14, #42319, lsl #16
712 stp q0, q1, [sp, #16]
713 orr v0.4s, #128, lsl #24
716 stp q0, q14, [sp, #48]
719 zip1 v0.4s, v29.4s, v8.4s
721 zip1 v1.4s, v30.4s, v31.4s
723 zip1 v2.4s, v24.4s, v18.4s
725 zip1 v3.4s, v25.4s, v26.4s
727 zip2 v6.4s, v29.4s, v8.4s
729 zip1 v4.2d, v0.2d, v1.2d
731 zip2 v7.4s, v30.4s, v31.4s
732 zip1 v5.2d, v2.2d, v3.2d
733 zip2 v0.2d, v0.2d, v1.2d
734 zip2 v1.2d, v2.2d, v3.2d
735 zip2 v2.4s, v24.4s, v18.4s
736 zip2 v3.4s, v25.4s, v26.4s
738 zip2 v4.2d, v6.2d, v7.2d
739 stp q0, q1, [x8, #32]
740 zip1 v0.2d, v6.2d, v7.2d
741 zip1 v1.2d, v2.2d, v3.2d
742 zip2 v2.2d, v2.2d, v3.2d
743 stp q0, q1, [x8, #64]
744 stp q4, q2, [x8, #96]
753 ld1r { v29.4s }, [x15], #4
754 ld1r { v30.4s }, [x16]
756 ld1r { v31.4s }, [x17]
758 ld1r { v24.4s }, [x19]
759 ld1r { v18.4s }, [x20]
760 ld1r { v25.4s }, [x16]
761 ld1r { v8.4s }, [x15]
762 ld1r { v26.4s }, [x17]
768 ldp x19, x20, [x0, #16]
769 add v1.4s, v0.4s, v1.4s
771 movi v0.4s, #128, lsl #24
774 eor v0.16b, v1.16b, v0.16b
776 cmgt v0.4s, v1.4s, v0.4s
779 sub v0.4s, v1.4s, v0.4s
789 csel w27, w9, wzr, eq
795 zip1 v22.4s, v1.4s, v4.4s
796 zip2 v20.4s, v1.4s, v4.4s
798 zip1 v17.4s, v2.4s, v5.4s
799 zip2 v23.4s, v2.4s, v5.4s
801 zip1 v19.4s, v6.4s, v16.4s
802 zip2 v1.4s, v6.4s, v16.4s
803 ldp q27, q28, [x23, #32]
804 zip1 v4.4s, v7.4s, v21.4s
805 zip2 v5.4s, v7.4s, v21.4s
806 zip2 v15.2d, v17.2d, v4.2d
807 ldp q9, q10, [x24, #32]
808 mov v17.d[1], v4.d[0]
809 add v4.4s, v30.4s, v25.4s
810 zip2 v11.2d, v23.2d, v5.2d
811 zip2 v3.4s, v27.4s, v9.4s
812 zip1 v7.4s, v27.4s, v9.4s
813 ldp q12, q6, [x22, #32]
814 mov v23.d[1], v5.d[0]
815 stp q11, q3, [sp, #256]
816 add v5.4s, v31.4s, v26.4s
817 add v4.4s, v4.4s, v17.4s
819 ldp q16, q2, [x25, #32]
820 add v5.4s, v5.4s, v23.4s
821 zip1 v3.4s, v12.4s, v16.4s
822 eor v0.16b, v5.16b, v0.16b
823 zip1 v9.4s, v6.4s, v2.4s
824 zip2 v2.4s, v6.4s, v2.4s
825 stp q7, q3, [sp, #208]
826 zip2 v3.4s, v12.4s, v16.4s
827 zip1 v12.4s, v28.4s, v10.4s
828 zip2 v10.4s, v28.4s, v10.4s
829 stp q17, q2, [sp, #160]
830 zip2 v28.2d, v22.2d, v19.2d
831 mov v22.d[1], v19.d[0]
833 add v2.4s, v8.4s, v18.4s
834 eor v16.16b, v4.16b, v13.16b
837 stp q22, q28, [sp, #320]
838 zip2 v22.2d, v20.2d, v1.2d
839 mov v20.d[1], v1.d[0]
840 add v1.4s, v29.4s, v24.4s
841 add v4.4s, v4.4s, v15.4s
842 add v5.4s, v5.4s, v11.4s
843 add v2.4s, v2.4s, v20.4s
844 stp q15, q20, [sp, #288]
845 add v1.4s, v1.4s, v3.4s
850 eor v6.16b, v1.16b, v3.16b
852 add v1.4s, v1.4s, v28.4s
855 eor v7.16b, v2.16b, v3.16b
856 ldp q27, q3, [sp, #32]
857 add v2.4s, v2.4s, v22.4s
858 tbl v6.16b, { v6.16b }, v27.16b
859 tbl v7.16b, { v7.16b }, v27.16b
860 tbl v16.16b, { v16.16b }, v27.16b
861 tbl v0.16b, { v0.16b }, v27.16b
862 add v19.4s, v6.4s, v14.4s
863 add v21.4s, v7.4s, v3.4s
864 add v30.4s, v16.4s, v17.4s
865 add v31.4s, v0.4s, v20.4s
866 eor v24.16b, v19.16b, v24.16b
867 eor v17.16b, v21.16b, v18.16b
868 ushr v18.4s, v24.4s, #12
869 shl v20.4s, v24.4s, #20
870 eor v24.16b, v30.16b, v25.16b
871 eor v25.16b, v31.16b, v26.16b
872 ushr v26.4s, v17.4s, #12
873 shl v17.4s, v17.4s, #20
874 ushr v29.4s, v24.4s, #12
875 shl v24.4s, v24.4s, #20
876 ushr v8.4s, v25.4s, #12
877 shl v25.4s, v25.4s, #20
878 orr v3.16b, v20.16b, v18.16b
879 ldr q18, [x10, :lo12:.LCPI3_2]
880 orr v13.16b, v17.16b, v26.16b
881 orr v24.16b, v24.16b, v29.16b
882 orr v14.16b, v25.16b, v8.16b
883 add v8.4s, v1.4s, v3.4s
884 add v29.4s, v2.4s, v13.4s
885 add v17.4s, v4.4s, v24.4s
886 add v20.4s, v5.4s, v14.4s
887 eor v1.16b, v6.16b, v8.16b
888 eor v2.16b, v7.16b, v29.16b
889 eor v4.16b, v16.16b, v17.16b
890 eor v0.16b, v0.16b, v20.16b
891 tbl v25.16b, { v1.16b }, v18.16b
892 tbl v16.16b, { v2.16b }, v18.16b
893 tbl v6.16b, { v4.16b }, v18.16b
894 tbl v4.16b, { v0.16b }, v18.16b
895 add v19.4s, v19.4s, v25.4s
896 add v21.4s, v21.4s, v16.4s
897 add v26.4s, v30.4s, v6.4s
898 add v7.4s, v31.4s, v4.4s
899 eor v0.16b, v19.16b, v3.16b
900 eor v1.16b, v21.16b, v13.16b
901 eor v2.16b, v26.16b, v24.16b
902 eor v3.16b, v7.16b, v14.16b
903 ushr v5.4s, v0.4s, #7
904 shl v0.4s, v0.4s, #25
905 ushr v24.4s, v1.4s, #7
906 shl v1.4s, v1.4s, #25
907 ushr v30.4s, v2.4s, #7
908 shl v2.4s, v2.4s, #25
909 orr v5.16b, v0.16b, v5.16b
910 orr v0.16b, v1.16b, v24.16b
911 ushr v31.4s, v3.4s, #7
912 orr v2.16b, v2.16b, v30.16b
913 ldp q24, q30, [sp, #208]
914 shl v3.4s, v3.4s, #25
915 zip2 v14.2d, v12.2d, v9.2d
917 orr v1.16b, v3.16b, v31.16b
918 zip2 v3.2d, v24.2d, v30.2d
920 mov v22.d[1], v30.d[0]
923 stp q22, q14, [sp, #224]
924 mov v24.d[1], v30.d[0]
925 add v12.4s, v8.4s, v22.4s
926 mov v31.d[1], v9.d[0]
927 add v22.4s, v29.4s, v24.4s
929 zip2 v28.2d, v28.2d, v30.2d
931 mov v15.d[1], v29.d[0]
932 zip2 v8.2d, v10.2d, v29.2d
933 add v10.4s, v12.4s, v0.4s
934 add v22.4s, v22.4s, v2.4s
936 add v20.4s, v20.4s, v15.4s
937 add v17.4s, v17.4s, v31.4s
938 stp q3, q8, [sp, #192]
939 eor v4.16b, v4.16b, v10.16b
940 eor v25.16b, v25.16b, v22.16b
941 add v20.4s, v20.4s, v5.4s
942 add v17.4s, v17.4s, v1.4s
943 tbl v4.16b, { v4.16b }, v27.16b
944 tbl v25.16b, { v25.16b }, v27.16b
945 eor v6.16b, v6.16b, v20.16b
946 eor v16.16b, v16.16b, v17.16b
947 add v26.4s, v26.4s, v4.4s
948 add v7.4s, v7.4s, v25.4s
949 tbl v6.16b, { v6.16b }, v27.16b
950 tbl v16.16b, { v16.16b }, v27.16b
951 eor v0.16b, v26.16b, v0.16b
952 eor v2.16b, v7.16b, v2.16b
953 add v21.4s, v21.4s, v6.4s
954 add v19.4s, v19.4s, v16.4s
955 ushr v12.4s, v0.4s, #12
956 shl v0.4s, v0.4s, #20
957 ushr v13.4s, v2.4s, #12
958 shl v2.4s, v2.4s, #20
959 eor v5.16b, v21.16b, v5.16b
960 eor v1.16b, v19.16b, v1.16b
961 orr v0.16b, v0.16b, v12.16b
962 add v10.4s, v10.4s, v3.4s
963 orr v2.16b, v2.16b, v13.16b
964 ushr v13.4s, v5.4s, #12
965 shl v5.4s, v5.4s, #20
966 add v22.4s, v22.4s, v28.4s
967 ushr v12.4s, v1.4s, #12
968 shl v1.4s, v1.4s, #20
969 add v10.4s, v10.4s, v0.4s
970 orr v5.16b, v5.16b, v13.16b
971 add v22.4s, v22.4s, v2.4s
972 add v20.4s, v20.4s, v8.4s
973 orr v1.16b, v1.16b, v12.16b
974 add v17.4s, v17.4s, v14.4s
975 eor v4.16b, v4.16b, v10.16b
976 eor v25.16b, v25.16b, v22.16b
977 add v20.4s, v20.4s, v5.4s
978 add v17.4s, v17.4s, v1.4s
979 tbl v4.16b, { v4.16b }, v18.16b
980 tbl v25.16b, { v25.16b }, v18.16b
981 eor v6.16b, v6.16b, v20.16b
982 eor v16.16b, v16.16b, v17.16b
983 add v26.4s, v26.4s, v4.4s
984 add v7.4s, v7.4s, v25.4s
985 tbl v6.16b, { v6.16b }, v18.16b
986 tbl v16.16b, { v16.16b }, v18.16b
987 eor v0.16b, v26.16b, v0.16b
988 eor v2.16b, v7.16b, v2.16b
989 add v21.4s, v21.4s, v6.4s
990 add v19.4s, v19.4s, v16.4s
991 ushr v12.4s, v0.4s, #7
992 shl v0.4s, v0.4s, #25
993 ushr v13.4s, v2.4s, #7
994 shl v2.4s, v2.4s, #25
995 eor v5.16b, v21.16b, v5.16b
996 eor v1.16b, v19.16b, v1.16b
997 orr v0.16b, v0.16b, v12.16b
998 add v22.4s, v22.4s, v23.4s
999 orr v2.16b, v2.16b, v13.16b
1000 ushr v13.4s, v5.4s, #7
1001 shl v5.4s, v5.4s, #25
1002 add v17.4s, v17.4s, v11.4s
1003 mov v30.16b, v28.16b
1004 mov v28.16b, v23.16b
1006 ushr v12.4s, v1.4s, #7
1007 shl v1.4s, v1.4s, #25
1008 add v22.4s, v22.4s, v0.4s
1009 mov v29.16b, v31.16b
1011 orr v5.16b, v5.16b, v13.16b
1012 add v17.4s, v17.4s, v2.4s
1013 add v10.4s, v10.4s, v23.4s
1014 orr v1.16b, v1.16b, v12.16b
1016 eor v16.16b, v16.16b, v22.16b
1017 add v20.4s, v20.4s, v31.4s
1018 eor v6.16b, v6.16b, v17.16b
1019 add v10.4s, v10.4s, v5.4s
1020 tbl v16.16b, { v16.16b }, v27.16b
1021 add v20.4s, v20.4s, v1.4s
1022 tbl v6.16b, { v6.16b }, v27.16b
1023 eor v25.16b, v25.16b, v10.16b
1024 add v21.4s, v21.4s, v16.4s
1025 eor v4.16b, v4.16b, v20.16b
1026 add v26.4s, v26.4s, v6.4s
1027 tbl v25.16b, { v25.16b }, v27.16b
1028 eor v0.16b, v21.16b, v0.16b
1029 tbl v4.16b, { v4.16b }, v27.16b
1030 eor v2.16b, v26.16b, v2.16b
1031 add v19.4s, v19.4s, v25.4s
1032 ushr v12.4s, v0.4s, #12
1033 shl v0.4s, v0.4s, #20
1034 add v7.4s, v7.4s, v4.4s
1035 ushr v13.4s, v2.4s, #12
1036 shl v2.4s, v2.4s, #20
1037 eor v5.16b, v5.16b, v19.16b
1038 add v22.4s, v22.4s, v24.4s
1040 orr v0.16b, v0.16b, v12.16b
1041 eor v1.16b, v7.16b, v1.16b
1042 orr v2.16b, v2.16b, v13.16b
1043 ushr v12.4s, v5.4s, #12
1044 shl v5.4s, v5.4s, #20
1045 add v17.4s, v17.4s, v24.4s
1047 ushr v13.4s, v1.4s, #12
1048 shl v1.4s, v1.4s, #20
1049 add v22.4s, v22.4s, v0.4s
1050 orr v5.16b, v5.16b, v12.16b
1051 add v17.4s, v17.4s, v2.4s
1052 add v10.4s, v10.4s, v24.4s
1054 orr v1.16b, v1.16b, v13.16b
1055 eor v16.16b, v16.16b, v22.16b
1056 add v20.4s, v20.4s, v14.4s
1057 eor v6.16b, v6.16b, v17.16b
1058 add v10.4s, v10.4s, v5.4s
1059 tbl v16.16b, { v16.16b }, v18.16b
1060 add v20.4s, v20.4s, v1.4s
1061 tbl v6.16b, { v6.16b }, v18.16b
1062 eor v25.16b, v25.16b, v10.16b
1063 add v21.4s, v21.4s, v16.4s
1064 eor v4.16b, v4.16b, v20.16b
1065 add v26.4s, v26.4s, v6.4s
1066 tbl v25.16b, { v25.16b }, v18.16b
1067 eor v0.16b, v21.16b, v0.16b
1068 tbl v4.16b, { v4.16b }, v18.16b
1069 eor v2.16b, v26.16b, v2.16b
1070 add v19.4s, v19.4s, v25.4s
1071 ushr v12.4s, v0.4s, #7
1072 shl v0.4s, v0.4s, #25
1073 add v7.4s, v7.4s, v4.4s
1074 ushr v13.4s, v2.4s, #7
1075 shl v2.4s, v2.4s, #25
1076 eor v5.16b, v19.16b, v5.16b
1077 orr v0.16b, v0.16b, v12.16b
1078 eor v1.16b, v7.16b, v1.16b
1079 add v10.4s, v10.4s, v24.4s
1080 orr v2.16b, v2.16b, v13.16b
1081 ushr v12.4s, v5.4s, #7
1082 shl v5.4s, v5.4s, #25
1083 add v22.4s, v22.4s, v29.4s
1084 ushr v13.4s, v1.4s, #7
1085 shl v1.4s, v1.4s, #25
1086 add v10.4s, v10.4s, v0.4s
1087 orr v5.16b, v5.16b, v12.16b
1088 add v22.4s, v22.4s, v2.4s
1089 add v20.4s, v20.4s, v8.4s
1091 orr v1.16b, v1.16b, v13.16b
1092 add v17.4s, v17.4s, v3.4s
1094 eor v4.16b, v4.16b, v10.16b
1095 eor v25.16b, v25.16b, v22.16b
1096 add v20.4s, v20.4s, v5.4s
1097 add v17.4s, v17.4s, v1.4s
1098 tbl v4.16b, { v4.16b }, v27.16b
1099 tbl v25.16b, { v25.16b }, v27.16b
1100 eor v6.16b, v6.16b, v20.16b
1101 eor v16.16b, v16.16b, v17.16b
1102 add v26.4s, v26.4s, v4.4s
1103 add v7.4s, v7.4s, v25.4s
1104 tbl v6.16b, { v6.16b }, v27.16b
1105 tbl v16.16b, { v16.16b }, v27.16b
1106 eor v0.16b, v26.16b, v0.16b
1107 eor v2.16b, v7.16b, v2.16b
1108 add v21.4s, v21.4s, v6.4s
1109 add v19.4s, v19.4s, v16.4s
1110 ushr v12.4s, v0.4s, #12
1111 shl v0.4s, v0.4s, #20
1112 ushr v13.4s, v2.4s, #12
1113 shl v2.4s, v2.4s, #20
1114 eor v5.16b, v21.16b, v5.16b
1115 eor v1.16b, v19.16b, v1.16b
1116 orr v0.16b, v0.16b, v12.16b
1117 add v10.4s, v10.4s, v30.4s
1118 orr v2.16b, v2.16b, v13.16b
1119 ushr v13.4s, v5.4s, #12
1120 shl v5.4s, v5.4s, #20
1121 add v22.4s, v22.4s, v8.4s
1122 mov v24.16b, v30.16b
1123 mov v30.16b, v15.16b
1124 add v17.4s, v17.4s, v15.4s
1126 ushr v12.4s, v1.4s, #12
1127 shl v1.4s, v1.4s, #20
1128 add v10.4s, v10.4s, v0.4s
1130 orr v5.16b, v5.16b, v13.16b
1131 add v22.4s, v22.4s, v2.4s
1132 add v20.4s, v20.4s, v15.4s
1133 orr v1.16b, v1.16b, v12.16b
1134 eor v4.16b, v4.16b, v10.16b
1135 eor v25.16b, v25.16b, v22.16b
1136 add v20.4s, v20.4s, v5.4s
1137 add v17.4s, v17.4s, v1.4s
1138 tbl v4.16b, { v4.16b }, v18.16b
1139 tbl v25.16b, { v25.16b }, v18.16b
1140 eor v6.16b, v6.16b, v20.16b
1141 eor v16.16b, v16.16b, v17.16b
1142 add v26.4s, v26.4s, v4.4s
1143 add v7.4s, v7.4s, v25.4s
1144 tbl v6.16b, { v6.16b }, v18.16b
1145 tbl v16.16b, { v16.16b }, v18.16b
1146 eor v0.16b, v26.16b, v0.16b
1147 eor v2.16b, v7.16b, v2.16b
1148 add v21.4s, v21.4s, v6.4s
1149 add v19.4s, v19.4s, v16.4s
1150 ushr v12.4s, v0.4s, #7
1151 shl v0.4s, v0.4s, #25
1152 ushr v13.4s, v2.4s, #7
1153 shl v2.4s, v2.4s, #25
1154 eor v5.16b, v21.16b, v5.16b
1155 eor v1.16b, v19.16b, v1.16b
1156 orr v0.16b, v0.16b, v12.16b
1157 add v22.4s, v22.4s, v9.4s
1158 orr v2.16b, v2.16b, v13.16b
1159 ushr v13.4s, v5.4s, #7
1160 shl v5.4s, v5.4s, #25
1161 add v17.4s, v17.4s, v14.4s
1162 ushr v12.4s, v1.4s, #7
1163 shl v1.4s, v1.4s, #25
1164 add v22.4s, v22.4s, v0.4s
1165 orr v5.16b, v5.16b, v13.16b
1166 add v17.4s, v17.4s, v2.4s
1167 add v10.4s, v10.4s, v28.4s
1168 orr v1.16b, v1.16b, v12.16b
1169 eor v16.16b, v16.16b, v22.16b
1170 add v20.4s, v20.4s, v11.4s
1171 eor v6.16b, v6.16b, v17.16b
1172 add v10.4s, v10.4s, v5.4s
1173 tbl v16.16b, { v16.16b }, v27.16b
1174 add v20.4s, v20.4s, v1.4s
1175 tbl v6.16b, { v6.16b }, v27.16b
1176 eor v25.16b, v25.16b, v10.16b
1177 add v21.4s, v21.4s, v16.4s
1178 eor v4.16b, v4.16b, v20.16b
1179 add v26.4s, v26.4s, v6.4s
1180 tbl v25.16b, { v25.16b }, v27.16b
1181 eor v0.16b, v21.16b, v0.16b
1182 tbl v4.16b, { v4.16b }, v27.16b
1183 eor v2.16b, v26.16b, v2.16b
1184 add v19.4s, v19.4s, v25.4s
1185 ushr v12.4s, v0.4s, #12
1186 shl v0.4s, v0.4s, #20
1187 add v7.4s, v7.4s, v4.4s
1188 ushr v13.4s, v2.4s, #12
1189 shl v2.4s, v2.4s, #20
1190 eor v5.16b, v5.16b, v19.16b
1191 orr v0.16b, v0.16b, v12.16b
1192 eor v1.16b, v7.16b, v1.16b
1193 add v22.4s, v22.4s, v29.4s
1194 orr v2.16b, v2.16b, v13.16b
1195 ushr v12.4s, v5.4s, #12
1196 shl v5.4s, v5.4s, #20
1197 add v17.4s, v17.4s, v23.4s
1198 ushr v13.4s, v1.4s, #12
1199 shl v1.4s, v1.4s, #20
1200 add v22.4s, v22.4s, v0.4s
1201 orr v5.16b, v5.16b, v12.16b
1202 add v17.4s, v17.4s, v2.4s
1203 add v10.4s, v10.4s, v31.4s
1204 orr v1.16b, v1.16b, v13.16b
1205 eor v16.16b, v16.16b, v22.16b
1206 add v20.4s, v20.4s, v30.4s
1207 eor v6.16b, v6.16b, v17.16b
1208 add v10.4s, v10.4s, v5.4s
1209 tbl v16.16b, { v16.16b }, v18.16b
1210 add v20.4s, v20.4s, v1.4s
1211 tbl v6.16b, { v6.16b }, v18.16b
1212 eor v25.16b, v25.16b, v10.16b
1213 add v21.4s, v21.4s, v16.4s
1214 eor v4.16b, v4.16b, v20.16b
1215 add v26.4s, v26.4s, v6.4s
1216 tbl v25.16b, { v25.16b }, v18.16b
1217 eor v0.16b, v21.16b, v0.16b
1218 tbl v4.16b, { v4.16b }, v18.16b
1219 eor v2.16b, v26.16b, v2.16b
1220 add v19.4s, v19.4s, v25.4s
1221 ushr v12.4s, v0.4s, #7
1222 shl v0.4s, v0.4s, #25
1223 add v7.4s, v7.4s, v4.4s
1224 ushr v13.4s, v2.4s, #7
1225 shl v2.4s, v2.4s, #25
1226 eor v5.16b, v19.16b, v5.16b
1227 add v10.4s, v10.4s, v3.4s
1229 orr v0.16b, v0.16b, v12.16b
1230 eor v1.16b, v7.16b, v1.16b
1231 orr v2.16b, v2.16b, v13.16b
1232 ushr v12.4s, v5.4s, #7
1233 shl v5.4s, v5.4s, #25
1234 add v22.4s, v22.4s, v3.4s
1235 ushr v13.4s, v1.4s, #7
1236 shl v1.4s, v1.4s, #25
1237 add v10.4s, v10.4s, v0.4s
1238 orr v5.16b, v5.16b, v12.16b
1239 add v22.4s, v22.4s, v2.4s
1240 add v20.4s, v20.4s, v15.4s
1242 orr v1.16b, v1.16b, v13.16b
1243 add v17.4s, v17.4s, v24.4s
1244 eor v4.16b, v4.16b, v10.16b
1245 eor v25.16b, v25.16b, v22.16b
1246 add v20.4s, v20.4s, v5.4s
1247 add v17.4s, v17.4s, v1.4s
1248 tbl v4.16b, { v4.16b }, v27.16b
1249 tbl v25.16b, { v25.16b }, v27.16b
1250 eor v6.16b, v6.16b, v20.16b
1251 eor v16.16b, v16.16b, v17.16b
1252 add v26.4s, v26.4s, v4.4s
1253 add v7.4s, v7.4s, v25.4s
1254 tbl v6.16b, { v6.16b }, v27.16b
1255 tbl v16.16b, { v16.16b }, v27.16b
1256 eor v0.16b, v26.16b, v0.16b
1257 eor v2.16b, v7.16b, v2.16b
1258 add v21.4s, v21.4s, v6.4s
1259 add v19.4s, v19.4s, v16.4s
1260 ushr v12.4s, v0.4s, #12
1261 shl v0.4s, v0.4s, #20
1262 ushr v13.4s, v2.4s, #12
1263 shl v2.4s, v2.4s, #20
1264 eor v5.16b, v21.16b, v5.16b
1265 ldp q23, q11, [sp, #320]
1266 eor v1.16b, v19.16b, v1.16b
1267 orr v0.16b, v0.16b, v12.16b
1268 add v10.4s, v10.4s, v8.4s
1269 orr v2.16b, v2.16b, v13.16b
1270 ushr v13.4s, v5.4s, #12
1271 shl v5.4s, v5.4s, #20
1272 add v22.4s, v22.4s, v23.4s
1273 ushr v12.4s, v1.4s, #12
1274 shl v1.4s, v1.4s, #20
1275 add v10.4s, v10.4s, v0.4s
1276 mov v28.16b, v31.16b
1279 orr v5.16b, v5.16b, v13.16b
1280 add v22.4s, v22.4s, v2.4s
1281 add v20.4s, v20.4s, v11.4s
1282 orr v1.16b, v1.16b, v12.16b
1283 add v17.4s, v17.4s, v8.4s
1284 eor v4.16b, v4.16b, v10.16b
1285 eor v25.16b, v25.16b, v22.16b
1286 add v20.4s, v20.4s, v5.4s
1287 add v17.4s, v17.4s, v1.4s
1288 tbl v4.16b, { v4.16b }, v18.16b
1289 tbl v25.16b, { v25.16b }, v18.16b
1290 eor v6.16b, v6.16b, v20.16b
1291 eor v16.16b, v16.16b, v17.16b
1292 add v26.4s, v26.4s, v4.4s
1293 add v7.4s, v7.4s, v25.4s
1294 tbl v6.16b, { v6.16b }, v18.16b
1295 tbl v16.16b, { v16.16b }, v18.16b
1296 eor v0.16b, v26.16b, v0.16b
1297 eor v2.16b, v7.16b, v2.16b
1298 add v21.4s, v21.4s, v6.4s
1299 add v19.4s, v19.4s, v16.4s
1300 ushr v12.4s, v0.4s, #7
1301 shl v0.4s, v0.4s, #25
1302 ushr v13.4s, v2.4s, #7
1303 shl v2.4s, v2.4s, #25
1304 eor v5.16b, v21.16b, v5.16b
1305 eor v1.16b, v19.16b, v1.16b
1306 orr v0.16b, v0.16b, v12.16b
1307 add v22.4s, v22.4s, v29.4s
1308 orr v2.16b, v2.16b, v13.16b
1309 ushr v13.4s, v5.4s, #7
1310 shl v5.4s, v5.4s, #25
1311 add v17.4s, v17.4s, v30.4s
1312 ushr v12.4s, v1.4s, #7
1313 shl v1.4s, v1.4s, #25
1314 add v22.4s, v22.4s, v0.4s
1315 orr v5.16b, v5.16b, v13.16b
1316 add v17.4s, v17.4s, v2.4s
1317 add v10.4s, v10.4s, v9.4s
1318 orr v1.16b, v1.16b, v12.16b
1319 eor v16.16b, v16.16b, v22.16b
1320 add v20.4s, v20.4s, v14.4s
1322 eor v6.16b, v6.16b, v17.16b
1323 add v10.4s, v10.4s, v5.4s
1324 tbl v16.16b, { v16.16b }, v27.16b
1325 add v20.4s, v20.4s, v1.4s
1326 tbl v6.16b, { v6.16b }, v27.16b
1327 eor v25.16b, v25.16b, v10.16b
1328 add v21.4s, v21.4s, v16.4s
1329 eor v4.16b, v4.16b, v20.16b
1330 add v26.4s, v26.4s, v6.4s
1331 tbl v25.16b, { v25.16b }, v27.16b
1332 eor v0.16b, v21.16b, v0.16b
1333 tbl v4.16b, { v4.16b }, v27.16b
1334 eor v2.16b, v26.16b, v2.16b
1335 add v19.4s, v19.4s, v25.4s
1336 ushr v12.4s, v0.4s, #12
1337 shl v0.4s, v0.4s, #20
1338 add v7.4s, v7.4s, v4.4s
1339 ushr v13.4s, v2.4s, #12
1340 shl v2.4s, v2.4s, #20
1341 eor v5.16b, v5.16b, v19.16b
1342 orr v0.16b, v0.16b, v12.16b
1343 eor v1.16b, v7.16b, v1.16b
1344 add v22.4s, v22.4s, v3.4s
1345 orr v2.16b, v2.16b, v13.16b
1346 ushr v12.4s, v5.4s, #12
1347 shl v5.4s, v5.4s, #20
1348 add v17.4s, v17.4s, v15.4s
1349 ushr v13.4s, v1.4s, #12
1350 shl v1.4s, v1.4s, #20
1351 add v22.4s, v22.4s, v0.4s
1352 orr v5.16b, v5.16b, v12.16b
1353 add v17.4s, v17.4s, v2.4s
1354 add v10.4s, v10.4s, v14.4s
1355 orr v1.16b, v1.16b, v13.16b
1356 eor v16.16b, v16.16b, v22.16b
1357 add v20.4s, v20.4s, v8.4s
1358 eor v6.16b, v6.16b, v17.16b
1359 add v10.4s, v10.4s, v5.4s
1360 tbl v16.16b, { v16.16b }, v18.16b
1361 add v20.4s, v20.4s, v1.4s
1362 tbl v6.16b, { v6.16b }, v18.16b
1363 eor v25.16b, v25.16b, v10.16b
1364 add v21.4s, v21.4s, v16.4s
1365 eor v4.16b, v4.16b, v20.16b
1366 add v26.4s, v26.4s, v6.4s
1367 tbl v25.16b, { v25.16b }, v18.16b
1368 eor v0.16b, v21.16b, v0.16b
1369 tbl v4.16b, { v4.16b }, v18.16b
1370 eor v2.16b, v26.16b, v2.16b
1371 add v19.4s, v19.4s, v25.4s
1372 ushr v12.4s, v0.4s, #7
1373 shl v0.4s, v0.4s, #25
1374 add v7.4s, v7.4s, v4.4s
1375 ushr v13.4s, v2.4s, #7
1376 shl v2.4s, v2.4s, #25
1377 eor v5.16b, v19.16b, v5.16b
1378 orr v0.16b, v0.16b, v12.16b
1379 eor v1.16b, v7.16b, v1.16b
1380 add v10.4s, v10.4s, v28.4s
1381 orr v2.16b, v2.16b, v13.16b
1382 ushr v12.4s, v5.4s, #7
1383 shl v5.4s, v5.4s, #25
1384 add v22.4s, v22.4s, v24.4s
1385 ushr v13.4s, v1.4s, #7
1386 shl v1.4s, v1.4s, #25
1387 add v10.4s, v10.4s, v0.4s
1388 orr v5.16b, v5.16b, v12.16b
1389 add v22.4s, v22.4s, v2.4s
1390 add v20.4s, v20.4s, v11.4s
1392 orr v1.16b, v1.16b, v13.16b
1393 add v17.4s, v17.4s, v31.4s
1395 eor v4.16b, v4.16b, v10.16b
1396 eor v25.16b, v25.16b, v22.16b
1397 add v20.4s, v20.4s, v5.4s
1398 add v17.4s, v17.4s, v1.4s
1399 tbl v4.16b, { v4.16b }, v27.16b
1400 tbl v25.16b, { v25.16b }, v27.16b
1401 eor v6.16b, v6.16b, v20.16b
1402 eor v16.16b, v16.16b, v17.16b
1403 add v26.4s, v26.4s, v4.4s
1404 add v7.4s, v7.4s, v25.4s
1405 tbl v6.16b, { v6.16b }, v27.16b
1406 tbl v16.16b, { v16.16b }, v27.16b
1407 eor v0.16b, v26.16b, v0.16b
1408 eor v2.16b, v7.16b, v2.16b
1409 add v21.4s, v21.4s, v6.4s
1410 add v19.4s, v19.4s, v16.4s
1411 ushr v12.4s, v0.4s, #12
1412 shl v0.4s, v0.4s, #20
1413 ushr v13.4s, v2.4s, #12
1414 shl v2.4s, v2.4s, #20
1415 eor v5.16b, v21.16b, v5.16b
1416 eor v1.16b, v19.16b, v1.16b
1417 orr v0.16b, v0.16b, v12.16b
1418 add v10.4s, v10.4s, v23.4s
1420 orr v2.16b, v2.16b, v13.16b
1421 ushr v13.4s, v5.4s, #12
1422 shl v5.4s, v5.4s, #20
1423 add v22.4s, v22.4s, v11.4s
1427 ushr v12.4s, v1.4s, #12
1428 shl v1.4s, v1.4s, #20
1429 add v10.4s, v10.4s, v0.4s
1430 orr v5.16b, v5.16b, v13.16b
1432 add v22.4s, v22.4s, v2.4s
1433 add v20.4s, v20.4s, v24.4s
1434 orr v1.16b, v1.16b, v12.16b
1435 add v17.4s, v17.4s, v31.4s
1436 eor v4.16b, v4.16b, v10.16b
1437 eor v25.16b, v25.16b, v22.16b
1438 add v20.4s, v20.4s, v5.4s
1439 add v17.4s, v17.4s, v1.4s
1440 tbl v4.16b, { v4.16b }, v18.16b
1441 tbl v25.16b, { v25.16b }, v18.16b
1442 eor v6.16b, v6.16b, v20.16b
1443 eor v16.16b, v16.16b, v17.16b
1444 add v26.4s, v26.4s, v4.4s
1445 add v7.4s, v7.4s, v25.4s
1446 tbl v6.16b, { v6.16b }, v18.16b
1447 tbl v16.16b, { v16.16b }, v18.16b
1448 eor v0.16b, v26.16b, v0.16b
1449 eor v2.16b, v7.16b, v2.16b
1450 add v21.4s, v21.4s, v6.4s
1452 add v19.4s, v19.4s, v16.4s
1453 ushr v12.4s, v0.4s, #7
1454 shl v0.4s, v0.4s, #25
1455 ushr v13.4s, v2.4s, #7
1456 shl v2.4s, v2.4s, #25
1457 eor v5.16b, v21.16b, v5.16b
1458 eor v1.16b, v19.16b, v1.16b
1459 orr v0.16b, v0.16b, v12.16b
1460 add v22.4s, v22.4s, v29.4s
1461 orr v2.16b, v2.16b, v13.16b
1462 ushr v13.4s, v5.4s, #7
1463 shl v5.4s, v5.4s, #25
1464 add v17.4s, v17.4s, v30.4s
1466 ushr v12.4s, v1.4s, #7
1467 shl v1.4s, v1.4s, #25
1468 add v22.4s, v22.4s, v0.4s
1471 orr v5.16b, v5.16b, v13.16b
1472 add v17.4s, v17.4s, v2.4s
1473 add v10.4s, v10.4s, v30.4s
1474 orr v1.16b, v1.16b, v12.16b
1475 eor v16.16b, v16.16b, v22.16b
1476 add v20.4s, v20.4s, v28.4s
1477 eor v6.16b, v6.16b, v17.16b
1478 add v10.4s, v10.4s, v5.4s
1479 tbl v16.16b, { v16.16b }, v27.16b
1480 add v20.4s, v20.4s, v1.4s
1481 tbl v6.16b, { v6.16b }, v27.16b
1482 eor v25.16b, v25.16b, v10.16b
1483 add v21.4s, v21.4s, v16.4s
1484 eor v4.16b, v4.16b, v20.16b
1485 add v26.4s, v26.4s, v6.4s
1486 tbl v25.16b, { v25.16b }, v27.16b
1487 eor v0.16b, v21.16b, v0.16b
1488 tbl v4.16b, { v4.16b }, v27.16b
1489 eor v2.16b, v26.16b, v2.16b
1490 add v19.4s, v19.4s, v25.4s
1491 ushr v12.4s, v0.4s, #12
1492 shl v0.4s, v0.4s, #20
1493 add v7.4s, v7.4s, v4.4s
1494 ushr v13.4s, v2.4s, #12
1495 shl v2.4s, v2.4s, #20
1496 eor v5.16b, v5.16b, v19.16b
1497 orr v0.16b, v0.16b, v12.16b
1498 eor v1.16b, v7.16b, v1.16b
1499 add v22.4s, v22.4s, v8.4s
1500 orr v2.16b, v2.16b, v13.16b
1501 ushr v12.4s, v5.4s, #12
1502 shl v5.4s, v5.4s, #20
1503 add v17.4s, v17.4s, v9.4s
1505 ushr v13.4s, v1.4s, #12
1506 shl v1.4s, v1.4s, #20
1507 add v22.4s, v22.4s, v0.4s
1508 orr v5.16b, v5.16b, v12.16b
1509 add v17.4s, v17.4s, v2.4s
1510 add v10.4s, v10.4s, v23.4s
1511 orr v1.16b, v1.16b, v13.16b
1512 eor v16.16b, v16.16b, v22.16b
1513 add v20.4s, v20.4s, v31.4s
1514 eor v6.16b, v6.16b, v17.16b
1515 add v10.4s, v10.4s, v5.4s
1516 tbl v16.16b, { v16.16b }, v18.16b
1517 add v20.4s, v20.4s, v1.4s
1518 tbl v6.16b, { v6.16b }, v18.16b
1519 eor v25.16b, v25.16b, v10.16b
1520 add v21.4s, v21.4s, v16.4s
1521 eor v4.16b, v4.16b, v20.16b
1522 add v26.4s, v26.4s, v6.4s
1523 tbl v25.16b, { v25.16b }, v18.16b
1524 eor v0.16b, v21.16b, v0.16b
1525 tbl v4.16b, { v4.16b }, v18.16b
1526 eor v2.16b, v26.16b, v2.16b
1527 add v19.4s, v19.4s, v25.4s
1528 ushr v12.4s, v0.4s, #7
1529 shl v0.4s, v0.4s, #25
1530 add v7.4s, v7.4s, v4.4s
1531 ushr v13.4s, v2.4s, #7
1532 shl v2.4s, v2.4s, #25
1533 eor v5.16b, v19.16b, v5.16b
1534 add v10.4s, v10.4s, v14.4s
1536 orr v0.16b, v0.16b, v12.16b
1537 eor v1.16b, v7.16b, v1.16b
1538 orr v2.16b, v2.16b, v13.16b
1539 ushr v12.4s, v5.4s, #7
1540 shl v5.4s, v5.4s, #25
1541 add v22.4s, v22.4s, v14.4s
1542 ushr v13.4s, v1.4s, #7
1543 shl v1.4s, v1.4s, #25
1544 add v10.4s, v10.4s, v0.4s
1545 orr v5.16b, v5.16b, v12.16b
1546 add v22.4s, v22.4s, v2.4s
1547 add v20.4s, v20.4s, v24.4s
1548 orr v1.16b, v1.16b, v13.16b
1549 eor v4.16b, v4.16b, v10.16b
1550 add v17.4s, v17.4s, v9.4s
1551 eor v25.16b, v25.16b, v22.16b
1552 add v20.4s, v20.4s, v5.4s
1553 tbl v4.16b, { v4.16b }, v27.16b
1554 add v17.4s, v17.4s, v1.4s
1555 tbl v25.16b, { v25.16b }, v27.16b
1556 eor v6.16b, v6.16b, v20.16b
1557 add v26.4s, v26.4s, v4.4s
1558 eor v16.16b, v16.16b, v17.16b
1559 add v7.4s, v7.4s, v25.4s
1560 tbl v6.16b, { v6.16b }, v27.16b
1561 eor v0.16b, v26.16b, v0.16b
1562 tbl v16.16b, { v16.16b }, v27.16b
1563 eor v2.16b, v7.16b, v2.16b
1564 add v21.4s, v21.4s, v6.4s
1565 ushr v12.4s, v0.4s, #12
1566 shl v0.4s, v0.4s, #20
1567 add v19.4s, v19.4s, v16.4s
1568 ushr v13.4s, v2.4s, #12
1569 shl v2.4s, v2.4s, #20
1570 eor v5.16b, v21.16b, v5.16b
1571 orr v0.16b, v0.16b, v12.16b
1572 eor v1.16b, v19.16b, v1.16b
1573 add v10.4s, v10.4s, v11.4s
1574 orr v2.16b, v2.16b, v13.16b
1575 ushr v13.4s, v5.4s, #12
1576 shl v5.4s, v5.4s, #20
1577 ushr v12.4s, v1.4s, #12
1578 shl v1.4s, v1.4s, #20
1579 add v10.4s, v10.4s, v0.4s
1580 add v22.4s, v22.4s, v15.4s
1581 orr v5.16b, v5.16b, v13.16b
1582 add v20.4s, v20.4s, v3.4s
1585 orr v1.16b, v1.16b, v12.16b
1586 eor v4.16b, v4.16b, v10.16b
1587 add v22.4s, v22.4s, v2.4s
1588 add v17.4s, v17.4s, v3.4s
1589 add v20.4s, v20.4s, v5.4s
1590 tbl v4.16b, { v4.16b }, v18.16b
1591 eor v25.16b, v25.16b, v22.16b
1592 add v17.4s, v17.4s, v1.4s
1593 eor v6.16b, v6.16b, v20.16b
1594 add v26.4s, v26.4s, v4.4s
1595 tbl v25.16b, { v25.16b }, v18.16b
1596 eor v16.16b, v16.16b, v17.16b
1597 tbl v6.16b, { v6.16b }, v18.16b
1598 eor v0.16b, v26.16b, v0.16b
1599 add v7.4s, v7.4s, v25.4s
1600 tbl v16.16b, { v16.16b }, v18.16b
1601 add v21.4s, v21.4s, v6.4s
1602 ushr v12.4s, v0.4s, #7
1603 shl v0.4s, v0.4s, #25
1604 eor v2.16b, v7.16b, v2.16b
1605 add v19.4s, v19.4s, v16.4s
1606 eor v5.16b, v21.16b, v5.16b
1607 orr v0.16b, v0.16b, v12.16b
1608 ushr v12.4s, v2.4s, #7
1609 shl v2.4s, v2.4s, #25
1610 eor v1.16b, v19.16b, v1.16b
1611 ushr v13.4s, v5.4s, #7
1612 shl v5.4s, v5.4s, #25
1613 add v22.4s, v22.4s, v8.4s
1614 orr v2.16b, v2.16b, v12.16b
1615 ushr v12.4s, v1.4s, #7
1616 shl v1.4s, v1.4s, #25
1617 orr v5.16b, v5.16b, v13.16b
1618 add v22.4s, v22.4s, v0.4s
1619 add v10.4s, v10.4s, v29.4s
1621 add v17.4s, v17.4s, v31.4s
1622 orr v1.16b, v1.16b, v12.16b
1623 add v20.4s, v20.4s, v29.4s
1624 eor v16.16b, v16.16b, v22.16b
1625 add v10.4s, v10.4s, v5.4s
1626 add v17.4s, v17.4s, v2.4s
1627 add v20.4s, v20.4s, v1.4s
1628 tbl v16.16b, { v16.16b }, v27.16b
1629 eor v25.16b, v25.16b, v10.16b
1630 eor v6.16b, v6.16b, v17.16b
1631 eor v4.16b, v4.16b, v20.16b
1632 add v21.4s, v21.4s, v16.4s
1633 tbl v25.16b, { v25.16b }, v27.16b
1634 tbl v6.16b, { v6.16b }, v27.16b
1635 tbl v4.16b, { v4.16b }, v27.16b
1636 eor v0.16b, v21.16b, v0.16b
1637 add v19.4s, v19.4s, v25.4s
1638 add v26.4s, v26.4s, v6.4s
1639 add v7.4s, v7.4s, v4.4s
1640 ushr v12.4s, v0.4s, #12
1641 shl v0.4s, v0.4s, #20
1642 eor v5.16b, v5.16b, v19.16b
1643 eor v2.16b, v26.16b, v2.16b
1644 eor v1.16b, v7.16b, v1.16b
1645 orr v0.16b, v0.16b, v12.16b
1646 ushr v12.4s, v5.4s, #12
1647 shl v5.4s, v5.4s, #20
1648 add v22.4s, v22.4s, v14.4s
1650 ushr v13.4s, v2.4s, #12
1651 shl v2.4s, v2.4s, #20
1652 mov v31.16b, v14.16b
1653 ushr v14.4s, v1.4s, #12
1654 shl v1.4s, v1.4s, #20
1655 orr v5.16b, v5.16b, v12.16b
1656 add v22.4s, v22.4s, v0.4s
1657 add v10.4s, v10.4s, v28.4s
1659 orr v2.16b, v2.16b, v13.16b
1660 orr v1.16b, v1.16b, v14.16b
1661 add v17.4s, v17.4s, v30.4s
1662 add v20.4s, v20.4s, v3.4s
1663 eor v16.16b, v16.16b, v22.16b
1664 add v10.4s, v10.4s, v5.4s
1665 add v17.4s, v17.4s, v2.4s
1666 add v20.4s, v20.4s, v1.4s
1667 tbl v16.16b, { v16.16b }, v18.16b
1668 eor v25.16b, v25.16b, v10.16b
1669 eor v6.16b, v6.16b, v17.16b
1670 eor v4.16b, v4.16b, v20.16b
1671 add v21.4s, v21.4s, v16.4s
1672 tbl v25.16b, { v25.16b }, v18.16b
1673 tbl v6.16b, { v6.16b }, v18.16b
1674 tbl v4.16b, { v4.16b }, v18.16b
1675 eor v0.16b, v21.16b, v0.16b
1676 add v19.4s, v19.4s, v25.4s
1677 add v26.4s, v26.4s, v6.4s
1678 add v7.4s, v7.4s, v4.4s
1679 ushr v12.4s, v0.4s, #7
1680 shl v0.4s, v0.4s, #25
1681 eor v5.16b, v19.16b, v5.16b
1682 eor v2.16b, v26.16b, v2.16b
1683 eor v1.16b, v7.16b, v1.16b
1684 orr v0.16b, v0.16b, v12.16b
1685 ushr v12.4s, v5.4s, #7
1686 shl v5.4s, v5.4s, #25
1687 add v10.4s, v10.4s, v23.4s
1688 ushr v13.4s, v2.4s, #7
1689 shl v2.4s, v2.4s, #25
1690 ushr v14.4s, v1.4s, #7
1691 shl v1.4s, v1.4s, #25
1692 orr v5.16b, v5.16b, v12.16b
1693 add v10.4s, v10.4s, v0.4s
1694 add v20.4s, v20.4s, v24.4s
1696 orr v2.16b, v2.16b, v13.16b
1697 orr v1.16b, v1.16b, v14.16b
1698 add v22.4s, v22.4s, v9.4s
1699 add v17.4s, v17.4s, v11.4s
1700 eor v4.16b, v4.16b, v10.16b
1701 add v20.4s, v20.4s, v5.4s
1702 add v22.4s, v22.4s, v2.4s
1703 add v17.4s, v17.4s, v1.4s
1704 tbl v4.16b, { v4.16b }, v27.16b
1705 eor v6.16b, v6.16b, v20.16b
1706 eor v25.16b, v25.16b, v22.16b
1707 eor v16.16b, v16.16b, v17.16b
1708 add v26.4s, v26.4s, v4.4s
1709 tbl v6.16b, { v6.16b }, v27.16b
1710 tbl v25.16b, { v25.16b }, v27.16b
1711 tbl v16.16b, { v16.16b }, v27.16b
1712 eor v0.16b, v26.16b, v0.16b
1713 add v21.4s, v21.4s, v6.4s
1714 add v7.4s, v7.4s, v25.4s
1715 add v19.4s, v19.4s, v16.4s
1716 ushr v12.4s, v0.4s, #12
1717 shl v0.4s, v0.4s, #20
1718 eor v5.16b, v21.16b, v5.16b
1719 eor v2.16b, v7.16b, v2.16b
1720 eor v1.16b, v19.16b, v1.16b
1721 orr v0.16b, v0.16b, v12.16b
1722 add v10.4s, v10.4s, v15.4s
1723 ushr v14.4s, v5.4s, #12
1724 shl v5.4s, v5.4s, #20
1727 ushr v12.4s, v2.4s, #12
1728 shl v2.4s, v2.4s, #20
1729 ushr v13.4s, v1.4s, #12
1730 shl v1.4s, v1.4s, #20
1731 add v10.4s, v10.4s, v0.4s
1732 orr v5.16b, v5.16b, v14.16b
1733 add v20.4s, v20.4s, v3.4s
1734 orr v2.16b, v2.16b, v12.16b
1735 orr v1.16b, v1.16b, v13.16b
1736 add v22.4s, v22.4s, v24.4s
1737 add v17.4s, v17.4s, v28.4s
1738 eor v4.16b, v4.16b, v10.16b
1739 add v20.4s, v20.4s, v5.4s
1740 add v22.4s, v22.4s, v2.4s
1741 add v17.4s, v17.4s, v1.4s
1742 tbl v4.16b, { v4.16b }, v18.16b
1743 eor v6.16b, v6.16b, v20.16b
1744 eor v25.16b, v25.16b, v22.16b
1745 eor v16.16b, v16.16b, v17.16b
1746 add v26.4s, v26.4s, v4.4s
1747 tbl v6.16b, { v6.16b }, v18.16b
1748 tbl v25.16b, { v25.16b }, v18.16b
1749 tbl v16.16b, { v16.16b }, v18.16b
1750 eor v0.16b, v26.16b, v0.16b
1751 add v21.4s, v21.4s, v6.4s
1752 add v7.4s, v7.4s, v25.4s
1753 add v19.4s, v19.4s, v16.4s
1754 ushr v12.4s, v0.4s, #7
1755 shl v0.4s, v0.4s, #25
1756 eor v5.16b, v21.16b, v5.16b
1757 eor v2.16b, v7.16b, v2.16b
1758 eor v1.16b, v19.16b, v1.16b
1759 orr v0.16b, v0.16b, v12.16b
1760 ushr v12.4s, v5.4s, #7
1761 shl v5.4s, v5.4s, #25
1764 ushr v13.4s, v2.4s, #7
1765 shl v2.4s, v2.4s, #25
1766 ushr v14.4s, v1.4s, #7
1767 shl v1.4s, v1.4s, #25
1768 orr v5.16b, v5.16b, v12.16b
1769 add v9.4s, v10.4s, v9.4s
1770 orr v2.16b, v2.16b, v13.16b
1771 orr v1.16b, v1.16b, v14.16b
1773 add v22.4s, v22.4s, v31.4s
1774 add v17.4s, v17.4s, v30.4s
1775 add v20.4s, v20.4s, v8.4s
1776 add v9.4s, v9.4s, v5.4s
1777 add v22.4s, v22.4s, v0.4s
1778 add v17.4s, v17.4s, v2.4s
1779 add v20.4s, v20.4s, v1.4s
1780 eor v25.16b, v25.16b, v9.16b
1781 eor v16.16b, v16.16b, v22.16b
1782 eor v6.16b, v6.16b, v17.16b
1783 eor v4.16b, v4.16b, v20.16b
1784 tbl v25.16b, { v25.16b }, v27.16b
1785 tbl v16.16b, { v16.16b }, v27.16b
1786 tbl v6.16b, { v6.16b }, v27.16b
1787 tbl v4.16b, { v4.16b }, v27.16b
1788 add v19.4s, v19.4s, v25.4s
1789 add v21.4s, v21.4s, v16.4s
1790 add v26.4s, v26.4s, v6.4s
1791 add v7.4s, v7.4s, v4.4s
1792 eor v5.16b, v5.16b, v19.16b
1793 eor v0.16b, v21.16b, v0.16b
1794 eor v2.16b, v26.16b, v2.16b
1795 eor v1.16b, v7.16b, v1.16b
1796 ushr v30.4s, v5.4s, #12
1797 shl v5.4s, v5.4s, #20
1798 ushr v10.4s, v0.4s, #12
1799 shl v0.4s, v0.4s, #20
1800 ushr v12.4s, v2.4s, #12
1801 shl v2.4s, v2.4s, #20
1802 ushr v13.4s, v1.4s, #12
1803 shl v1.4s, v1.4s, #20
1804 orr v5.16b, v5.16b, v30.16b
1805 add v30.4s, v9.4s, v29.4s
1806 add v22.4s, v22.4s, v23.4s
1808 orr v0.16b, v0.16b, v10.16b
1809 orr v2.16b, v2.16b, v12.16b
1810 orr v1.16b, v1.16b, v13.16b
1811 add v17.4s, v17.4s, v23.4s
1812 add v20.4s, v20.4s, v28.4s
1813 add v23.4s, v30.4s, v5.4s
1814 add v22.4s, v22.4s, v0.4s
1815 add v17.4s, v17.4s, v2.4s
1816 add v20.4s, v20.4s, v1.4s
1817 eor v25.16b, v25.16b, v23.16b
1818 eor v16.16b, v16.16b, v22.16b
1819 eor v6.16b, v6.16b, v17.16b
1820 eor v4.16b, v4.16b, v20.16b
1821 tbl v25.16b, { v25.16b }, v18.16b
1822 tbl v16.16b, { v16.16b }, v18.16b
1823 tbl v6.16b, { v6.16b }, v18.16b
1824 tbl v4.16b, { v4.16b }, v18.16b
1825 add v19.4s, v19.4s, v25.4s
1826 add v21.4s, v21.4s, v16.4s
1827 add v26.4s, v26.4s, v6.4s
1828 add v7.4s, v7.4s, v4.4s
1829 eor v5.16b, v19.16b, v5.16b
1830 eor v0.16b, v21.16b, v0.16b
1831 eor v2.16b, v26.16b, v2.16b
1832 eor v1.16b, v7.16b, v1.16b
1833 ushr v28.4s, v5.4s, #7
1834 shl v5.4s, v5.4s, #25
1835 ushr v30.4s, v0.4s, #7
1836 shl v0.4s, v0.4s, #25
1837 ushr v31.4s, v2.4s, #7
1838 shl v2.4s, v2.4s, #25
1839 ushr v8.4s, v1.4s, #7
1840 shl v1.4s, v1.4s, #25
1841 orr v5.16b, v5.16b, v28.16b
1843 orr v0.16b, v0.16b, v30.16b
1844 orr v2.16b, v2.16b, v31.16b
1845 orr v1.16b, v1.16b, v8.16b
1846 add v23.4s, v23.4s, v28.4s
1847 add v22.4s, v22.4s, v11.4s
1848 add v17.4s, v17.4s, v15.4s
1849 add v20.4s, v20.4s, v3.4s
1851 add v23.4s, v23.4s, v0.4s
1852 add v22.4s, v22.4s, v2.4s
1853 add v17.4s, v17.4s, v1.4s
1854 add v20.4s, v20.4s, v5.4s
1855 eor v4.16b, v4.16b, v23.16b
1856 eor v25.16b, v25.16b, v22.16b
1857 eor v16.16b, v16.16b, v17.16b
1858 eor v6.16b, v6.16b, v20.16b
1859 tbl v4.16b, { v4.16b }, v27.16b
1860 tbl v25.16b, { v25.16b }, v27.16b
1861 tbl v16.16b, { v16.16b }, v27.16b
1862 tbl v6.16b, { v6.16b }, v27.16b
1863 add v26.4s, v26.4s, v4.4s
1864 add v7.4s, v7.4s, v25.4s
1865 add v19.4s, v19.4s, v16.4s
1866 add v21.4s, v21.4s, v6.4s
1867 eor v0.16b, v26.16b, v0.16b
1868 eor v2.16b, v7.16b, v2.16b
1869 eor v1.16b, v19.16b, v1.16b
1870 eor v5.16b, v21.16b, v5.16b
1871 add v3.4s, v22.4s, v3.4s
1873 ushr v28.4s, v0.4s, #12
1874 shl v0.4s, v0.4s, #20
1875 ushr v29.4s, v2.4s, #12
1876 shl v2.4s, v2.4s, #20
1877 ushr v30.4s, v1.4s, #12
1878 shl v1.4s, v1.4s, #20
1879 ushr v31.4s, v5.4s, #12
1880 shl v5.4s, v5.4s, #20
1881 add v17.4s, v17.4s, v22.4s
1883 orr v0.16b, v0.16b, v28.16b
1884 prfm pldl1keep, [x23, #256]
1885 orr v2.16b, v2.16b, v29.16b
1886 prfm pldl1keep, [x24, #256]
1887 orr v1.16b, v1.16b, v30.16b
1888 prfm pldl1keep, [x22, #256]
1889 orr v5.16b, v5.16b, v31.16b
1890 prfm pldl1keep, [x25, #256]
1891 add v23.4s, v23.4s, v24.4s
1892 add v20.4s, v20.4s, v22.4s
1893 add v3.4s, v3.4s, v2.4s
1894 add v17.4s, v17.4s, v1.4s
1895 add v22.4s, v23.4s, v0.4s
1896 add v20.4s, v20.4s, v5.4s
1897 eor v23.16b, v25.16b, v3.16b
1898 eor v16.16b, v16.16b, v17.16b
1899 eor v4.16b, v4.16b, v22.16b
1900 eor v6.16b, v6.16b, v20.16b
1901 tbl v23.16b, { v23.16b }, v18.16b
1902 tbl v16.16b, { v16.16b }, v18.16b
1903 tbl v4.16b, { v4.16b }, v18.16b
1904 tbl v6.16b, { v6.16b }, v18.16b
1905 add v7.4s, v7.4s, v23.4s
1906 add v19.4s, v19.4s, v16.4s
1907 add v18.4s, v26.4s, v4.4s
1908 add v21.4s, v21.4s, v6.4s
1909 eor v2.16b, v7.16b, v2.16b
1910 eor v1.16b, v19.16b, v1.16b
1911 eor v0.16b, v18.16b, v0.16b
1912 eor v5.16b, v21.16b, v5.16b
1913 ushr v25.4s, v2.4s, #7
1914 shl v2.4s, v2.4s, #25
1915 ushr v24.4s, v0.4s, #7
1916 shl v0.4s, v0.4s, #25
1917 ushr v26.4s, v1.4s, #7
1918 shl v1.4s, v1.4s, #25
1919 ushr v27.4s, v5.4s, #7
1920 shl v5.4s, v5.4s, #25
1921 orr v0.16b, v0.16b, v24.16b
1922 orr v2.16b, v2.16b, v25.16b
1923 orr v1.16b, v1.16b, v26.16b
1924 orr v5.16b, v5.16b, v27.16b
1926 eor v29.16b, v19.16b, v22.16b
1927 eor v8.16b, v21.16b, v3.16b
1928 eor v30.16b, v17.16b, v18.16b
1929 eor v31.16b, v20.16b, v7.16b
1930 eor v24.16b, v5.16b, v23.16b
1931 eor v18.16b, v0.16b, v16.16b
1932 eor v25.16b, v2.16b, v6.16b
1933 eor v26.16b, v1.16b, v4.16b
1939 ldr q0, [x11, :lo12:.LCPI3_1]
1941 ldr q2, [x10, :lo12:.LCPI3_2]
1942 ldr q1, [x12, :lo12:.LCPI3_3]
1960 add v5.4s, v5.4s, v4.4s
1964 uzp1 v17.4s, v6.4s, v7.4s
1966 add v5.4s, v5.4s, v17.4s
1967 eor v16.16b, v5.16b, v16.16b
1968 tbl v16.16b, { v16.16b }, v0.16b
1969 add v18.4s, v16.4s, v1.4s
1970 eor v19.16b, v18.16b, v4.16b
1971 uzp2 v4.4s, v6.4s, v7.4s
1972 ushr v6.4s, v19.4s, #12
1973 shl v7.4s, v19.4s, #20
1974 ld2 { v19.4s, v20.4s }, [x14]
1975 add v5.4s, v5.4s, v4.4s
1977 orr v6.16b, v7.16b, v6.16b
1978 add v5.4s, v5.4s, v6.4s
1979 eor v7.16b, v16.16b, v5.16b
1980 add v5.4s, v5.4s, v19.4s
1981 tbl v7.16b, { v7.16b }, v2.16b
1982 ext v5.16b, v5.16b, v5.16b, #12
1983 add v16.4s, v18.4s, v7.4s
1984 ext v7.16b, v7.16b, v7.16b, #8
1985 eor v6.16b, v6.16b, v16.16b
1986 ext v16.16b, v16.16b, v16.16b, #4
1987 ushr v18.4s, v6.4s, #7
1988 shl v6.4s, v6.4s, #25
1989 orr v6.16b, v6.16b, v18.16b
1990 ext v18.16b, v20.16b, v20.16b, #12
1991 add v5.4s, v5.4s, v6.4s
1992 eor v7.16b, v5.16b, v7.16b
1993 add v5.4s, v5.4s, v18.4s
1994 tbl v7.16b, { v7.16b }, v0.16b
1995 add v16.4s, v16.4s, v7.4s
1996 eor v6.16b, v6.16b, v16.16b
1997 ushr v21.4s, v6.4s, #12
1998 shl v6.4s, v6.4s, #20
1999 orr v6.16b, v6.16b, v21.16b
2000 uzp1 v21.4s, v17.4s, v17.4s
2001 add v5.4s, v5.4s, v6.4s
2002 ext v21.16b, v21.16b, v17.16b, #8
2003 eor v7.16b, v7.16b, v5.16b
2004 uzp2 v21.4s, v21.4s, v4.4s
2005 tbl v7.16b, { v7.16b }, v2.16b
2006 add v5.4s, v5.4s, v21.4s
2007 add v16.4s, v16.4s, v7.4s
2008 ext v5.16b, v5.16b, v5.16b, #4
2009 ext v7.16b, v7.16b, v7.16b, #8
2010 eor v6.16b, v6.16b, v16.16b
2011 ushr v22.4s, v6.4s, #7
2012 shl v6.4s, v6.4s, #25
2013 orr v6.16b, v6.16b, v22.16b
2014 add v22.4s, v5.4s, v6.4s
2015 eor v5.16b, v22.16b, v7.16b
2016 ext v7.16b, v16.16b, v16.16b, #12
2017 tbl v16.16b, { v5.16b }, v0.16b
2018 ext v5.16b, v17.16b, v17.16b, #12
2019 add v7.4s, v7.4s, v16.4s
2020 ext v5.16b, v17.16b, v5.16b, #12
2021 ext v17.16b, v19.16b, v19.16b, #12
2022 mov v19.16b, v18.16b
2023 eor v6.16b, v6.16b, v7.16b
2025 mov v19.s[1], v17.s[2]
2026 ushr v20.4s, v6.4s, #12
2027 shl v6.4s, v6.4s, #20
2028 trn2 v5.4s, v5.4s, v19.4s
2029 orr v6.16b, v6.16b, v20.16b
2030 zip1 v20.2d, v18.2d, v4.2d
2031 zip2 v4.4s, v4.4s, v18.4s
2032 add v19.4s, v6.4s, v5.4s
2033 mov v20.s[3], v17.s[3]
2034 add v19.4s, v19.4s, v22.4s
2035 ext v22.16b, v20.16b, v20.16b, #12
2036 eor v16.16b, v16.16b, v19.16b
2037 ext v19.16b, v19.16b, v19.16b, #12
2038 tbl v16.16b, { v16.16b }, v2.16b
2039 add v7.4s, v7.4s, v16.4s
2040 ext v16.16b, v16.16b, v16.16b, #8
2041 eor v6.16b, v6.16b, v7.16b
2042 ext v7.16b, v7.16b, v7.16b, #4
2043 ushr v23.4s, v6.4s, #7
2044 shl v24.4s, v6.4s, #25
2045 uzp1 v6.4s, v20.4s, v22.4s
2046 orr v20.16b, v24.16b, v23.16b
2047 add v22.4s, v20.4s, v6.4s
2048 add v19.4s, v22.4s, v19.4s
2049 eor v16.16b, v19.16b, v16.16b
2050 tbl v16.16b, { v16.16b }, v0.16b
2051 add v7.4s, v7.4s, v16.4s
2052 eor v18.16b, v20.16b, v7.16b
2053 zip1 v20.4s, v4.4s, v17.4s
2054 zip1 v4.4s, v17.4s, v4.4s
2055 ushr v17.4s, v18.4s, #12
2056 shl v18.4s, v18.4s, #20
2057 ext v20.16b, v4.16b, v20.16b, #8
2058 orr v4.16b, v18.16b, v17.16b
2059 ext v18.16b, v21.16b, v21.16b, #4
2060 add v17.4s, v4.4s, v20.4s
2061 add v17.4s, v17.4s, v19.4s
2062 uzp1 v19.4s, v18.4s, v18.4s
2063 eor v16.16b, v16.16b, v17.16b
2064 ext v19.16b, v19.16b, v18.16b, #8
2065 tbl v16.16b, { v16.16b }, v2.16b
2066 uzp2 v19.4s, v19.4s, v5.4s
2067 add v7.4s, v7.4s, v16.4s
2068 add v17.4s, v17.4s, v19.4s
2069 ext v16.16b, v16.16b, v16.16b, #8
2070 eor v4.16b, v4.16b, v7.16b
2071 ext v17.16b, v17.16b, v17.16b, #4
2072 ext v7.16b, v7.16b, v7.16b, #12
2073 ushr v21.4s, v4.4s, #7
2074 shl v4.4s, v4.4s, #25
2075 orr v4.16b, v4.16b, v21.16b
2076 ext v21.16b, v18.16b, v18.16b, #12
2077 add v17.4s, v17.4s, v4.4s
2078 ext v18.16b, v18.16b, v21.16b, #12
2079 mov v21.16b, v20.16b
2080 eor v16.16b, v17.16b, v16.16b
2081 rev64 v18.4s, v18.4s
2082 mov v21.s[1], v6.s[2]
2083 tbl v16.16b, { v16.16b }, v0.16b
2084 add v7.4s, v7.4s, v16.4s
2085 eor v4.16b, v4.16b, v7.16b
2086 ushr v22.4s, v4.4s, #12
2087 shl v23.4s, v4.4s, #20
2088 trn2 v4.4s, v18.4s, v21.4s
2089 orr v18.16b, v23.16b, v22.16b
2090 add v21.4s, v18.4s, v4.4s
2091 add v17.4s, v21.4s, v17.4s
2092 zip1 v21.2d, v20.2d, v5.2d
2093 zip2 v5.4s, v5.4s, v20.4s
2094 eor v16.16b, v16.16b, v17.16b
2095 mov v21.s[3], v6.s[3]
2096 ext v17.16b, v17.16b, v17.16b, #12
2097 zip1 v20.4s, v5.4s, v6.4s
2098 tbl v16.16b, { v16.16b }, v2.16b
2099 zip1 v5.4s, v6.4s, v5.4s
2100 add v22.4s, v7.4s, v16.4s
2101 ext v16.16b, v16.16b, v16.16b, #8
2102 ext v20.16b, v5.16b, v20.16b, #8
2103 eor v7.16b, v18.16b, v22.16b
2104 ext v18.16b, v21.16b, v21.16b, #12
2105 ushr v23.4s, v7.4s, #7
2106 shl v24.4s, v7.4s, #25
2107 uzp1 v7.4s, v21.4s, v18.4s
2108 orr v18.16b, v24.16b, v23.16b
2109 add v21.4s, v18.4s, v7.4s
2110 add v17.4s, v21.4s, v17.4s
2111 ext v21.16b, v22.16b, v22.16b, #4
2112 eor v16.16b, v17.16b, v16.16b
2113 tbl v16.16b, { v16.16b }, v0.16b
2114 add v21.4s, v21.4s, v16.4s
2115 eor v18.16b, v18.16b, v21.16b
2116 ushr v6.4s, v18.4s, #12
2117 shl v18.4s, v18.4s, #20
2118 orr v5.16b, v18.16b, v6.16b
2119 add v6.4s, v5.4s, v20.4s
2120 add v6.4s, v6.4s, v17.4s
2121 ext v17.16b, v19.16b, v19.16b, #4
2122 eor v16.16b, v16.16b, v6.16b
2123 uzp1 v18.4s, v17.4s, v17.4s
2124 tbl v16.16b, { v16.16b }, v2.16b
2125 ext v18.16b, v18.16b, v17.16b, #8
2126 add v19.4s, v21.4s, v16.4s
2127 uzp2 v18.4s, v18.4s, v4.4s
2128 ext v16.16b, v16.16b, v16.16b, #8
2129 eor v5.16b, v5.16b, v19.16b
2130 add v6.4s, v6.4s, v18.4s
2131 ext v19.16b, v19.16b, v19.16b, #12
2132 ushr v21.4s, v5.4s, #7
2133 shl v5.4s, v5.4s, #25
2134 ext v6.16b, v6.16b, v6.16b, #4
2135 orr v5.16b, v5.16b, v21.16b
2136 ext v21.16b, v17.16b, v17.16b, #12
2137 add v6.4s, v6.4s, v5.4s
2138 ext v17.16b, v17.16b, v21.16b, #12
2139 mov v21.16b, v20.16b
2140 eor v16.16b, v6.16b, v16.16b
2141 rev64 v17.4s, v17.4s
2142 mov v21.s[1], v7.s[2]
2143 tbl v16.16b, { v16.16b }, v0.16b
2144 add v19.4s, v19.4s, v16.4s
2145 eor v5.16b, v5.16b, v19.16b
2146 ushr v22.4s, v5.4s, #12
2147 shl v23.4s, v5.4s, #20
2148 trn2 v5.4s, v17.4s, v21.4s
2149 orr v17.16b, v23.16b, v22.16b
2150 add v21.4s, v17.4s, v5.4s
2151 add v6.4s, v21.4s, v6.4s
2152 eor v16.16b, v16.16b, v6.16b
2153 ext v6.16b, v6.16b, v6.16b, #12
2154 tbl v21.16b, { v16.16b }, v2.16b
2155 zip1 v16.2d, v20.2d, v4.2d
2156 zip2 v4.4s, v4.4s, v20.4s
2157 add v19.4s, v19.4s, v21.4s
2158 mov v16.s[3], v7.s[3]
2159 ext v21.16b, v21.16b, v21.16b, #8
2160 zip1 v20.4s, v4.4s, v7.4s
2161 eor v17.16b, v17.16b, v19.16b
2162 ext v22.16b, v16.16b, v16.16b, #12
2163 ext v19.16b, v19.16b, v19.16b, #4
2164 zip1 v4.4s, v7.4s, v4.4s
2165 ushr v23.4s, v17.4s, #7
2166 shl v17.4s, v17.4s, #25
2167 uzp1 v16.4s, v16.4s, v22.4s
2168 ext v4.16b, v4.16b, v20.16b, #8
2169 orr v17.16b, v17.16b, v23.16b
2170 add v22.4s, v17.4s, v16.4s
2171 add v6.4s, v22.4s, v6.4s
2172 eor v21.16b, v6.16b, v21.16b
2173 tbl v21.16b, { v21.16b }, v0.16b
2174 add v19.4s, v19.4s, v21.4s
2175 eor v17.16b, v17.16b, v19.16b
2176 ushr v7.4s, v17.4s, #12
2177 shl v17.4s, v17.4s, #20
2178 orr v7.16b, v17.16b, v7.16b
2179 add v17.4s, v7.4s, v4.4s
2180 add v6.4s, v17.4s, v6.4s
2181 ext v17.16b, v18.16b, v18.16b, #4
2182 eor v18.16b, v21.16b, v6.16b
2183 uzp1 v20.4s, v17.4s, v17.4s
2184 tbl v18.16b, { v18.16b }, v2.16b
2185 ext v20.16b, v20.16b, v17.16b, #8
2186 add v19.4s, v19.4s, v18.4s
2187 uzp2 v20.4s, v20.4s, v5.4s
2188 ext v18.16b, v18.16b, v18.16b, #8
2189 eor v7.16b, v7.16b, v19.16b
2190 add v6.4s, v6.4s, v20.4s
2191 ushr v21.4s, v7.4s, #7
2192 shl v7.4s, v7.4s, #25
2193 ext v6.16b, v6.16b, v6.16b, #4
2194 orr v7.16b, v7.16b, v21.16b
2195 add v21.4s, v6.4s, v7.4s
2196 eor v6.16b, v21.16b, v18.16b
2197 ext v18.16b, v19.16b, v19.16b, #12
2198 tbl v19.16b, { v6.16b }, v0.16b
2199 ext v6.16b, v17.16b, v17.16b, #12
2200 add v18.4s, v18.4s, v19.4s
2201 ext v6.16b, v17.16b, v6.16b, #12
2203 eor v7.16b, v7.16b, v18.16b
2205 mov v17.s[1], v16.s[2]
2206 ushr v22.4s, v7.4s, #12
2207 shl v7.4s, v7.4s, #20
2208 trn2 v6.4s, v6.4s, v17.4s
2209 orr v7.16b, v7.16b, v22.16b
2210 add v17.4s, v7.4s, v6.4s
2211 add v17.4s, v17.4s, v21.4s
2212 zip1 v21.2d, v4.2d, v5.2d
2213 zip2 v4.4s, v5.4s, v4.4s
2214 eor v19.16b, v19.16b, v17.16b
2215 mov v21.s[3], v16.s[3]
2216 ext v17.16b, v17.16b, v17.16b, #12
2217 tbl v19.16b, { v19.16b }, v2.16b
2218 ext v22.16b, v21.16b, v21.16b, #12
2219 add v18.4s, v18.4s, v19.4s
2220 ext v19.16b, v19.16b, v19.16b, #8
2221 eor v7.16b, v7.16b, v18.16b
2222 ext v18.16b, v18.16b, v18.16b, #4
2223 ushr v23.4s, v7.4s, #7
2224 shl v24.4s, v7.4s, #25
2225 uzp1 v7.4s, v21.4s, v22.4s
2226 orr v21.16b, v24.16b, v23.16b
2227 add v22.4s, v21.4s, v7.4s
2228 add v17.4s, v22.4s, v17.4s
2229 eor v19.16b, v17.16b, v19.16b
2230 tbl v19.16b, { v19.16b }, v0.16b
2231 add v18.4s, v18.4s, v19.4s
2232 eor v5.16b, v21.16b, v18.16b
2233 zip1 v21.4s, v4.4s, v16.4s
2234 zip1 v4.4s, v16.4s, v4.4s
2235 ushr v16.4s, v5.4s, #12
2236 shl v5.4s, v5.4s, #20
2237 ext v21.16b, v4.16b, v21.16b, #8
2238 orr v4.16b, v5.16b, v16.16b
2239 ext v16.16b, v20.16b, v20.16b, #4
2240 mov v23.16b, v21.16b
2241 add v5.4s, v4.4s, v21.4s
2242 mov v23.s[1], v7.s[2]
2243 add v5.4s, v5.4s, v17.4s
2244 eor v17.16b, v19.16b, v5.16b
2245 uzp1 v19.4s, v16.4s, v16.4s
2246 tbl v17.16b, { v17.16b }, v2.16b
2247 ext v19.16b, v19.16b, v16.16b, #8
2248 add v18.4s, v18.4s, v17.4s
2249 uzp2 v19.4s, v19.4s, v6.4s
2250 eor v4.16b, v4.16b, v18.16b
2251 add v5.4s, v5.4s, v19.4s
2252 ext v19.16b, v19.16b, v19.16b, #4
2253 ushr v20.4s, v4.4s, #7
2254 shl v4.4s, v4.4s, #25
2255 ext v5.16b, v5.16b, v5.16b, #4
2256 orr v20.16b, v4.16b, v20.16b
2257 ext v4.16b, v17.16b, v17.16b, #8
2258 add v17.4s, v5.4s, v20.4s
2259 ext v5.16b, v18.16b, v18.16b, #12
2260 eor v4.16b, v17.16b, v4.16b
2261 tbl v18.16b, { v4.16b }, v0.16b
2262 ext v4.16b, v16.16b, v16.16b, #12
2263 add v22.4s, v5.4s, v18.4s
2264 ext v4.16b, v16.16b, v4.16b, #12
2265 eor v5.16b, v20.16b, v22.16b
2267 ushr v20.4s, v5.4s, #12
2268 shl v24.4s, v5.4s, #20
2269 trn2 v5.4s, v16.4s, v23.4s
2270 orr v16.16b, v24.16b, v20.16b
2271 add v20.4s, v16.4s, v5.4s
2272 add v17.4s, v20.4s, v17.4s
2273 zip1 v20.2d, v21.2d, v6.2d
2274 zip2 v6.4s, v6.4s, v21.4s
2275 eor v18.16b, v18.16b, v17.16b
2276 mov v20.s[3], v7.s[3]
2277 ext v17.16b, v17.16b, v17.16b, #12
2278 zip1 v21.4s, v6.4s, v7.4s
2279 tbl v18.16b, { v18.16b }, v2.16b
2280 ext v24.16b, v20.16b, v20.16b, #12
2281 zip1 v6.4s, v7.4s, v6.4s
2282 add v22.4s, v22.4s, v18.4s
2283 ext v18.16b, v18.16b, v18.16b, #8
2284 ext v6.16b, v6.16b, v21.16b, #8
2285 eor v16.16b, v16.16b, v22.16b
2286 ext v22.16b, v22.16b, v22.16b, #4
2287 zip1 v5.2d, v6.2d, v5.2d
2288 zip2 v4.4s, v4.4s, v6.4s
2289 ushr v25.4s, v16.4s, #7
2290 shl v26.4s, v16.4s, #25
2291 uzp1 v16.4s, v20.4s, v24.4s
2292 orr v20.16b, v26.16b, v25.16b
2293 mov v5.s[3], v16.s[3]
2294 add v24.4s, v20.4s, v16.4s
2295 add v17.4s, v24.4s, v17.4s
2296 eor v18.16b, v17.16b, v18.16b
2297 tbl v18.16b, { v18.16b }, v0.16b
2298 add v22.4s, v22.4s, v18.4s
2299 eor v20.16b, v20.16b, v22.16b
2300 ushr v7.4s, v20.4s, #12
2301 shl v20.4s, v20.4s, #20
2302 orr v7.16b, v20.16b, v7.16b
2303 add v20.4s, v7.4s, v6.4s
2304 add v17.4s, v20.4s, v17.4s
2305 ext v20.16b, v19.16b, v19.16b, #8
2306 eor v18.16b, v18.16b, v17.16b
2307 ext v17.16b, v17.16b, v17.16b, #4
2308 tbl v18.16b, { v18.16b }, v2.16b
2309 add v21.4s, v22.4s, v18.4s
2310 uzp2 v22.4s, v20.4s, v23.4s
2311 ext v18.16b, v18.16b, v18.16b, #8
2312 eor v7.16b, v7.16b, v21.16b
2313 ext v20.16b, v22.16b, v20.16b, #4
2314 ushr v22.4s, v7.4s, #7
2315 shl v7.4s, v7.4s, #25
2316 add v17.4s, v17.4s, v20.4s
2317 ext v20.16b, v21.16b, v21.16b, #12
2318 ext v21.16b, v19.16b, v19.16b, #12
2319 orr v7.16b, v7.16b, v22.16b
2320 ext v19.16b, v19.16b, v21.16b, #12
2321 add v17.4s, v17.4s, v7.4s
2323 rev64 v19.4s, v19.4s
2324 eor v18.16b, v17.16b, v18.16b
2325 mov v21.s[1], v16.s[2]
2326 tbl v18.16b, { v18.16b }, v0.16b
2327 trn2 v19.4s, v19.4s, v21.4s
2328 add v20.4s, v20.4s, v18.4s
2329 eor v7.16b, v7.16b, v20.16b
2330 ushr v22.4s, v7.4s, #12
2331 shl v7.4s, v7.4s, #20
2332 orr v7.16b, v7.16b, v22.16b
2333 add v19.4s, v7.4s, v19.4s
2334 add v17.4s, v19.4s, v17.4s
2335 eor v18.16b, v18.16b, v17.16b
2336 ext v17.16b, v17.16b, v17.16b, #12
2337 tbl v18.16b, { v18.16b }, v2.16b
2338 add v19.4s, v20.4s, v18.4s
2339 ext v20.16b, v5.16b, v5.16b, #12
2340 ext v18.16b, v18.16b, v18.16b, #8
2341 eor v7.16b, v7.16b, v19.16b
2342 uzp1 v5.4s, v5.4s, v20.4s
2343 ushr v21.4s, v7.4s, #7
2344 shl v7.4s, v7.4s, #25
2345 orr v7.16b, v7.16b, v21.16b
2346 add v5.4s, v7.4s, v5.4s
2347 add v5.4s, v5.4s, v17.4s
2348 eor v17.16b, v5.16b, v18.16b
2349 ext v18.16b, v19.16b, v19.16b, #4
2350 tbl v17.16b, { v17.16b }, v0.16b
2351 add v18.4s, v18.4s, v17.4s
2352 eor v6.16b, v7.16b, v18.16b
2353 zip1 v7.4s, v4.4s, v16.4s
2354 zip1 v4.4s, v16.4s, v4.4s
2355 ushr v16.4s, v6.4s, #12
2356 shl v6.4s, v6.4s, #20
2357 ext v4.16b, v4.16b, v7.16b, #8
2358 orr v6.16b, v6.16b, v16.16b
2359 add v4.4s, v6.4s, v4.4s
2360 add v4.4s, v4.4s, v5.4s
2361 eor v5.16b, v17.16b, v4.16b
2362 ext v4.16b, v4.16b, v4.16b, #4
2363 tbl v5.16b, { v5.16b }, v2.16b
2364 add v7.4s, v18.4s, v5.4s
2365 eor v6.16b, v6.16b, v7.16b
2366 ext v7.16b, v7.16b, v7.16b, #12
2367 ushr v16.4s, v6.4s, #7
2368 shl v6.4s, v6.4s, #25
2369 orr v6.16b, v6.16b, v16.16b
2370 ext v16.16b, v5.16b, v5.16b, #8
2371 eor v5.16b, v4.16b, v7.16b
2372 eor v4.16b, v6.16b, v16.16b
2380 stp q5, q4, [x8], #32
2384 ldp x20, x19, [sp, #128]
2385 ldp x22, x21, [sp, #112]
2386 ldp x24, x23, [sp, #96]
2387 ldp x26, x25, [sp, #80]
2388 ldp x29, x27, [sp, #64]
2389 ldp d9, d8, [sp, #48]
2390 ldp d11, d10, [sp, #32]
2391 ldp d13, d12, [sp, #16]
2392 ldp d15, d14, [sp], #144
2395 .size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41
2397 .section ".note.GNU-stack","",@progbits