2 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * https://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
19 * - modified assembly to fit into OpenZFS
22 #if defined(__aarch64__)
// SHA-512 round constants K[0..79] (FIPS 180-4), two 64-bit words per line,
// 40 lines = 80 values in round order.
// The scalar routine in this file consumes them one word at a time through
// post-incremented 8-byte loads (the "*K++" loads); the NEON routine reads
// them two at a time with 16-byte post-incremented ld1 loads ("K512[i]").
// NOTE(review): the scalar loop exits when a loaded K word is zero and then
// rewinds by 648 bytes (81 * 8), so a zero terminator word presumably follows
// the last entry - confirm against the full file.
29 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
30 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
31 .quad 0x3956c25bf348b538,0x59f111f1b605d019
32 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
33 .quad 0xd807aa98a3030242,0x12835b0145706fbe
34 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
35 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
36 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
37 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
38 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
39 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
40 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
41 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
42 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
43 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
44 .quad 0x06ca6351e003826f,0x142929670a0e6e70
45 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
46 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
47 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
48 .quad 0x81c2c92e47edaee6,0x92722c851482353b
49 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
50 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
51 .quad 0xd192e819d6ef5218,0xd69906245565a910
52 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
53 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
54 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
55 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
56 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
57 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
58 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
59 .quad 0x90befffa23631e28,0xa4506cebde82bde9
60 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
61 .quad 0xca273eceea26619c,0xd186b8c721c0c207
62 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
63 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
64 .quad 0x113f9804bef90dae,0x1b710b35131c471b
65 .quad 0x28db77f523047d84,0x32caab7b40c72493
66 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
67 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
68 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
// zfs_sha512_block_armv7: SHA-512 block function written with general-purpose
// (scalar) registers only.
//
// Register roles, as established by the instructions below:
//   x0       on entry, pointer to the hash context (state words are loaded
//            from it and stored back to it with ldp/stp)
//   x1       on entry, input pointer; message words are fetched from it with
//            post-incremented loads
//   x2       on entry, a block count; converted to the end-of-input address
//            as x1 + (x2 << 7), i.e. 128 bytes per block
//   x30      walks the round-constant table through post-incremented 8-byte
//            loads (the "*K++" loads)
//   x20-x27  working variables a..h; their roles rotate by one register each
//            round (round 0: a=x20, d=x23, e=x24, h=x27)
//   x19/x28  alternate each round between the current K[i] and the
//            a^b / b^c value used to build Maj(a,b,c)
//   x16/x17  scratch for Sigma1(e), Ch(e,f,g) and Sigma0(a)
//   x3-x15, x0, x1, x2
//            hold the 16 message-schedule words X[0..15] once the rounds are
//            running (x0-x2 are reused as scratch, so their entry values are
//            no longer live in those registers at that point)
//
// The h+=Sigma0(a) addition of every round is deferred: it appears commented
// out at the end of the round and is performed at the start of the next one.
72 .globl zfs_sha512_block_armv7
73 .type zfs_sha512_block_armv7,%function
75 zfs_sha512_block_armv7:
77 stp x29,x30,[sp,#-128]! // push fp/lr and reserve a 128-byte frame
87 ldp x20,x21,[x0] // load context
90 add x2,x1,x2,lsl#7 // end of input
97 ldr x19,[x30],#8 // *K++
98 eor x28,x21,x22 // magic seed
100 #ifndef __AARCH64EB__
// Rounds fed directly from the input block: X[i] is taken from x3..x15 and
// then x0, x1, x2, one register per round.
104 add x27,x27,x19 // h+=K[i]
105 eor x6,x24,x24,ror#23
108 add x27,x27,x3 // h+=X[i]
109 orr x17,x17,x19 // Ch(e,f,g)
110 eor x19,x20,x21 // a^b, b^c in next round
111 eor x16,x16,x6,ror#18 // Sigma1(e)
113 add x27,x27,x17 // h+=Ch(e,f,g)
114 eor x17,x20,x20,ror#5
115 add x27,x27,x16 // h+=Sigma1(e)
116 and x28,x28,x19 // (b^c)&=(a^b)
117 add x23,x23,x27 // d+=h
118 eor x28,x28,x21 // Maj(a,b,c)
119 eor x17,x6,x17,ror#34 // Sigma0(a)
120 add x27,x27,x28 // h+=Maj(a,b,c)
121 ldr x28,[x30],#8 // *K++, x19 in next round
122 //add x27,x27,x17 // h+=Sigma0(a)
123 #ifndef __AARCH64EB__
127 add x27,x27,x17 // h+=Sigma0(a)
129 add x26,x26,x28 // h+=K[i]
130 eor x7,x23,x23,ror#23
133 add x26,x26,x4 // h+=X[i]
134 orr x17,x17,x28 // Ch(e,f,g)
135 eor x28,x27,x20 // a^b, b^c in next round
136 eor x16,x16,x7,ror#18 // Sigma1(e)
138 add x26,x26,x17 // h+=Ch(e,f,g)
139 eor x17,x27,x27,ror#5
140 add x26,x26,x16 // h+=Sigma1(e)
141 and x19,x19,x28 // (b^c)&=(a^b)
142 add x22,x22,x26 // d+=h
143 eor x19,x19,x20 // Maj(a,b,c)
144 eor x17,x7,x17,ror#34 // Sigma0(a)
145 add x26,x26,x19 // h+=Maj(a,b,c)
146 ldr x19,[x30],#8 // *K++, x28 in next round
147 //add x26,x26,x17 // h+=Sigma0(a)
148 #ifndef __AARCH64EB__
151 add x26,x26,x17 // h+=Sigma0(a)
153 add x25,x25,x19 // h+=K[i]
154 eor x8,x22,x22,ror#23
157 add x25,x25,x5 // h+=X[i]
158 orr x17,x17,x19 // Ch(e,f,g)
159 eor x19,x26,x27 // a^b, b^c in next round
160 eor x16,x16,x8,ror#18 // Sigma1(e)
162 add x25,x25,x17 // h+=Ch(e,f,g)
163 eor x17,x26,x26,ror#5
164 add x25,x25,x16 // h+=Sigma1(e)
165 and x28,x28,x19 // (b^c)&=(a^b)
166 add x21,x21,x25 // d+=h
167 eor x28,x28,x27 // Maj(a,b,c)
168 eor x17,x8,x17,ror#34 // Sigma0(a)
169 add x25,x25,x28 // h+=Maj(a,b,c)
170 ldr x28,[x30],#8 // *K++, x19 in next round
171 //add x25,x25,x17 // h+=Sigma0(a)
172 #ifndef __AARCH64EB__
176 add x25,x25,x17 // h+=Sigma0(a)
178 add x24,x24,x28 // h+=K[i]
179 eor x9,x21,x21,ror#23
182 add x24,x24,x6 // h+=X[i]
183 orr x17,x17,x28 // Ch(e,f,g)
184 eor x28,x25,x26 // a^b, b^c in next round
185 eor x16,x16,x9,ror#18 // Sigma1(e)
187 add x24,x24,x17 // h+=Ch(e,f,g)
188 eor x17,x25,x25,ror#5
189 add x24,x24,x16 // h+=Sigma1(e)
190 and x19,x19,x28 // (b^c)&=(a^b)
191 add x20,x20,x24 // d+=h
192 eor x19,x19,x26 // Maj(a,b,c)
193 eor x17,x9,x17,ror#34 // Sigma0(a)
194 add x24,x24,x19 // h+=Maj(a,b,c)
195 ldr x19,[x30],#8 // *K++, x28 in next round
196 //add x24,x24,x17 // h+=Sigma0(a)
197 #ifndef __AARCH64EB__
200 add x24,x24,x17 // h+=Sigma0(a)
202 add x23,x23,x19 // h+=K[i]
203 eor x10,x20,x20,ror#23
206 add x23,x23,x7 // h+=X[i]
207 orr x17,x17,x19 // Ch(e,f,g)
208 eor x19,x24,x25 // a^b, b^c in next round
209 eor x16,x16,x10,ror#18 // Sigma1(e)
211 add x23,x23,x17 // h+=Ch(e,f,g)
212 eor x17,x24,x24,ror#5
213 add x23,x23,x16 // h+=Sigma1(e)
214 and x28,x28,x19 // (b^c)&=(a^b)
215 add x27,x27,x23 // d+=h
216 eor x28,x28,x25 // Maj(a,b,c)
217 eor x17,x10,x17,ror#34 // Sigma0(a)
218 add x23,x23,x28 // h+=Maj(a,b,c)
219 ldr x28,[x30],#8 // *K++, x19 in next round
220 //add x23,x23,x17 // h+=Sigma0(a)
221 #ifndef __AARCH64EB__
225 add x23,x23,x17 // h+=Sigma0(a)
227 add x22,x22,x28 // h+=K[i]
228 eor x11,x27,x27,ror#23
231 add x22,x22,x8 // h+=X[i]
232 orr x17,x17,x28 // Ch(e,f,g)
233 eor x28,x23,x24 // a^b, b^c in next round
234 eor x16,x16,x11,ror#18 // Sigma1(e)
236 add x22,x22,x17 // h+=Ch(e,f,g)
237 eor x17,x23,x23,ror#5
238 add x22,x22,x16 // h+=Sigma1(e)
239 and x19,x19,x28 // (b^c)&=(a^b)
240 add x26,x26,x22 // d+=h
241 eor x19,x19,x24 // Maj(a,b,c)
242 eor x17,x11,x17,ror#34 // Sigma0(a)
243 add x22,x22,x19 // h+=Maj(a,b,c)
244 ldr x19,[x30],#8 // *K++, x28 in next round
245 //add x22,x22,x17 // h+=Sigma0(a)
246 #ifndef __AARCH64EB__
249 add x22,x22,x17 // h+=Sigma0(a)
251 add x21,x21,x19 // h+=K[i]
252 eor x12,x26,x26,ror#23
255 add x21,x21,x9 // h+=X[i]
256 orr x17,x17,x19 // Ch(e,f,g)
257 eor x19,x22,x23 // a^b, b^c in next round
258 eor x16,x16,x12,ror#18 // Sigma1(e)
260 add x21,x21,x17 // h+=Ch(e,f,g)
261 eor x17,x22,x22,ror#5
262 add x21,x21,x16 // h+=Sigma1(e)
263 and x28,x28,x19 // (b^c)&=(a^b)
264 add x25,x25,x21 // d+=h
265 eor x28,x28,x23 // Maj(a,b,c)
266 eor x17,x12,x17,ror#34 // Sigma0(a)
267 add x21,x21,x28 // h+=Maj(a,b,c)
268 ldr x28,[x30],#8 // *K++, x19 in next round
269 //add x21,x21,x17 // h+=Sigma0(a)
270 #ifndef __AARCH64EB__
// Fetch the next two message words; x1 is post-incremented by 16 bytes.
273 ldp x11,x12,[x1],#2*8
274 add x21,x21,x17 // h+=Sigma0(a)
276 add x20,x20,x28 // h+=K[i]
277 eor x13,x25,x25,ror#23
280 add x20,x20,x10 // h+=X[i]
281 orr x17,x17,x28 // Ch(e,f,g)
282 eor x28,x21,x22 // a^b, b^c in next round
283 eor x16,x16,x13,ror#18 // Sigma1(e)
285 add x20,x20,x17 // h+=Ch(e,f,g)
286 eor x17,x21,x21,ror#5
287 add x20,x20,x16 // h+=Sigma1(e)
288 and x19,x19,x28 // (b^c)&=(a^b)
289 add x24,x24,x20 // d+=h
290 eor x19,x19,x22 // Maj(a,b,c)
291 eor x17,x13,x17,ror#34 // Sigma0(a)
292 add x20,x20,x19 // h+=Maj(a,b,c)
293 ldr x19,[x30],#8 // *K++, x28 in next round
294 //add x20,x20,x17 // h+=Sigma0(a)
295 #ifndef __AARCH64EB__
298 add x20,x20,x17 // h+=Sigma0(a)
300 add x27,x27,x19 // h+=K[i]
301 eor x14,x24,x24,ror#23
304 add x27,x27,x11 // h+=X[i]
305 orr x17,x17,x19 // Ch(e,f,g)
306 eor x19,x20,x21 // a^b, b^c in next round
307 eor x16,x16,x14,ror#18 // Sigma1(e)
309 add x27,x27,x17 // h+=Ch(e,f,g)
310 eor x17,x20,x20,ror#5
311 add x27,x27,x16 // h+=Sigma1(e)
312 and x28,x28,x19 // (b^c)&=(a^b)
313 add x23,x23,x27 // d+=h
314 eor x28,x28,x21 // Maj(a,b,c)
315 eor x17,x14,x17,ror#34 // Sigma0(a)
316 add x27,x27,x28 // h+=Maj(a,b,c)
317 ldr x28,[x30],#8 // *K++, x19 in next round
318 //add x27,x27,x17 // h+=Sigma0(a)
319 #ifndef __AARCH64EB__
// Fetch the next two message words; x1 is post-incremented by 16 bytes.
322 ldp x13,x14,[x1],#2*8
323 add x27,x27,x17 // h+=Sigma0(a)
325 add x26,x26,x28 // h+=K[i]
326 eor x15,x23,x23,ror#23
329 add x26,x26,x12 // h+=X[i]
330 orr x17,x17,x28 // Ch(e,f,g)
331 eor x28,x27,x20 // a^b, b^c in next round
332 eor x16,x16,x15,ror#18 // Sigma1(e)
334 add x26,x26,x17 // h+=Ch(e,f,g)
335 eor x17,x27,x27,ror#5
336 add x26,x26,x16 // h+=Sigma1(e)
337 and x19,x19,x28 // (b^c)&=(a^b)
338 add x22,x22,x26 // d+=h
339 eor x19,x19,x20 // Maj(a,b,c)
340 eor x17,x15,x17,ror#34 // Sigma0(a)
341 add x26,x26,x19 // h+=Maj(a,b,c)
342 ldr x19,[x30],#8 // *K++, x28 in next round
343 //add x26,x26,x17 // h+=Sigma0(a)
344 #ifndef __AARCH64EB__
347 add x26,x26,x17 // h+=Sigma0(a)
349 add x25,x25,x19 // h+=K[i]
350 eor x0,x22,x22,ror#23
353 add x25,x25,x13 // h+=X[i]
354 orr x17,x17,x19 // Ch(e,f,g)
355 eor x19,x26,x27 // a^b, b^c in next round
356 eor x16,x16,x0,ror#18 // Sigma1(e)
358 add x25,x25,x17 // h+=Ch(e,f,g)
359 eor x17,x26,x26,ror#5
360 add x25,x25,x16 // h+=Sigma1(e)
361 and x28,x28,x19 // (b^c)&=(a^b)
362 add x21,x21,x25 // d+=h
363 eor x28,x28,x27 // Maj(a,b,c)
364 eor x17,x0,x17,ror#34 // Sigma0(a)
365 add x25,x25,x28 // h+=Maj(a,b,c)
366 ldr x28,[x30],#8 // *K++, x19 in next round
367 //add x25,x25,x17 // h+=Sigma0(a)
368 #ifndef __AARCH64EB__
372 add x25,x25,x17 // h+=Sigma0(a)
375 add x24,x24,x28 // h+=K[i]
376 eor x6,x21,x21,ror#23
379 add x24,x24,x14 // h+=X[i]
380 orr x17,x17,x28 // Ch(e,f,g)
381 eor x28,x25,x26 // a^b, b^c in next round
382 eor x16,x16,x6,ror#18 // Sigma1(e)
384 add x24,x24,x17 // h+=Ch(e,f,g)
385 eor x17,x25,x25,ror#5
386 add x24,x24,x16 // h+=Sigma1(e)
387 and x19,x19,x28 // (b^c)&=(a^b)
388 add x20,x20,x24 // d+=h
389 eor x19,x19,x26 // Maj(a,b,c)
390 eor x17,x6,x17,ror#34 // Sigma0(a)
391 add x24,x24,x19 // h+=Maj(a,b,c)
392 ldr x19,[x30],#8 // *K++, x28 in next round
393 //add x24,x24,x17 // h+=Sigma0(a)
394 #ifndef __AARCH64EB__
397 add x24,x24,x17 // h+=Sigma0(a)
400 add x23,x23,x19 // h+=K[i]
401 eor x7,x20,x20,ror#23
404 add x23,x23,x15 // h+=X[i]
405 orr x17,x17,x19 // Ch(e,f,g)
406 eor x19,x24,x25 // a^b, b^c in next round
407 eor x16,x16,x7,ror#18 // Sigma1(e)
409 add x23,x23,x17 // h+=Ch(e,f,g)
410 eor x17,x24,x24,ror#5
411 add x23,x23,x16 // h+=Sigma1(e)
412 and x28,x28,x19 // (b^c)&=(a^b)
413 add x27,x27,x23 // d+=h
414 eor x28,x28,x25 // Maj(a,b,c)
415 eor x17,x7,x17,ror#34 // Sigma0(a)
416 add x23,x23,x28 // h+=Maj(a,b,c)
417 ldr x28,[x30],#8 // *K++, x19 in next round
418 //add x23,x23,x17 // h+=Sigma0(a)
419 #ifndef __AARCH64EB__
423 add x23,x23,x17 // h+=Sigma0(a)
426 add x22,x22,x28 // h+=K[i]
427 eor x8,x27,x27,ror#23
430 add x22,x22,x0 // h+=X[i]
431 orr x17,x17,x28 // Ch(e,f,g)
432 eor x28,x23,x24 // a^b, b^c in next round
433 eor x16,x16,x8,ror#18 // Sigma1(e)
435 add x22,x22,x17 // h+=Ch(e,f,g)
436 eor x17,x23,x23,ror#5
437 add x22,x22,x16 // h+=Sigma1(e)
438 and x19,x19,x28 // (b^c)&=(a^b)
439 add x26,x26,x22 // d+=h
440 eor x19,x19,x24 // Maj(a,b,c)
441 eor x17,x8,x17,ror#34 // Sigma0(a)
442 add x22,x22,x19 // h+=Maj(a,b,c)
443 ldr x19,[x30],#8 // *K++, x28 in next round
444 //add x22,x22,x17 // h+=Sigma0(a)
445 #ifndef __AARCH64EB__
449 add x22,x22,x17 // h+=Sigma0(a)
452 add x21,x21,x19 // h+=K[i]
453 eor x9,x26,x26,ror#23
456 add x21,x21,x1 // h+=X[i]
457 orr x17,x17,x19 // Ch(e,f,g)
458 eor x19,x22,x23 // a^b, b^c in next round
459 eor x16,x16,x9,ror#18 // Sigma1(e)
461 add x21,x21,x17 // h+=Ch(e,f,g)
462 eor x17,x22,x22,ror#5
463 add x21,x21,x16 // h+=Sigma1(e)
464 and x28,x28,x19 // (b^c)&=(a^b)
465 add x25,x25,x21 // d+=h
466 eor x28,x28,x23 // Maj(a,b,c)
467 eor x17,x9,x17,ror#34 // Sigma0(a)
468 add x21,x21,x28 // h+=Maj(a,b,c)
469 ldr x28,[x30],#8 // *K++, x19 in next round
470 //add x21,x21,x17 // h+=Sigma0(a)
471 #ifndef __AARCH64EB__
475 add x21,x21,x17 // h+=Sigma0(a)
// Last round whose X[i] (in x2) comes straight from the input block. From
// here on each round also folds sigma0/sigma1 terms into the X registers to
// extend the message schedule.
478 add x20,x20,x28 // h+=K[i]
484 add x20,x20,x2 // h+=X[i]
485 eor x16,x16,x25,ror#18
487 orr x17,x17,x28 // Ch(e,f,g)
488 eor x28,x21,x22 // a^b, b^c in next round
489 eor x16,x16,x25,ror#41 // Sigma1(e)
490 eor x10,x10,x21,ror#34
491 add x20,x20,x17 // h+=Ch(e,f,g)
492 and x19,x19,x28 // (b^c)&=(a^b)
494 eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
495 add x20,x20,x16 // h+=Sigma1(e)
496 eor x19,x19,x22 // Maj(a,b,c)
497 eor x17,x10,x21,ror#39 // Sigma0(a)
498 eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
500 add x24,x24,x20 // d+=h
501 add x20,x20,x19 // h+=Maj(a,b,c)
502 ldr x19,[x30],#8 // *K++, x28 in next round
504 add x20,x20,x17 // h+=Sigma0(a)
// Sixteen rounds with message-schedule expansion, again cycling X[i] through
// x3..x15, x0, x1, x2. The cbnz at the end of this group branches back to
// .Loop_16_xx to repeat it.
510 add x27,x27,x19 // h+=K[i]
516 add x27,x27,x3 // h+=X[i]
517 eor x16,x16,x24,ror#18
519 orr x17,x17,x19 // Ch(e,f,g)
520 eor x19,x20,x21 // a^b, b^c in next round
521 eor x16,x16,x24,ror#41 // Sigma1(e)
522 eor x11,x11,x20,ror#34
523 add x27,x27,x17 // h+=Ch(e,f,g)
524 and x28,x28,x19 // (b^c)&=(a^b)
526 eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
527 add x27,x27,x16 // h+=Sigma1(e)
528 eor x28,x28,x21 // Maj(a,b,c)
529 eor x17,x11,x20,ror#39 // Sigma0(a)
530 eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
532 add x23,x23,x27 // d+=h
533 add x27,x27,x28 // h+=Maj(a,b,c)
534 ldr x28,[x30],#8 // *K++, x19 in next round
536 add x27,x27,x17 // h+=Sigma0(a)
541 add x26,x26,x28 // h+=K[i]
547 add x26,x26,x4 // h+=X[i]
548 eor x16,x16,x23,ror#18
550 orr x17,x17,x28 // Ch(e,f,g)
551 eor x28,x27,x20 // a^b, b^c in next round
552 eor x16,x16,x23,ror#41 // Sigma1(e)
553 eor x12,x12,x27,ror#34
554 add x26,x26,x17 // h+=Ch(e,f,g)
555 and x19,x19,x28 // (b^c)&=(a^b)
556 eor x10,x10,x3,ror#61
557 eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
558 add x26,x26,x16 // h+=Sigma1(e)
559 eor x19,x19,x20 // Maj(a,b,c)
560 eor x17,x12,x27,ror#39 // Sigma0(a)
561 eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
563 add x22,x22,x26 // d+=h
564 add x26,x26,x19 // h+=Maj(a,b,c)
565 ldr x19,[x30],#8 // *K++, x28 in next round
567 add x26,x26,x17 // h+=Sigma0(a)
572 add x25,x25,x19 // h+=K[i]
578 add x25,x25,x5 // h+=X[i]
579 eor x16,x16,x22,ror#18
581 orr x17,x17,x19 // Ch(e,f,g)
582 eor x19,x26,x27 // a^b, b^c in next round
583 eor x16,x16,x22,ror#41 // Sigma1(e)
584 eor x13,x13,x26,ror#34
585 add x25,x25,x17 // h+=Ch(e,f,g)
586 and x28,x28,x19 // (b^c)&=(a^b)
587 eor x11,x11,x4,ror#61
588 eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
589 add x25,x25,x16 // h+=Sigma1(e)
590 eor x28,x28,x27 // Maj(a,b,c)
591 eor x17,x13,x26,ror#39 // Sigma0(a)
592 eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
594 add x21,x21,x25 // d+=h
595 add x25,x25,x28 // h+=Maj(a,b,c)
596 ldr x28,[x30],#8 // *K++, x19 in next round
598 add x25,x25,x17 // h+=Sigma0(a)
603 add x24,x24,x28 // h+=K[i]
609 add x24,x24,x6 // h+=X[i]
610 eor x16,x16,x21,ror#18
612 orr x17,x17,x28 // Ch(e,f,g)
613 eor x28,x25,x26 // a^b, b^c in next round
614 eor x16,x16,x21,ror#41 // Sigma1(e)
615 eor x14,x14,x25,ror#34
616 add x24,x24,x17 // h+=Ch(e,f,g)
617 and x19,x19,x28 // (b^c)&=(a^b)
618 eor x12,x12,x5,ror#61
619 eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
620 add x24,x24,x16 // h+=Sigma1(e)
621 eor x19,x19,x26 // Maj(a,b,c)
622 eor x17,x14,x25,ror#39 // Sigma0(a)
623 eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
625 add x20,x20,x24 // d+=h
626 add x24,x24,x19 // h+=Maj(a,b,c)
627 ldr x19,[x30],#8 // *K++, x28 in next round
629 add x24,x24,x17 // h+=Sigma0(a)
634 add x23,x23,x19 // h+=K[i]
640 add x23,x23,x7 // h+=X[i]
641 eor x16,x16,x20,ror#18
643 orr x17,x17,x19 // Ch(e,f,g)
644 eor x19,x24,x25 // a^b, b^c in next round
645 eor x16,x16,x20,ror#41 // Sigma1(e)
646 eor x15,x15,x24,ror#34
647 add x23,x23,x17 // h+=Ch(e,f,g)
648 and x28,x28,x19 // (b^c)&=(a^b)
649 eor x13,x13,x6,ror#61
650 eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
651 add x23,x23,x16 // h+=Sigma1(e)
652 eor x28,x28,x25 // Maj(a,b,c)
653 eor x17,x15,x24,ror#39 // Sigma0(a)
654 eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
656 add x27,x27,x23 // d+=h
657 add x23,x23,x28 // h+=Maj(a,b,c)
658 ldr x28,[x30],#8 // *K++, x19 in next round
660 add x23,x23,x17 // h+=Sigma0(a)
665 add x22,x22,x28 // h+=K[i]
671 add x22,x22,x8 // h+=X[i]
672 eor x16,x16,x27,ror#18
673 eor x15,x15,x10,ror#8
674 orr x17,x17,x28 // Ch(e,f,g)
675 eor x28,x23,x24 // a^b, b^c in next round
676 eor x16,x16,x27,ror#41 // Sigma1(e)
678 add x22,x22,x17 // h+=Ch(e,f,g)
679 and x19,x19,x28 // (b^c)&=(a^b)
680 eor x14,x14,x7,ror#61
681 eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
682 add x22,x22,x16 // h+=Sigma1(e)
683 eor x19,x19,x24 // Maj(a,b,c)
684 eor x17,x0,x23,ror#39 // Sigma0(a)
685 eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
687 add x26,x26,x22 // d+=h
688 add x22,x22,x19 // h+=Maj(a,b,c)
689 ldr x19,[x30],#8 // *K++, x28 in next round
691 add x22,x22,x17 // h+=Sigma0(a)
696 add x21,x21,x19 // h+=K[i]
702 add x21,x21,x9 // h+=X[i]
703 eor x16,x16,x26,ror#18
705 orr x17,x17,x19 // Ch(e,f,g)
706 eor x19,x22,x23 // a^b, b^c in next round
707 eor x16,x16,x26,ror#41 // Sigma1(e)
709 add x21,x21,x17 // h+=Ch(e,f,g)
710 and x28,x28,x19 // (b^c)&=(a^b)
711 eor x15,x15,x8,ror#61
712 eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
713 add x21,x21,x16 // h+=Sigma1(e)
714 eor x28,x28,x23 // Maj(a,b,c)
715 eor x17,x1,x22,ror#39 // Sigma0(a)
716 eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
718 add x25,x25,x21 // d+=h
719 add x21,x21,x28 // h+=Maj(a,b,c)
720 ldr x28,[x30],#8 // *K++, x19 in next round
722 add x21,x21,x17 // h+=Sigma0(a)
727 add x20,x20,x28 // h+=K[i]
733 add x20,x20,x10 // h+=X[i]
734 eor x16,x16,x25,ror#18
736 orr x17,x17,x28 // Ch(e,f,g)
737 eor x28,x21,x22 // a^b, b^c in next round
738 eor x16,x16,x25,ror#41 // Sigma1(e)
740 add x20,x20,x17 // h+=Ch(e,f,g)
741 and x19,x19,x28 // (b^c)&=(a^b)
743 eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
744 add x20,x20,x16 // h+=Sigma1(e)
745 eor x19,x19,x22 // Maj(a,b,c)
746 eor x17,x2,x21,ror#39 // Sigma0(a)
747 eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
749 add x24,x24,x20 // d+=h
750 add x20,x20,x19 // h+=Maj(a,b,c)
751 ldr x19,[x30],#8 // *K++, x28 in next round
753 add x20,x20,x17 // h+=Sigma0(a)
758 add x27,x27,x19 // h+=K[i]
764 add x27,x27,x11 // h+=X[i]
765 eor x16,x16,x24,ror#18
767 orr x17,x17,x19 // Ch(e,f,g)
768 eor x19,x20,x21 // a^b, b^c in next round
769 eor x16,x16,x24,ror#41 // Sigma1(e)
771 add x27,x27,x17 // h+=Ch(e,f,g)
772 and x28,x28,x19 // (b^c)&=(a^b)
774 eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
775 add x27,x27,x16 // h+=Sigma1(e)
776 eor x28,x28,x21 // Maj(a,b,c)
777 eor x17,x3,x20,ror#39 // Sigma0(a)
778 eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
780 add x23,x23,x27 // d+=h
781 add x27,x27,x28 // h+=Maj(a,b,c)
782 ldr x28,[x30],#8 // *K++, x19 in next round
784 add x27,x27,x17 // h+=Sigma0(a)
789 add x26,x26,x28 // h+=K[i]
795 add x26,x26,x12 // h+=X[i]
796 eor x16,x16,x23,ror#18
798 orr x17,x17,x28 // Ch(e,f,g)
799 eor x28,x27,x20 // a^b, b^c in next round
800 eor x16,x16,x23,ror#41 // Sigma1(e)
802 add x26,x26,x17 // h+=Ch(e,f,g)
803 and x19,x19,x28 // (b^c)&=(a^b)
805 eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
806 add x26,x26,x16 // h+=Sigma1(e)
807 eor x19,x19,x20 // Maj(a,b,c)
808 eor x17,x4,x27,ror#39 // Sigma0(a)
809 eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
811 add x22,x22,x26 // d+=h
812 add x26,x26,x19 // h+=Maj(a,b,c)
813 ldr x19,[x30],#8 // *K++, x28 in next round
815 add x26,x26,x17 // h+=Sigma0(a)
820 add x25,x25,x19 // h+=K[i]
826 add x25,x25,x13 // h+=X[i]
827 eor x16,x16,x22,ror#18
829 orr x17,x17,x19 // Ch(e,f,g)
830 eor x19,x26,x27 // a^b, b^c in next round
831 eor x16,x16,x22,ror#41 // Sigma1(e)
833 add x25,x25,x17 // h+=Ch(e,f,g)
834 and x28,x28,x19 // (b^c)&=(a^b)
836 eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
837 add x25,x25,x16 // h+=Sigma1(e)
838 eor x28,x28,x27 // Maj(a,b,c)
839 eor x17,x5,x26,ror#39 // Sigma0(a)
840 eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
842 add x21,x21,x25 // d+=h
843 add x25,x25,x28 // h+=Maj(a,b,c)
844 ldr x28,[x30],#8 // *K++, x19 in next round
846 add x25,x25,x17 // h+=Sigma0(a)
851 add x24,x24,x28 // h+=K[i]
857 add x24,x24,x14 // h+=X[i]
858 eor x16,x16,x21,ror#18
860 orr x17,x17,x28 // Ch(e,f,g)
861 eor x28,x25,x26 // a^b, b^c in next round
862 eor x16,x16,x21,ror#41 // Sigma1(e)
864 add x24,x24,x17 // h+=Ch(e,f,g)
865 and x19,x19,x28 // (b^c)&=(a^b)
867 eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
868 add x24,x24,x16 // h+=Sigma1(e)
869 eor x19,x19,x26 // Maj(a,b,c)
870 eor x17,x6,x25,ror#39 // Sigma0(a)
871 eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
873 add x20,x20,x24 // d+=h
874 add x24,x24,x19 // h+=Maj(a,b,c)
875 ldr x19,[x30],#8 // *K++, x28 in next round
877 add x24,x24,x17 // h+=Sigma0(a)
882 add x23,x23,x19 // h+=K[i]
888 add x23,x23,x15 // h+=X[i]
889 eor x16,x16,x20,ror#18
891 orr x17,x17,x19 // Ch(e,f,g)
892 eor x19,x24,x25 // a^b, b^c in next round
893 eor x16,x16,x20,ror#41 // Sigma1(e)
895 add x23,x23,x17 // h+=Ch(e,f,g)
896 and x28,x28,x19 // (b^c)&=(a^b)
898 eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
899 add x23,x23,x16 // h+=Sigma1(e)
900 eor x28,x28,x25 // Maj(a,b,c)
901 eor x17,x7,x24,ror#39 // Sigma0(a)
902 eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
904 add x27,x27,x23 // d+=h
905 add x23,x23,x28 // h+=Maj(a,b,c)
906 ldr x28,[x30],#8 // *K++, x19 in next round
908 add x23,x23,x17 // h+=Sigma0(a)
913 add x22,x22,x28 // h+=K[i]
919 add x22,x22,x0 // h+=X[i]
920 eor x16,x16,x27,ror#18
922 orr x17,x17,x28 // Ch(e,f,g)
923 eor x28,x23,x24 // a^b, b^c in next round
924 eor x16,x16,x27,ror#41 // Sigma1(e)
926 add x22,x22,x17 // h+=Ch(e,f,g)
927 and x19,x19,x28 // (b^c)&=(a^b)
929 eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
930 add x22,x22,x16 // h+=Sigma1(e)
931 eor x19,x19,x24 // Maj(a,b,c)
932 eor x17,x8,x23,ror#39 // Sigma0(a)
933 eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
935 add x26,x26,x22 // d+=h
936 add x22,x22,x19 // h+=Maj(a,b,c)
937 ldr x19,[x30],#8 // *K++, x28 in next round
939 add x22,x22,x17 // h+=Sigma0(a)
944 add x21,x21,x19 // h+=K[i]
950 add x21,x21,x1 // h+=X[i]
951 eor x16,x16,x26,ror#18
953 orr x17,x17,x19 // Ch(e,f,g)
954 eor x19,x22,x23 // a^b, b^c in next round
955 eor x16,x16,x26,ror#41 // Sigma1(e)
957 add x21,x21,x17 // h+=Ch(e,f,g)
958 and x28,x28,x19 // (b^c)&=(a^b)
960 eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
961 add x21,x21,x16 // h+=Sigma1(e)
962 eor x28,x28,x23 // Maj(a,b,c)
963 eor x17,x9,x22,ror#39 // Sigma0(a)
964 eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
966 add x25,x25,x21 // d+=h
967 add x21,x21,x28 // h+=Maj(a,b,c)
968 ldr x28,[x30],#8 // *K++, x19 in next round
970 add x21,x21,x17 // h+=Sigma0(a)
975 add x20,x20,x28 // h+=K[i]
981 add x20,x20,x2 // h+=X[i]
982 eor x16,x16,x25,ror#18
984 orr x17,x17,x28 // Ch(e,f,g)
985 eor x28,x21,x22 // a^b, b^c in next round
986 eor x16,x16,x25,ror#41 // Sigma1(e)
987 eor x10,x10,x21,ror#34
988 add x20,x20,x17 // h+=Ch(e,f,g)
989 and x19,x19,x28 // (b^c)&=(a^b)
991 eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
992 add x20,x20,x16 // h+=Sigma1(e)
993 eor x19,x19,x22 // Maj(a,b,c)
994 eor x17,x10,x21,ror#39 // Sigma0(a)
995 eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
997 add x24,x24,x20 // d+=h
998 add x20,x20,x19 // h+=Maj(a,b,c)
999 ldr x19,[x30],#8 // *K++, x28 in next round
1001 add x20,x20,x17 // h+=Sigma0(a)
// x19 holds the K word just fetched; keep looping while it is non-zero, so a
// zero word read through x30 ends the round loop.
1003 cbnz x19,.Loop_16_xx
// 648 = 81 * 8 bytes.
// NOTE(review): presumably 80 K values plus the zero word that ended the
// loop, putting x30 back at the first table entry - confirm.
1007 sub x30,x30,#648 // rewind
1011 add x1,x1,#14*8 // advance input pointer
// Fetch two context words from byte offset 48, then write working variables
// x22..x27 back to context byte offsets 16..63.
1014 ldp x9,x10,[x0,#6*8]
1021 stp x22,x23,[x0,#2*8]
1025 stp x24,x25,[x0,#4*8]
1026 stp x26,x27,[x0,#6*8]
// Epilogue: reload x19..x28 from the frame addressed by x29, then pop the
// 128-byte frame along with x29/x30.
1029 ldp x19,x20,[x29,#16]
1031 ldp x21,x22,[x29,#32]
1032 ldp x23,x24,[x29,#48]
1033 ldp x25,x26,[x29,#64]
1034 ldp x27,x28,[x29,#80]
1035 ldp x29,x30,[sp],#128
1037 .size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7
1040 .globl zfs_sha512_block_armv8
1041 .type zfs_sha512_block_armv8,%function
1043 zfs_sha512_block_armv8:
1046 // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
1047 stp x29,x30,[sp,#-16]!
1050 ld1 {v16.16b-v19.16b},[x1],#64 // load input
1051 ld1 {v20.16b-v23.16b},[x1],#64
1053 ld1 {v0.2d-v3.2d},[x0] // load context
1056 rev64 v16.16b,v16.16b
1057 rev64 v17.16b,v17.16b
1058 rev64 v18.16b,v18.16b
1059 rev64 v19.16b,v19.16b
1060 rev64 v20.16b,v20.16b
1061 rev64 v21.16b,v21.16b
1062 rev64 v22.16b,v22.16b
1063 rev64 v23.16b,v23.16b
1068 ld1 {v24.2d},[x3],#16
1071 orr v26.16b,v0.16b,v0.16b // offload
1072 orr v27.16b,v1.16b,v1.16b
1073 orr v28.16b,v2.16b,v2.16b
1074 orr v29.16b,v3.16b,v3.16b
1075 csel x1,x1,x4,ne // conditional rewind
1076 add v24.2d,v24.2d,v16.2d
1077 ld1 {v25.2d},[x3],#16
1078 ext v24.16b,v24.16b,v24.16b,#8
1079 ext v5.16b,v2.16b,v3.16b,#8
1080 ext v6.16b,v1.16b,v2.16b,#8
1081 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1082 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1083 ext v7.16b,v20.16b,v21.16b,#8
1084 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1085 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1086 add v4.2d,v1.2d,v3.2d // "D + T1"
1087 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1088 add v25.2d,v25.2d,v17.2d
1089 ld1 {v24.2d},[x3],#16
1090 ext v25.16b,v25.16b,v25.16b,#8
1091 ext v5.16b,v4.16b,v2.16b,#8
1092 ext v6.16b,v0.16b,v4.16b,#8
1093 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1094 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1095 ext v7.16b,v21.16b,v22.16b,#8
1096 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1097 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1098 add v1.2d,v0.2d,v2.2d // "D + T1"
1099 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1100 add v24.2d,v24.2d,v18.2d
1101 ld1 {v25.2d},[x3],#16
1102 ext v24.16b,v24.16b,v24.16b,#8
1103 ext v5.16b,v1.16b,v4.16b,#8
1104 ext v6.16b,v3.16b,v1.16b,#8
1105 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1106 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1107 ext v7.16b,v22.16b,v23.16b,#8
1108 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1109 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1110 add v0.2d,v3.2d,v4.2d // "D + T1"
1111 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1112 add v25.2d,v25.2d,v19.2d
1113 ld1 {v24.2d},[x3],#16
1114 ext v25.16b,v25.16b,v25.16b,#8
1115 ext v5.16b,v0.16b,v1.16b,#8
1116 ext v6.16b,v2.16b,v0.16b,#8
1117 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1118 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1119 ext v7.16b,v23.16b,v16.16b,#8
1120 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1121 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1122 add v3.2d,v2.2d,v1.2d // "D + T1"
1123 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1124 add v24.2d,v24.2d,v20.2d
1125 ld1 {v25.2d},[x3],#16
1126 ext v24.16b,v24.16b,v24.16b,#8
1127 ext v5.16b,v3.16b,v0.16b,#8
1128 ext v6.16b,v4.16b,v3.16b,#8
1129 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1130 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1131 ext v7.16b,v16.16b,v17.16b,#8
1132 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1133 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1134 add v2.2d,v4.2d,v0.2d // "D + T1"
1135 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1136 add v25.2d,v25.2d,v21.2d
1137 ld1 {v24.2d},[x3],#16
1138 ext v25.16b,v25.16b,v25.16b,#8
1139 ext v5.16b,v2.16b,v3.16b,#8
1140 ext v6.16b,v1.16b,v2.16b,#8
1141 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1142 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1143 ext v7.16b,v17.16b,v18.16b,#8
1144 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1145 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1146 add v4.2d,v1.2d,v3.2d // "D + T1"
1147 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1148 add v24.2d,v24.2d,v22.2d
1149 ld1 {v25.2d},[x3],#16
1150 ext v24.16b,v24.16b,v24.16b,#8
1151 ext v5.16b,v4.16b,v2.16b,#8
1152 ext v6.16b,v0.16b,v4.16b,#8
1153 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1154 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1155 ext v7.16b,v18.16b,v19.16b,#8
1156 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1157 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1158 add v1.2d,v0.2d,v2.2d // "D + T1"
1159 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1160 add v25.2d,v25.2d,v23.2d
1161 ld1 {v24.2d},[x3],#16
1162 ext v25.16b,v25.16b,v25.16b,#8
1163 ext v5.16b,v1.16b,v4.16b,#8
1164 ext v6.16b,v3.16b,v1.16b,#8
1165 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1166 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1167 ext v7.16b,v19.16b,v20.16b,#8
1168 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1169 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1170 add v0.2d,v3.2d,v4.2d // "D + T1"
1171 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1172 add v24.2d,v24.2d,v16.2d
1173 ld1 {v25.2d},[x3],#16
1174 ext v24.16b,v24.16b,v24.16b,#8
1175 ext v5.16b,v0.16b,v1.16b,#8
1176 ext v6.16b,v2.16b,v0.16b,#8
1177 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1178 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1179 ext v7.16b,v20.16b,v21.16b,#8
1180 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1181 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1182 add v3.2d,v2.2d,v1.2d // "D + T1"
1183 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1184 add v25.2d,v25.2d,v17.2d
1185 ld1 {v24.2d},[x3],#16
1186 ext v25.16b,v25.16b,v25.16b,#8
1187 ext v5.16b,v3.16b,v0.16b,#8
1188 ext v6.16b,v4.16b,v3.16b,#8
1189 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1190 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1191 ext v7.16b,v21.16b,v22.16b,#8
1192 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1193 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1194 add v2.2d,v4.2d,v0.2d // "D + T1"
1195 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1196 add v24.2d,v24.2d,v18.2d
1197 ld1 {v25.2d},[x3],#16
1198 ext v24.16b,v24.16b,v24.16b,#8
1199 ext v5.16b,v2.16b,v3.16b,#8
1200 ext v6.16b,v1.16b,v2.16b,#8
1201 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1202 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1203 ext v7.16b,v22.16b,v23.16b,#8
1204 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1205 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1206 add v4.2d,v1.2d,v3.2d // "D + T1"
1207 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1208 add v25.2d,v25.2d,v19.2d
1209 ld1 {v24.2d},[x3],#16
1210 ext v25.16b,v25.16b,v25.16b,#8
1211 ext v5.16b,v4.16b,v2.16b,#8
1212 ext v6.16b,v0.16b,v4.16b,#8
1213 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1214 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1215 ext v7.16b,v23.16b,v16.16b,#8
1216 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1217 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1218 add v1.2d,v0.2d,v2.2d // "D + T1"
1219 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1220 add v24.2d,v24.2d,v20.2d
1221 ld1 {v25.2d},[x3],#16
1222 ext v24.16b,v24.16b,v24.16b,#8
1223 ext v5.16b,v1.16b,v4.16b,#8
1224 ext v6.16b,v3.16b,v1.16b,#8
1225 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1226 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1227 ext v7.16b,v16.16b,v17.16b,#8
1228 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1229 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1230 add v0.2d,v3.2d,v4.2d // "D + T1"
1231 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1232 add v25.2d,v25.2d,v21.2d
1233 ld1 {v24.2d},[x3],#16
1234 ext v25.16b,v25.16b,v25.16b,#8
1235 ext v5.16b,v0.16b,v1.16b,#8
1236 ext v6.16b,v2.16b,v0.16b,#8
1237 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1238 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1239 ext v7.16b,v17.16b,v18.16b,#8
1240 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1241 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1242 add v3.2d,v2.2d,v1.2d // "D + T1"
1243 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1244 add v24.2d,v24.2d,v22.2d
1245 ld1 {v25.2d},[x3],#16
1246 ext v24.16b,v24.16b,v24.16b,#8
1247 ext v5.16b,v3.16b,v0.16b,#8
1248 ext v6.16b,v4.16b,v3.16b,#8
1249 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1250 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1251 ext v7.16b,v18.16b,v19.16b,#8
1252 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1253 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1254 add v2.2d,v4.2d,v0.2d // "D + T1"
1255 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1256 add v25.2d,v25.2d,v23.2d
1257 ld1 {v24.2d},[x3],#16
1258 ext v25.16b,v25.16b,v25.16b,#8
1259 ext v5.16b,v2.16b,v3.16b,#8
1260 ext v6.16b,v1.16b,v2.16b,#8
1261 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1262 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1263 ext v7.16b,v19.16b,v20.16b,#8
1264 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1265 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1266 add v4.2d,v1.2d,v3.2d // "D + T1"
1267 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1268 add v24.2d,v24.2d,v16.2d
1269 ld1 {v25.2d},[x3],#16
1270 ext v24.16b,v24.16b,v24.16b,#8
1271 ext v5.16b,v4.16b,v2.16b,#8
1272 ext v6.16b,v0.16b,v4.16b,#8
1273 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1274 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1275 ext v7.16b,v20.16b,v21.16b,#8
1276 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1277 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1278 add v1.2d,v0.2d,v2.2d // "D + T1"
1279 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1280 add v25.2d,v25.2d,v17.2d
1281 ld1 {v24.2d},[x3],#16
1282 ext v25.16b,v25.16b,v25.16b,#8
1283 ext v5.16b,v1.16b,v4.16b,#8
1284 ext v6.16b,v3.16b,v1.16b,#8
1285 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1286 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1287 ext v7.16b,v21.16b,v22.16b,#8
1288 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1289 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1290 add v0.2d,v3.2d,v4.2d // "D + T1"
1291 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1292 add v24.2d,v24.2d,v18.2d
1293 ld1 {v25.2d},[x3],#16
1294 ext v24.16b,v24.16b,v24.16b,#8
1295 ext v5.16b,v0.16b,v1.16b,#8
1296 ext v6.16b,v2.16b,v0.16b,#8
1297 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1298 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1299 ext v7.16b,v22.16b,v23.16b,#8
1300 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1301 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1302 add v3.2d,v2.2d,v1.2d // "D + T1"
1303 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1304 add v25.2d,v25.2d,v19.2d
1305 ld1 {v24.2d},[x3],#16
1306 ext v25.16b,v25.16b,v25.16b,#8
1307 ext v5.16b,v3.16b,v0.16b,#8
1308 ext v6.16b,v4.16b,v3.16b,#8
1309 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1310 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1311 ext v7.16b,v23.16b,v16.16b,#8
1312 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1313 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1314 add v2.2d,v4.2d,v0.2d // "D + T1"
1315 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1316 add v24.2d,v24.2d,v20.2d
1317 ld1 {v25.2d},[x3],#16
1318 ext v24.16b,v24.16b,v24.16b,#8
1319 ext v5.16b,v2.16b,v3.16b,#8
1320 ext v6.16b,v1.16b,v2.16b,#8
1321 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1322 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1323 ext v7.16b,v16.16b,v17.16b,#8
1324 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1325 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1326 add v4.2d,v1.2d,v3.2d // "D + T1"
1327 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1328 add v25.2d,v25.2d,v21.2d
1329 ld1 {v24.2d},[x3],#16
1330 ext v25.16b,v25.16b,v25.16b,#8
1331 ext v5.16b,v4.16b,v2.16b,#8
1332 ext v6.16b,v0.16b,v4.16b,#8
1333 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1334 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1335 ext v7.16b,v17.16b,v18.16b,#8
1336 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1337 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1338 add v1.2d,v0.2d,v2.2d // "D + T1"
1339 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1340 add v24.2d,v24.2d,v22.2d
1341 ld1 {v25.2d},[x3],#16
1342 ext v24.16b,v24.16b,v24.16b,#8
1343 ext v5.16b,v1.16b,v4.16b,#8
1344 ext v6.16b,v3.16b,v1.16b,#8
1345 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1346 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1347 ext v7.16b,v18.16b,v19.16b,#8
1348 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1349 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1350 add v0.2d,v3.2d,v4.2d // "D + T1"
1351 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1352 add v25.2d,v25.2d,v23.2d
1353 ld1 {v24.2d},[x3],#16
1354 ext v25.16b,v25.16b,v25.16b,#8
1355 ext v5.16b,v0.16b,v1.16b,#8
1356 ext v6.16b,v2.16b,v0.16b,#8
1357 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1358 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1359 ext v7.16b,v19.16b,v20.16b,#8
1360 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1361 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1362 add v3.2d,v2.2d,v1.2d // "D + T1"
1363 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1364 add v24.2d,v24.2d,v16.2d
1365 ld1 {v25.2d},[x3],#16
1366 ext v24.16b,v24.16b,v24.16b,#8
1367 ext v5.16b,v3.16b,v0.16b,#8
1368 ext v6.16b,v4.16b,v3.16b,#8
1369 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1370 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1371 ext v7.16b,v20.16b,v21.16b,#8
1372 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1373 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1374 add v2.2d,v4.2d,v0.2d // "D + T1"
1375 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1376 add v25.2d,v25.2d,v17.2d
1377 ld1 {v24.2d},[x3],#16
1378 ext v25.16b,v25.16b,v25.16b,#8
1379 ext v5.16b,v2.16b,v3.16b,#8
1380 ext v6.16b,v1.16b,v2.16b,#8
1381 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1382 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1383 ext v7.16b,v21.16b,v22.16b,#8
1384 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1385 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1386 add v4.2d,v1.2d,v3.2d // "D + T1"
1387 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1388 add v24.2d,v24.2d,v18.2d
1389 ld1 {v25.2d},[x3],#16
1390 ext v24.16b,v24.16b,v24.16b,#8
1391 ext v5.16b,v4.16b,v2.16b,#8
1392 ext v6.16b,v0.16b,v4.16b,#8
1393 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1394 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1395 ext v7.16b,v22.16b,v23.16b,#8
1396 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1397 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1398 add v1.2d,v0.2d,v2.2d // "D + T1"
1399 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1400 add v25.2d,v25.2d,v19.2d
1401 ld1 {v24.2d},[x3],#16
1402 ext v25.16b,v25.16b,v25.16b,#8
1403 ext v5.16b,v1.16b,v4.16b,#8
1404 ext v6.16b,v3.16b,v1.16b,#8
1405 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1406 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1407 ext v7.16b,v23.16b,v16.16b,#8
1408 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1409 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1410 add v0.2d,v3.2d,v4.2d // "D + T1"
1411 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1412 add v24.2d,v24.2d,v20.2d
1413 ld1 {v25.2d},[x3],#16
1414 ext v24.16b,v24.16b,v24.16b,#8
1415 ext v5.16b,v0.16b,v1.16b,#8
1416 ext v6.16b,v2.16b,v0.16b,#8
1417 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1418 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1419 ext v7.16b,v16.16b,v17.16b,#8
1420 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1421 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1422 add v3.2d,v2.2d,v1.2d // "D + T1"
1423 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1424 add v25.2d,v25.2d,v21.2d
1425 ld1 {v24.2d},[x3],#16
1426 ext v25.16b,v25.16b,v25.16b,#8
1427 ext v5.16b,v3.16b,v0.16b,#8
1428 ext v6.16b,v4.16b,v3.16b,#8
1429 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1430 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1431 ext v7.16b,v17.16b,v18.16b,#8
1432 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1433 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1434 add v2.2d,v4.2d,v0.2d // "D + T1"
1435 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1436 add v24.2d,v24.2d,v22.2d
1437 ld1 {v25.2d},[x3],#16
1438 ext v24.16b,v24.16b,v24.16b,#8
1439 ext v5.16b,v2.16b,v3.16b,#8
1440 ext v6.16b,v1.16b,v2.16b,#8
1441 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1442 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1443 ext v7.16b,v18.16b,v19.16b,#8
1444 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1445 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1446 add v4.2d,v1.2d,v3.2d // "D + T1"
1447 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1448 add v25.2d,v25.2d,v23.2d
1449 ld1 {v24.2d},[x3],#16
1450 ext v25.16b,v25.16b,v25.16b,#8
1451 ext v5.16b,v4.16b,v2.16b,#8
1452 ext v6.16b,v0.16b,v4.16b,#8
1453 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1454 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1455 ext v7.16b,v19.16b,v20.16b,#8
1456 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1457 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1458 add v1.2d,v0.2d,v2.2d // "D + T1"
1459 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1460 ld1 {v25.2d},[x3],#16
1461 add v24.2d,v24.2d,v16.2d
1462 ld1 {v16.16b},[x1],#16 // load next input
1463 ext v24.16b,v24.16b,v24.16b,#8
1464 ext v5.16b,v1.16b,v4.16b,#8
1465 ext v6.16b,v3.16b,v1.16b,#8
1466 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1467 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1468 rev64 v16.16b,v16.16b
1469 add v0.2d,v3.2d,v4.2d // "D + T1"
1470 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1471 ld1 {v24.2d},[x3],#16
1472 add v25.2d,v25.2d,v17.2d
1473 ld1 {v17.16b},[x1],#16 // load next input
1474 ext v25.16b,v25.16b,v25.16b,#8
1475 ext v5.16b,v0.16b,v1.16b,#8
1476 ext v6.16b,v2.16b,v0.16b,#8
1477 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1478 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1479 rev64 v17.16b,v17.16b
1480 add v3.2d,v2.2d,v1.2d // "D + T1"
1481 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1482 ld1 {v25.2d},[x3],#16
1483 add v24.2d,v24.2d,v18.2d
1484 ld1 {v18.16b},[x1],#16 // load next input
1485 ext v24.16b,v24.16b,v24.16b,#8
1486 ext v5.16b,v3.16b,v0.16b,#8
1487 ext v6.16b,v4.16b,v3.16b,#8
1488 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1489 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1490 rev64 v18.16b,v18.16b
1491 add v2.2d,v4.2d,v0.2d // "D + T1"
1492 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1493 ld1 {v24.2d},[x3],#16
1494 add v25.2d,v25.2d,v19.2d
1495 ld1 {v19.16b},[x1],#16 // load next input
1496 ext v25.16b,v25.16b,v25.16b,#8
1497 ext v5.16b,v2.16b,v3.16b,#8
1498 ext v6.16b,v1.16b,v2.16b,#8
1499 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1500 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1501 rev64 v19.16b,v19.16b
1502 add v4.2d,v1.2d,v3.2d // "D + T1"
1503 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1504 ld1 {v25.2d},[x3],#16
1505 add v24.2d,v24.2d,v20.2d
1506 ld1 {v20.16b},[x1],#16 // load next input
1507 ext v24.16b,v24.16b,v24.16b,#8
1508 ext v5.16b,v4.16b,v2.16b,#8
1509 ext v6.16b,v0.16b,v4.16b,#8
1510 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1511 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1512 rev64 v20.16b,v20.16b
1513 add v1.2d,v0.2d,v2.2d // "D + T1"
1514 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1515 ld1 {v24.2d},[x3],#16
1516 add v25.2d,v25.2d,v21.2d
1517 ld1 {v21.16b},[x1],#16 // load next input
1518 ext v25.16b,v25.16b,v25.16b,#8
1519 ext v5.16b,v1.16b,v4.16b,#8
1520 ext v6.16b,v3.16b,v1.16b,#8
1521 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1522 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1523 rev64 v21.16b,v21.16b
1524 add v0.2d,v3.2d,v4.2d // "D + T1"
1525 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1526 ld1 {v25.2d},[x3],#16
1527 add v24.2d,v24.2d,v22.2d
1528 ld1 {v22.16b},[x1],#16 // load next input
1529 ext v24.16b,v24.16b,v24.16b,#8
1530 ext v5.16b,v0.16b,v1.16b,#8
1531 ext v6.16b,v2.16b,v0.16b,#8
1532 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1533 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1534 rev64 v22.16b,v22.16b
1535 add v3.2d,v2.2d,v1.2d // "D + T1"
1536 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1537 sub x3,x3,#80*8 // rewind
1538 add v25.2d,v25.2d,v23.2d
1539 ld1 {v23.16b},[x1],#16 // load next input
1540 ext v25.16b,v25.16b,v25.16b,#8
1541 ext v5.16b,v3.16b,v0.16b,#8
1542 ext v6.16b,v4.16b,v3.16b,#8
1543 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1544 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1545 rev64 v23.16b,v23.16b
1546 add v2.2d,v4.2d,v0.2d // "D + T1"
1547 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1548 add v0.2d,v0.2d,v26.2d // accumulate
1549 add v1.2d,v1.2d,v27.2d
1550 add v2.2d,v2.2d,v28.2d
1551 add v3.2d,v3.2d,v29.2d
1555 st1 {v0.2d-v3.2d},[x0] // store context
1559 .size zfs_sha512_block_armv8,.-zfs_sha512_block_armv8