 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 * - modified assembly to fit into OpenZFS
# define __ARM_ARCH__ 7
# define __ARM_ARCH__ __ARM_ARCH
# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
# define VFP_ABI_POP vldmia sp!,{d8-d15}
# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
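@ WORD64 emits each 64-bit constant as two 32-bit words in the target's
@ native order, so the LO/HI byte offsets used below with
@ "ldr rX,[r14,#LO]" / "ldr rX,[r14,#HI]" always pick the intended half.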
#if defined(__thumb2__)
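@ The table that follows is K512, the 80 64-bit SHA-512 round constants
@ K[0..79] from FIPS 180-4, two constants per WORD64 line.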
WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
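@ ALU (non-NEON) SHA-512 block routine:
@   r0 = pointer to the eight 64-bit state words
@   r1 = input data
@   r2 = number of 128-byte blocks (converted below into an end-of-input pointer)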
.globl zfs_sha512_block_armv7
.type zfs_sha512_block_armv7,%function
zfs_sha512_block_armv7:
.Lzfs_sha512_block_armv7:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ zfs_sha512_block_armv7
adr r3,.Lzfs_sha512_block_armv7
add r2,r1,r2,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r14,r3,#672 @ K512
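@ r14 now addresses the K512 table (computed PC-relative from r3); the
@ 64-bit working variables are handled as lo/hi 32-bit pairs, with the
@ state held in the stack frame (see the [sp,#...] accesses below).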
@ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
@ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
@ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
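@ Each 64-bit rotate is built from two 32-bit shift/eor steps on the
@ (hi,lo) halves; a rotate by 41 = 32+9 swaps the halves, hence the
@ final hi>>9/lo<<23 terms.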
ldr r11,[sp,#56+0] @ h.lo
eor r10,r10,r7,lsl#18
ldr r12,[sp,#56+4] @ h.hi
eor r10,r10,r8,lsr#18
eor r10,r10,r7,lsl#14
eor r10,r10,r8,lsl#23 @ Sigma1(e)
ldr r9,[sp,#40+0] @ f.lo
adc r4,r4,r10 @ T += Sigma1(e)
ldr r10,[sp,#40+4] @ f.hi
ldr r11,[sp,#48+0] @ g.lo
adc r4,r4,r12 @ T += h
ldr r12,[sp,#48+4] @ g.hi
ldr r11,[r14,#LO] @ K[i].lo
eor r10,r10,r12 @ Ch(e,f,g)
ldr r12,[r14,#HI] @ K[i].hi
ldr r7,[sp,#24+0] @ d.lo
adc r4,r4,r10 @ T += Ch(e,f,g)
ldr r8,[sp,#24+4] @ d.hi
adc r4,r4,r12 @ T += K[i]
ldr r11,[sp,#8+0] @ b.lo
adc r8,r8,r4 @ d += T
ldr r12,[sp,#16+0] @ c.lo
it eq @ Thumb2 thing, sanity check in ARM
@ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
@ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
@ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
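@ Same lo/hi split as Sigma1 above: the rotates by 34 and 39 are >= 32,
@ so those terms draw from the opposite half.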
eor r10,r10,r6,lsl#30
eor r10,r10,r6,lsl#25 @ Sigma0(a)
adc r4,r4,r10 @ T += Sigma0(a)
ldr r10,[sp,#8+4] @ b.hi
ldr r11,[sp,#16+4] @ c.hi
orr r5,r5,r9 @ Maj(a,b,c).lo
orr r6,r6,r12 @ Maj(a,b,c).hi
adc r6,r6,r4 @ h += T
@ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
@ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
@ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
@ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
@ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
@ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
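@ sigma0/sigma1 are the message-schedule functions; the plain x>>7 and
@ x>>6 terms are logical shifts, so only the hi half feeds bits into the
@ lo half and the hi result has no wrap-around term.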
eor r10,r10,r11,lsl#13
eor r10,r10,r11,lsr#29
eor r10,r10,r12,lsl#3
eor r10,r10,r12,lsr#6
@ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
@ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
@ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
ldr r11,[sp,#56+0] @ h.lo
eor r10,r10,r7,lsl#18
ldr r12,[sp,#56+4] @ h.hi
eor r10,r10,r8,lsr#18
eor r10,r10,r7,lsl#14
eor r10,r10,r8,lsl#23 @ Sigma1(e)
ldr r9,[sp,#40+0] @ f.lo
adc r4,r4,r10 @ T += Sigma1(e)
ldr r10,[sp,#40+4] @ f.hi
ldr r11,[sp,#48+0] @ g.lo
adc r4,r4,r12 @ T += h
ldr r12,[sp,#48+4] @ g.hi
ldr r11,[r14,#LO] @ K[i].lo
eor r10,r10,r12 @ Ch(e,f,g)
ldr r12,[r14,#HI] @ K[i].hi
ldr r7,[sp,#24+0] @ d.lo
adc r4,r4,r10 @ T += Ch(e,f,g)
ldr r8,[sp,#24+4] @ d.hi
adc r4,r4,r12 @ T += K[i]
ldr r11,[sp,#8+0] @ b.lo
adc r8,r8,r4 @ d += T
ldr r12,[sp,#16+0] @ c.lo
it eq @ Thumb2 thing, sanity check in ARM
@ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
@ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
@ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
eor r10,r10,r6,lsl#30
eor r10,r10,r6,lsl#25 @ Sigma0(a)
adc r4,r4,r10 @ T += Sigma0(a)
ldr r10,[sp,#8+4] @ b.hi
ldr r11,[sp,#16+4] @ c.hi
orr r5,r5,r9 @ Maj(a,b,c).lo
orr r6,r6,r12 @ Maj(a,b,c).hi
adc r6,r6,r4 @ h += T
ittt eq @ Thumb2 thing, sanity check in ARM
ldreq r10,[sp,#184+4]
add sp,sp,#8*9 @ destroy frame
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
.size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7
#if __ARM_ARCH__ >= 7
.globl zfs_sha512_block_neon
.type zfs_sha512_block_neon,%function
zfs_sha512_block_neon:
dmb @ errata #451034 on early Cortex A8
add r2,r1,r2,lsl#7 @ len to point at the end of inp
vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
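@ d16-d23 now hold the eight 64-bit state words a..h; r1/r2 are the input
@ pointer and end-of-input pointer as above, and r3 walks the K512 table.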
vshr.u64 d24,d20,#14 @ 0
vld1.64 {d0},[r1]! @ handles unaligned
vadd.i64 d16,d30 @ h+=Maj from the past
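@ the Maj(a,b,c) result of the previous round is deferred and only folded
@ into the state here, at the start of the following round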
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 0<16 && defined(__ARMEL__)
vbsl d29,d21,d22 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d18,d17 @ Maj(a,b,c)
veor d23,d26 @ Sigma0(a)
vshr.u64 d24,d19,#14 @ 1
vld1.64 {d1},[r1]! @ handles unaligned
vadd.i64 d23,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 1<16 && defined(__ARMEL__)
vbsl d29,d20,d21 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d17,d16 @ Maj(a,b,c)
veor d22,d26 @ Sigma0(a)
vshr.u64 d24,d18,#14 @ 2
vld1.64 {d2},[r1]! @ handles unaligned
vadd.i64 d22,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 2<16 && defined(__ARMEL__)
vbsl d29,d19,d20 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d16,d23 @ Maj(a,b,c)
veor d21,d26 @ Sigma0(a)
vshr.u64 d24,d17,#14 @ 3
vld1.64 {d3},[r1]! @ handles unaligned
vadd.i64 d21,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 3<16 && defined(__ARMEL__)
vbsl d29,d18,d19 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d23,d22 @ Maj(a,b,c)
veor d20,d26 @ Sigma0(a)
vshr.u64 d24,d16,#14 @ 4
vld1.64 {d4},[r1]! @ handles unaligned
vadd.i64 d20,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 4<16 && defined(__ARMEL__)
vbsl d29,d17,d18 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d22,d21 @ Maj(a,b,c)
veor d19,d26 @ Sigma0(a)
vshr.u64 d24,d23,#14 @ 5
vld1.64 {d5},[r1]! @ handles unaligned
vadd.i64 d19,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 5<16 && defined(__ARMEL__)
vbsl d29,d16,d17 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d21,d20 @ Maj(a,b,c)
veor d18,d26 @ Sigma0(a)
vshr.u64 d24,d22,#14 @ 6
vld1.64 {d6},[r1]! @ handles unaligned
vadd.i64 d18,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 6<16 && defined(__ARMEL__)
vbsl d29,d23,d16 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d20,d19 @ Maj(a,b,c)
veor d17,d26 @ Sigma0(a)
vshr.u64 d24,d21,#14 @ 7
vld1.64 {d7},[r1]! @ handles unaligned
vadd.i64 d17,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 7<16 && defined(__ARMEL__)
vbsl d29,d22,d23 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d19,d18 @ Maj(a,b,c)
veor d16,d26 @ Sigma0(a)
vshr.u64 d24,d20,#14 @ 8
vld1.64 {d8},[r1]! @ handles unaligned
vadd.i64 d16,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 8<16 && defined(__ARMEL__)
vbsl d29,d21,d22 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d18,d17 @ Maj(a,b,c)
veor d23,d26 @ Sigma0(a)
vshr.u64 d24,d19,#14 @ 9
vld1.64 {d9},[r1]! @ handles unaligned
vadd.i64 d23,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 9<16 && defined(__ARMEL__)
vbsl d29,d20,d21 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d17,d16 @ Maj(a,b,c)
veor d22,d26 @ Sigma0(a)
vshr.u64 d24,d18,#14 @ 10
vld1.64 {d10},[r1]! @ handles unaligned
vadd.i64 d22,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 10<16 && defined(__ARMEL__)
vbsl d29,d19,d20 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d16,d23 @ Maj(a,b,c)
veor d21,d26 @ Sigma0(a)
vshr.u64 d24,d17,#14 @ 11
vld1.64 {d11},[r1]! @ handles unaligned
vadd.i64 d21,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 11<16 && defined(__ARMEL__)
vbsl d29,d18,d19 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d23,d22 @ Maj(a,b,c)
veor d20,d26 @ Sigma0(a)
vshr.u64 d24,d16,#14 @ 12
vld1.64 {d12},[r1]! @ handles unaligned
vadd.i64 d20,d30 @ h+=Maj from the past
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 12<16 && defined(__ARMEL__)
vbsl d29,d17,d18 @ Ch(e,f,g)
veor d26,d25 @ Sigma1(e)
vbsl d30,d22,d21 @ Maj(a,b,c)
veor d19,d26 @ Sigma0(a)
vshr.u64 d24,d23,#14 @ 13
vld1.64 {d13},[r1]! @ handles unaligned
vadd.i64 d19,d30 @ h+=Maj from the past
vshr.u64 d26,d23,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 13<16 && defined(__ARMEL__)
vbsl d29,d16,d17 @ Ch(e,f,g)
vshr.u64 d24,d19,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d18
vshr.u64 d25,d19,#34
vshr.u64 d26,d19,#39
vbsl d30,d21,d20 @ Maj(a,b,c)
veor d18,d26 @ Sigma0(a)
vshr.u64 d24,d22,#14 @ 14
vld1.64 {d14},[r1]! @ handles unaligned
vshr.u64 d25,d22,#18
vadd.i64 d18,d30 @ h+=Maj from the past
vshr.u64 d26,d22,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 14<16 && defined(__ARMEL__)
vbsl d29,d23,d16 @ Ch(e,f,g)
vshr.u64 d24,d18,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d17
vshr.u64 d25,d18,#34
vshr.u64 d26,d18,#39
vbsl d30,d20,d19 @ Maj(a,b,c)
veor d17,d26 @ Sigma0(a)
vshr.u64 d24,d21,#14 @ 15
vld1.64 {d15},[r1]! @ handles unaligned
vshr.u64 d25,d21,#18
vadd.i64 d17,d30 @ h+=Maj from the past
vshr.u64 d26,d21,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 15<16 && defined(__ARMEL__)
vbsl d29,d22,d23 @ Ch(e,f,g)
vshr.u64 d24,d17,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d16
vshr.u64 d25,d17,#34
vshr.u64 d26,d17,#39
vbsl d30,d19,d18 @ Maj(a,b,c)
veor d16,d26 @ Sigma0(a)
vadd.i64 d16,d30 @ h+=Maj from the past
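@ Rounds 16..79: the message schedule is updated in place two words at a
@ time, X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9] (indices mod 16),
@ interleaved with the round function below.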
vext.8 q14,q0,q1,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q4,q5,#8 @ X[i+9]
vshr.u64 d24,d20,#14 @ from NEON_00_15
vshr.u64 d25,d20,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d20,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 16<16 && defined(__ARMEL__)
vbsl d29,d21,d22 @ Ch(e,f,g)
vshr.u64 d24,d16,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d23
vshr.u64 d25,d16,#34
vshr.u64 d26,d16,#39
vbsl d30,d18,d17 @ Maj(a,b,c)
veor d23,d26 @ Sigma0(a)
vshr.u64 d24,d19,#14 @ 17
vld1.64 {d1},[r1]! @ handles unaligned
vshr.u64 d25,d19,#18
vadd.i64 d23,d30 @ h+=Maj from the past
vshr.u64 d26,d19,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 17<16 && defined(__ARMEL__)
vbsl d29,d20,d21 @ Ch(e,f,g)
vshr.u64 d24,d23,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d22
vshr.u64 d25,d23,#34
vshr.u64 d26,d23,#39
vbsl d30,d17,d16 @ Maj(a,b,c)
veor d22,d26 @ Sigma0(a)
vadd.i64 d22,d30 @ h+=Maj from the past
vext.8 q14,q1,q2,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q5,q6,#8 @ X[i+9]
vshr.u64 d24,d18,#14 @ from NEON_00_15
vshr.u64 d25,d18,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d18,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 18<16 && defined(__ARMEL__)
vbsl d29,d19,d20 @ Ch(e,f,g)
vshr.u64 d24,d22,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d21
vshr.u64 d25,d22,#34
vshr.u64 d26,d22,#39
vbsl d30,d16,d23 @ Maj(a,b,c)
veor d21,d26 @ Sigma0(a)
vshr.u64 d24,d17,#14 @ 19
vld1.64 {d3},[r1]! @ handles unaligned
vshr.u64 d25,d17,#18
vadd.i64 d21,d30 @ h+=Maj from the past
vshr.u64 d26,d17,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 19<16 && defined(__ARMEL__)
vbsl d29,d18,d19 @ Ch(e,f,g)
vshr.u64 d24,d21,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d20
vshr.u64 d25,d21,#34
vshr.u64 d26,d21,#39
vbsl d30,d23,d22 @ Maj(a,b,c)
veor d20,d26 @ Sigma0(a)
vadd.i64 d20,d30 @ h+=Maj from the past
vext.8 q14,q2,q3,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q6,q7,#8 @ X[i+9]
vshr.u64 d24,d16,#14 @ from NEON_00_15
vshr.u64 d25,d16,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d16,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 20<16 && defined(__ARMEL__)
vbsl d29,d17,d18 @ Ch(e,f,g)
vshr.u64 d24,d20,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d19
vshr.u64 d25,d20,#34
vshr.u64 d26,d20,#39
vbsl d30,d22,d21 @ Maj(a,b,c)
veor d19,d26 @ Sigma0(a)
vshr.u64 d24,d23,#14 @ 21
vld1.64 {d5},[r1]! @ handles unaligned
vshr.u64 d25,d23,#18
vadd.i64 d19,d30 @ h+=Maj from the past
vshr.u64 d26,d23,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 21<16 && defined(__ARMEL__)
vbsl d29,d16,d17 @ Ch(e,f,g)
vshr.u64 d24,d19,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d18
vshr.u64 d25,d19,#34
vshr.u64 d26,d19,#39
vbsl d30,d21,d20 @ Maj(a,b,c)
veor d18,d26 @ Sigma0(a)
vadd.i64 d18,d30 @ h+=Maj from the past
vext.8 q14,q3,q4,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q7,q0,#8 @ X[i+9]
vshr.u64 d24,d22,#14 @ from NEON_00_15
vshr.u64 d25,d22,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d22,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 22<16 && defined(__ARMEL__)
vbsl d29,d23,d16 @ Ch(e,f,g)
vshr.u64 d24,d18,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d17
vshr.u64 d25,d18,#34
vshr.u64 d26,d18,#39
vbsl d30,d20,d19 @ Maj(a,b,c)
veor d17,d26 @ Sigma0(a)
vshr.u64 d24,d21,#14 @ 23
vld1.64 {d7},[r1]! @ handles unaligned
vshr.u64 d25,d21,#18
vadd.i64 d17,d30 @ h+=Maj from the past
vshr.u64 d26,d21,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 23<16 && defined(__ARMEL__)
vbsl d29,d22,d23 @ Ch(e,f,g)
vshr.u64 d24,d17,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d16
vshr.u64 d25,d17,#34
vshr.u64 d26,d17,#39
vbsl d30,d19,d18 @ Maj(a,b,c)
veor d16,d26 @ Sigma0(a)
vadd.i64 d16,d30 @ h+=Maj from the past
vext.8 q14,q4,q5,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q0,q1,#8 @ X[i+9]
vshr.u64 d24,d20,#14 @ from NEON_00_15
vshr.u64 d25,d20,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d20,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 24<16 && defined(__ARMEL__)
vbsl d29,d21,d22 @ Ch(e,f,g)
vshr.u64 d24,d16,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d23
vshr.u64 d25,d16,#34
vshr.u64 d26,d16,#39
vbsl d30,d18,d17 @ Maj(a,b,c)
veor d23,d26 @ Sigma0(a)
vshr.u64 d24,d19,#14 @ 25
vld1.64 {d9},[r1]! @ handles unaligned
vshr.u64 d25,d19,#18
vadd.i64 d23,d30 @ h+=Maj from the past
vshr.u64 d26,d19,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 25<16 && defined(__ARMEL__)
vbsl d29,d20,d21 @ Ch(e,f,g)
vshr.u64 d24,d23,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d22
vshr.u64 d25,d23,#34
vshr.u64 d26,d23,#39
vbsl d30,d17,d16 @ Maj(a,b,c)
veor d22,d26 @ Sigma0(a)
vadd.i64 d22,d30 @ h+=Maj from the past
vext.8 q14,q5,q6,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q1,q2,#8 @ X[i+9]
vshr.u64 d24,d18,#14 @ from NEON_00_15
vshr.u64 d25,d18,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d18,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 26<16 && defined(__ARMEL__)
vbsl d29,d19,d20 @ Ch(e,f,g)
vshr.u64 d24,d22,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d21
vshr.u64 d25,d22,#34
vshr.u64 d26,d22,#39
vbsl d30,d16,d23 @ Maj(a,b,c)
veor d21,d26 @ Sigma0(a)
vshr.u64 d24,d17,#14 @ 27
vld1.64 {d11},[r1]! @ handles unaligned
vshr.u64 d25,d17,#18
vadd.i64 d21,d30 @ h+=Maj from the past
vshr.u64 d26,d17,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 27<16 && defined(__ARMEL__)
vbsl d29,d18,d19 @ Ch(e,f,g)
vshr.u64 d24,d21,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d20
vshr.u64 d25,d21,#34
vshr.u64 d26,d21,#39
vbsl d30,d23,d22 @ Maj(a,b,c)
veor d20,d26 @ Sigma0(a)
vadd.i64 d20,d30 @ h+=Maj from the past
vext.8 q14,q6,q7,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q2,q3,#8 @ X[i+9]
vshr.u64 d24,d16,#14 @ from NEON_00_15
vshr.u64 d25,d16,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d16,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 28<16 && defined(__ARMEL__)
vbsl d29,d17,d18 @ Ch(e,f,g)
vshr.u64 d24,d20,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d19
vshr.u64 d25,d20,#34
vshr.u64 d26,d20,#39
vbsl d30,d22,d21 @ Maj(a,b,c)
veor d19,d26 @ Sigma0(a)
vshr.u64 d24,d23,#14 @ 29
vld1.64 {d13},[r1]! @ handles unaligned
vshr.u64 d25,d23,#18
vadd.i64 d19,d30 @ h+=Maj from the past
vshr.u64 d26,d23,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 29<16 && defined(__ARMEL__)
vbsl d29,d16,d17 @ Ch(e,f,g)
vshr.u64 d24,d19,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d18
vshr.u64 d25,d19,#34
vshr.u64 d26,d19,#39
vbsl d30,d21,d20 @ Maj(a,b,c)
veor d18,d26 @ Sigma0(a)
vadd.i64 d18,d30 @ h+=Maj from the past
vext.8 q14,q7,q0,#8 @ X[i+1]
veor q15,q13 @ sigma1(X[i+14])
vext.8 q14,q3,q4,#8 @ X[i+9]
vshr.u64 d24,d22,#14 @ from NEON_00_15
vshr.u64 d25,d22,#18 @ from NEON_00_15
veor q15,q13 @ sigma0(X[i+1])
vshr.u64 d26,d22,#41 @ from NEON_00_15
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 30<16 && defined(__ARMEL__)
vbsl d29,d23,d16 @ Ch(e,f,g)
vshr.u64 d24,d18,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d17
vshr.u64 d25,d18,#34
vshr.u64 d26,d18,#39
vbsl d30,d20,d19 @ Maj(a,b,c)
veor d17,d26 @ Sigma0(a)
vshr.u64 d24,d21,#14 @ 31
vld1.64 {d15},[r1]! @ handles unaligned
vshr.u64 d25,d21,#18
vadd.i64 d17,d30 @ h+=Maj from the past
vshr.u64 d26,d21,#41
vld1.64 {d28},[r3,:64]! @ K[i++]
#if 31<16 && defined(__ARMEL__)
vbsl d29,d22,d23 @ Ch(e,f,g)
vshr.u64 d24,d17,#28
veor d26,d25 @ Sigma1(e)
vadd.i64 d27,d29,d16
vshr.u64 d25,d17,#34
vshr.u64 d26,d17,#39
vbsl d30,d19,d18 @ Maj(a,b,c)
veor d16,d26 @ Sigma0(a)
vadd.i64 d16,d30 @ h+=Maj from the past
vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp
vadd.i64 q8,q12 @ vectorized accumulate
vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context
sub r3,#640 @ rewind K512
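@ 80 rounds x 8 bytes = 640, so r3 again points at the start of K512
@ for the next block.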
bx lr @ .word 0xe12fff1e
.size zfs_sha512_block_neon,.-zfs_sha512_block_neon
#endif // #if __ARM_ARCH__ >= 7
#endif // #if defined(__arm__)