/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 * - modified assembly to fit into OpenZFS
 */
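
/*
 * Summary, derived from the code below (not authoritative documentation):
 * this file appears to be OpenSSL perlasm output adapted for OpenZFS and
 * provides two 32-bit ARM implementations of the SHA-512 block transform:
 *   zfs_sha512_block_armv7 - plain ARMv7 integer code
 *   zfs_sha512_block_neon  - NEON variant (assembled only for __ARM_ARCH__ >= 7)
 */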

#if defined(__arm__)

#ifndef __ARM_ARCH
# define __ARM_ARCH__   7
#else
# define __ARM_ARCH__   __ARM_ARCH
#endif

#ifndef __KERNEL__
# define VFP_ABI_PUSH   vstmdb  sp!,{d8-d15}
# define VFP_ABI_POP    vldmia  sp!,{d8-d15}
#else
# define VFP_ABI_PUSH
# define VFP_ABI_POP
#endif

#ifdef __ARMEL__
# define LO 0
# define HI 4
# define WORD64(hi0,lo0,hi1,lo1)        .word   lo0,hi0, lo1,hi1
#else
# define HI 0
# define LO 4
# define WORD64(hi0,lo0,hi1,lo1)        .word   hi0,lo0, hi1,lo1
#endif

#if defined(__thumb2__)
.syntax unified
.thumb
# define adrl adr
#else
.code   32
#endif

.text
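
@ K512 below holds the 80 64-bit SHA-512 round constants.  Each WORD64()
@ entry expands to two constants split into 32-bit halves; together with the
@ LO/HI offsets defined above this keeps the table usable on both little-
@ and big-endian builds.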
.type   K512,%object
.align  5
K512:
        WORD64(0x428a2f98,0xd728ae22,   0x71374491,0x23ef65cd)
        WORD64(0xb5c0fbcf,0xec4d3b2f,   0xe9b5dba5,0x8189dbbc)
        WORD64(0x3956c25b,0xf348b538,   0x59f111f1,0xb605d019)
        WORD64(0x923f82a4,0xaf194f9b,   0xab1c5ed5,0xda6d8118)
        WORD64(0xd807aa98,0xa3030242,   0x12835b01,0x45706fbe)
        WORD64(0x243185be,0x4ee4b28c,   0x550c7dc3,0xd5ffb4e2)
        WORD64(0x72be5d74,0xf27b896f,   0x80deb1fe,0x3b1696b1)
        WORD64(0x9bdc06a7,0x25c71235,   0xc19bf174,0xcf692694)
        WORD64(0xe49b69c1,0x9ef14ad2,   0xefbe4786,0x384f25e3)
        WORD64(0x0fc19dc6,0x8b8cd5b5,   0x240ca1cc,0x77ac9c65)
        WORD64(0x2de92c6f,0x592b0275,   0x4a7484aa,0x6ea6e483)
        WORD64(0x5cb0a9dc,0xbd41fbd4,   0x76f988da,0x831153b5)
        WORD64(0x983e5152,0xee66dfab,   0xa831c66d,0x2db43210)
        WORD64(0xb00327c8,0x98fb213f,   0xbf597fc7,0xbeef0ee4)
        WORD64(0xc6e00bf3,0x3da88fc2,   0xd5a79147,0x930aa725)
        WORD64(0x06ca6351,0xe003826f,   0x14292967,0x0a0e6e70)
        WORD64(0x27b70a85,0x46d22ffc,   0x2e1b2138,0x5c26c926)
        WORD64(0x4d2c6dfc,0x5ac42aed,   0x53380d13,0x9d95b3df)
        WORD64(0x650a7354,0x8baf63de,   0x766a0abb,0x3c77b2a8)
        WORD64(0x81c2c92e,0x47edaee6,   0x92722c85,0x1482353b)
        WORD64(0xa2bfe8a1,0x4cf10364,   0xa81a664b,0xbc423001)
        WORD64(0xc24b8b70,0xd0f89791,   0xc76c51a3,0x0654be30)
        WORD64(0xd192e819,0xd6ef5218,   0xd6990624,0x5565a910)
        WORD64(0xf40e3585,0x5771202a,   0x106aa070,0x32bbd1b8)
        WORD64(0x19a4c116,0xb8d2d0c8,   0x1e376c08,0x5141ab53)
        WORD64(0x2748774c,0xdf8eeb99,   0x34b0bcb5,0xe19b48a8)
        WORD64(0x391c0cb3,0xc5c95a63,   0x4ed8aa4a,0xe3418acb)
        WORD64(0x5b9cca4f,0x7763e373,   0x682e6ff3,0xd6b2b8a3)
        WORD64(0x748f82ee,0x5defb2fc,   0x78a5636f,0x43172f60)
        WORD64(0x84c87814,0xa1f0ab72,   0x8cc70208,0x1a6439ec)
        WORD64(0x90befffa,0x23631e28,   0xa4506ceb,0xde82bde9)
        WORD64(0xbef9a3f7,0xb2c67915,   0xc67178f2,0xe372532b)
        WORD64(0xca273ece,0xea26619c,   0xd186b8c7,0x21c0c207)
        WORD64(0xeada7dd6,0xcde0eb1e,   0xf57d4f7f,0xee6ed178)
        WORD64(0x06f067aa,0x72176fba,   0x0a637dc5,0xa2c898a6)
        WORD64(0x113f9804,0xbef90dae,   0x1b710b35,0x131c471b)
        WORD64(0x28db77f5,0x23047d84,   0x32caab7b,0x40c72493)
        WORD64(0x3c9ebe0a,0x15c9bebc,   0x431d67c4,0x9c100d4c)
        WORD64(0x4cc5d4be,0xcb3e42b6,   0x597f299c,0xfc657e2a)
        WORD64(0x5fcb6fab,0x3ad6faec,   0x6c44198c,0x4a475817)
.size   K512,.-K512
.word   0                               @ terminator
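
@ zfs_sha512_block_armv7(ctx, inp, num)
@ Calling convention as inferred from the code below: r0 points at the eight
@ 64-bit hash state words, r1 at the input, and r2 holds the number of
@ 128-byte blocks (turned into an end-of-input pointer via r2,lsl#7).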
.align  5
.globl  zfs_sha512_block_armv7
.type   zfs_sha512_block_armv7,%function
zfs_sha512_block_armv7:
.Lzfs_sha512_block_armv7:

#if __ARM_ARCH__<7 && !defined(__thumb2__)
        sub     r3,pc,#8                @ zfs_sha512_block_armv7
#else
        adr     r3,.Lzfs_sha512_block_armv7
#endif

        add     r2,r1,r2,lsl#7  @ len to point at the end of inp
        stmdb   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
        sub     r14,r3,#672             @ K512
        sub     sp,sp,#9*8
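        @ Frame note (inferred): the area reserved here grows by 8 bytes per
        @ round (see "sub sp,sp,#8" below) as the expanded message schedule
        @ is pushed, and the whole 80*8 = 640-byte region is released again
        @ with "add sp,sp,#640" once all rounds of a block are done.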
        ldr     r7,[r0,#32+LO]
        ldr     r8,[r0,#32+HI]
        ldr     r9, [r0,#48+LO]
        ldr     r10, [r0,#48+HI]
        ldr     r11, [r0,#56+LO]
        ldr     r12, [r0,#56+HI]
.Loop:
        str     r9, [sp,#48+0]
        str     r10, [sp,#48+4]
        str     r11, [sp,#56+0]
        str     r12, [sp,#56+4]
        ldr     r5,[r0,#0+LO]
        ldr     r6,[r0,#0+HI]
        ldr     r3,[r0,#8+LO]
        ldr     r4,[r0,#8+HI]
        ldr     r9, [r0,#16+LO]
        ldr     r10, [r0,#16+HI]
        ldr     r11, [r0,#24+LO]
        ldr     r12, [r0,#24+HI]
        str     r3,[sp,#8+0]
        str     r4,[sp,#8+4]
        str     r9, [sp,#16+0]
        str     r10, [sp,#16+4]
        str     r11, [sp,#24+0]
        str     r12, [sp,#24+4]
        ldr     r3,[r0,#40+LO]
        ldr     r4,[r0,#40+HI]
        str     r3,[sp,#40+0]
        str     r4,[sp,#40+4]

.L00_15:
#if __ARM_ARCH__<7
        ldrb    r3,[r1,#7]
        ldrb    r9, [r1,#6]
        ldrb    r10, [r1,#5]
        ldrb    r11, [r1,#4]
        ldrb    r4,[r1,#3]
        ldrb    r12, [r1,#2]
        orr     r3,r3,r9,lsl#8
        ldrb    r9, [r1,#1]
        orr     r3,r3,r10,lsl#16
        ldrb    r10, [r1],#8
        orr     r3,r3,r11,lsl#24
        orr     r4,r4,r12,lsl#8
        orr     r4,r4,r9,lsl#16
        orr     r4,r4,r10,lsl#24
#else
        ldr     r3,[r1,#4]
        ldr     r4,[r1],#8
#ifdef __ARMEL__
        rev     r3,r3
        rev     r4,r4
#endif
#endif
        @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
        @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
        @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
        mov     r9,r7,lsr#14
        str     r3,[sp,#64+0]
        mov     r10,r8,lsr#14
        str     r4,[sp,#64+4]
        eor     r9,r9,r8,lsl#18
        ldr     r11,[sp,#56+0]  @ h.lo
        eor     r10,r10,r7,lsl#18
        ldr     r12,[sp,#56+4]  @ h.hi
        eor     r9,r9,r7,lsr#18
        eor     r10,r10,r8,lsr#18
        eor     r9,r9,r8,lsl#14
        eor     r10,r10,r7,lsl#14
        eor     r9,r9,r8,lsr#9
        eor     r10,r10,r7,lsr#9
        eor     r9,r9,r7,lsl#23
        eor     r10,r10,r8,lsl#23       @ Sigma1(e)
        adds    r3,r3,r9
        ldr     r9,[sp,#40+0]   @ f.lo
        adc     r4,r4,r10               @ T += Sigma1(e)
        ldr     r10,[sp,#40+4]  @ f.hi
        adds    r3,r3,r11
        ldr     r11,[sp,#48+0]  @ g.lo
        adc     r4,r4,r12               @ T += h
        ldr     r12,[sp,#48+4]  @ g.hi

        eor     r9,r9,r11
        str     r7,[sp,#32+0]
        eor     r10,r10,r12
        str     r8,[sp,#32+4]
        and     r9,r9,r7
        str     r5,[sp,#0+0]
        and     r10,r10,r8
        str     r6,[sp,#0+4]
        eor     r9,r9,r11
        ldr     r11,[r14,#LO]   @ K[i].lo
        eor     r10,r10,r12             @ Ch(e,f,g)
        ldr     r12,[r14,#HI]   @ K[i].hi

        adds    r3,r3,r9
        ldr     r7,[sp,#24+0]   @ d.lo
        adc     r4,r4,r10               @ T += Ch(e,f,g)
        ldr     r8,[sp,#24+4]   @ d.hi
        adds    r3,r3,r11
        and     r9,r11,#0xff
        adc     r4,r4,r12               @ T += K[i]
        adds    r7,r7,r3
        ldr     r11,[sp,#8+0]   @ b.lo
        adc     r8,r8,r4                @ d += T
        teq     r9,#148

        ldr     r12,[sp,#16+0]  @ c.lo
#ifdef  __thumb2__
        it      eq                      @ Thumb2 thing, sanity check in ARM
#endif
        orreq   r14,r14,#1
        @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
        @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
        @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
        mov     r9,r5,lsr#28
        mov     r10,r6,lsr#28
        eor     r9,r9,r6,lsl#4
        eor     r10,r10,r5,lsl#4
        eor     r9,r9,r6,lsr#2
        eor     r10,r10,r5,lsr#2
        eor     r9,r9,r5,lsl#30
        eor     r10,r10,r6,lsl#30
        eor     r9,r9,r6,lsr#7
        eor     r10,r10,r5,lsr#7
        eor     r9,r9,r5,lsl#25
        eor     r10,r10,r6,lsl#25       @ Sigma0(a)
        adds    r3,r3,r9
        and     r9,r5,r11
        adc     r4,r4,r10               @ T += Sigma0(a)

        ldr     r10,[sp,#8+4]   @ b.hi
        orr     r5,r5,r11
        ldr     r11,[sp,#16+4]  @ c.hi
        and     r5,r5,r12
        and     r12,r6,r10
        orr     r6,r6,r10
        orr     r5,r5,r9                @ Maj(a,b,c).lo
        and     r6,r6,r11
        adds    r5,r5,r3
        orr     r6,r6,r12               @ Maj(a,b,c).hi
        sub     sp,sp,#8
        adc     r6,r6,r4                @ h += T
        tst     r14,#1
        add     r14,r14,#8
        tst     r14,#1
        beq     .L00_15
        ldr     r9,[sp,#184+0]
        ldr     r10,[sp,#184+4]
        bic     r14,r14,#1
.L16_79:
        @ sigma0(x)     (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
        @ LO            lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
        @ HI            hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
        mov     r3,r9,lsr#1
        ldr     r11,[sp,#80+0]
        mov     r4,r10,lsr#1
        ldr     r12,[sp,#80+4]
        eor     r3,r3,r10,lsl#31
        eor     r4,r4,r9,lsl#31
        eor     r3,r3,r9,lsr#8
        eor     r4,r4,r10,lsr#8
        eor     r3,r3,r10,lsl#24
        eor     r4,r4,r9,lsl#24
        eor     r3,r3,r9,lsr#7
        eor     r4,r4,r10,lsr#7
        eor     r3,r3,r10,lsl#25

        @ sigma1(x)     (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
        @ LO            lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
        @ HI            hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
        mov     r9,r11,lsr#19
        mov     r10,r12,lsr#19
        eor     r9,r9,r12,lsl#13
        eor     r10,r10,r11,lsl#13
        eor     r9,r9,r12,lsr#29
        eor     r10,r10,r11,lsr#29
        eor     r9,r9,r11,lsl#3
        eor     r10,r10,r12,lsl#3
        eor     r9,r9,r11,lsr#6
        eor     r10,r10,r12,lsr#6
        ldr     r11,[sp,#120+0]
        eor     r9,r9,r12,lsl#26

        ldr     r12,[sp,#120+4]
        adds    r3,r3,r9
        ldr     r9,[sp,#192+0]
        adc     r4,r4,r10

        ldr     r10,[sp,#192+4]
        adds    r3,r3,r11
        adc     r4,r4,r12
        adds    r3,r3,r9
        adc     r4,r4,r10
        @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
        @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
        @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
        mov     r9,r7,lsr#14
        str     r3,[sp,#64+0]
        mov     r10,r8,lsr#14
        str     r4,[sp,#64+4]
        eor     r9,r9,r8,lsl#18
        ldr     r11,[sp,#56+0]  @ h.lo
        eor     r10,r10,r7,lsl#18
        ldr     r12,[sp,#56+4]  @ h.hi
        eor     r9,r9,r7,lsr#18
        eor     r10,r10,r8,lsr#18
        eor     r9,r9,r8,lsl#14
        eor     r10,r10,r7,lsl#14
        eor     r9,r9,r8,lsr#9
        eor     r10,r10,r7,lsr#9
        eor     r9,r9,r7,lsl#23
        eor     r10,r10,r8,lsl#23       @ Sigma1(e)
        adds    r3,r3,r9
        ldr     r9,[sp,#40+0]   @ f.lo
        adc     r4,r4,r10               @ T += Sigma1(e)
        ldr     r10,[sp,#40+4]  @ f.hi
        adds    r3,r3,r11
        ldr     r11,[sp,#48+0]  @ g.lo
        adc     r4,r4,r12               @ T += h
        ldr     r12,[sp,#48+4]  @ g.hi

        eor     r9,r9,r11
        str     r7,[sp,#32+0]
        eor     r10,r10,r12
        str     r8,[sp,#32+4]
        and     r9,r9,r7
        str     r5,[sp,#0+0]
        and     r10,r10,r8
        str     r6,[sp,#0+4]
        eor     r9,r9,r11
        ldr     r11,[r14,#LO]   @ K[i].lo
        eor     r10,r10,r12             @ Ch(e,f,g)
        ldr     r12,[r14,#HI]   @ K[i].hi

        adds    r3,r3,r9
        ldr     r7,[sp,#24+0]   @ d.lo
        adc     r4,r4,r10               @ T += Ch(e,f,g)
        ldr     r8,[sp,#24+4]   @ d.hi
        adds    r3,r3,r11
        and     r9,r11,#0xff
        adc     r4,r4,r12               @ T += K[i]
        adds    r7,r7,r3
        ldr     r11,[sp,#8+0]   @ b.lo
        adc     r8,r8,r4                @ d += T
        teq     r9,#23

        ldr     r12,[sp,#16+0]  @ c.lo
#ifdef  __thumb2__
        it      eq                      @ Thumb2 thing, sanity check in ARM
#endif
        orreq   r14,r14,#1
        @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
        @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
        @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
        mov     r9,r5,lsr#28
        mov     r10,r6,lsr#28
        eor     r9,r9,r6,lsl#4
        eor     r10,r10,r5,lsl#4
        eor     r9,r9,r6,lsr#2
        eor     r10,r10,r5,lsr#2
        eor     r9,r9,r5,lsl#30
        eor     r10,r10,r6,lsl#30
        eor     r9,r9,r6,lsr#7
        eor     r10,r10,r5,lsr#7
        eor     r9,r9,r5,lsl#25
        eor     r10,r10,r6,lsl#25       @ Sigma0(a)
        adds    r3,r3,r9
        and     r9,r5,r11
        adc     r4,r4,r10               @ T += Sigma0(a)

        ldr     r10,[sp,#8+4]   @ b.hi
        orr     r5,r5,r11
        ldr     r11,[sp,#16+4]  @ c.hi
        and     r5,r5,r12
        and     r12,r6,r10
        orr     r6,r6,r10
        orr     r5,r5,r9                @ Maj(a,b,c).lo
        and     r6,r6,r11
        adds    r5,r5,r3
        orr     r6,r6,r12               @ Maj(a,b,c).hi
        sub     sp,sp,#8
        adc     r6,r6,r4                @ h += T
        tst     r14,#1
        add     r14,r14,#8
#ifdef  __thumb2__
        ittt    eq                      @ Thumb2 thing, sanity check in ARM
#endif
        ldreq   r9,[sp,#184+0]
        ldreq   r10,[sp,#184+4]
        beq     .L16_79
        bic     r14,r14,#1

        ldr     r3,[sp,#8+0]
        ldr     r4,[sp,#8+4]
        ldr     r9, [r0,#0+LO]
        ldr     r10, [r0,#0+HI]
        ldr     r11, [r0,#8+LO]
        ldr     r12, [r0,#8+HI]
        adds    r9,r5,r9
        str     r9, [r0,#0+LO]
        adc     r10,r6,r10
        str     r10, [r0,#0+HI]
        adds    r11,r3,r11
        str     r11, [r0,#8+LO]
        adc     r12,r4,r12
        str     r12, [r0,#8+HI]

        ldr     r5,[sp,#16+0]
        ldr     r6,[sp,#16+4]
        ldr     r3,[sp,#24+0]
        ldr     r4,[sp,#24+4]
        ldr     r9, [r0,#16+LO]
        ldr     r10, [r0,#16+HI]
        ldr     r11, [r0,#24+LO]
        ldr     r12, [r0,#24+HI]
        adds    r9,r5,r9
        str     r9, [r0,#16+LO]
        adc     r10,r6,r10
        str     r10, [r0,#16+HI]
        adds    r11,r3,r11
        str     r11, [r0,#24+LO]
        adc     r12,r4,r12
        str     r12, [r0,#24+HI]

        ldr     r3,[sp,#40+0]
        ldr     r4,[sp,#40+4]
        ldr     r9, [r0,#32+LO]
        ldr     r10, [r0,#32+HI]
        ldr     r11, [r0,#40+LO]
        ldr     r12, [r0,#40+HI]
        adds    r7,r7,r9
        str     r7,[r0,#32+LO]
        adc     r8,r8,r10
        str     r8,[r0,#32+HI]
        adds    r11,r3,r11
        str     r11, [r0,#40+LO]
        adc     r12,r4,r12
        str     r12, [r0,#40+HI]

        ldr     r5,[sp,#48+0]
        ldr     r6,[sp,#48+4]
        ldr     r3,[sp,#56+0]
        ldr     r4,[sp,#56+4]
        ldr     r9, [r0,#48+LO]
        ldr     r10, [r0,#48+HI]
        ldr     r11, [r0,#56+LO]
        ldr     r12, [r0,#56+HI]
        adds    r9,r5,r9
        str     r9, [r0,#48+LO]
        adc     r10,r6,r10
        str     r10, [r0,#48+HI]
        adds    r11,r3,r11
        str     r11, [r0,#56+LO]
        adc     r12,r4,r12
        str     r12, [r0,#56+HI]

        add     sp,sp,#640
        sub     r14,r14,#640

        teq     r1,r2
        bne     .Loop

        add     sp,sp,#8*9              @ destroy frame

#if __ARM_ARCH__>=5
        ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
        ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
        tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
.word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
#endif
.size   zfs_sha512_block_armv7,.-zfs_sha512_block_armv7

#if __ARM_ARCH__ >= 7
.arch   armv7-a
.fpu    neon
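
@ zfs_sha512_block_neon: same arguments as zfs_sha512_block_armv7 above
@ (r0 = state, r1 = input, r2 = number of 128-byte blocks, judging by the
@ identical "add r2,r1,r2,lsl#7"), but the eight 64-bit state words are kept
@ in d16-d23 and the message block in d0-d15, avoiding the per-round stack
@ traffic of the integer version.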
.globl  zfs_sha512_block_neon
.type   zfs_sha512_block_neon,%function
.align  4
zfs_sha512_block_neon:
.LNEON:
        dmb     @ errata #451034 on early Cortex A8
        add     r2,r1,r2,lsl#7  @ len to point at the end of inp
        adr     r3,K512
        VFP_ABI_PUSH
        vldmia  r0,{d16,d17,d18,d19,d20,d21,d22,d23}            @ load context
.Loop_neon:
        vshr.u64        d24,d20,#14     @ 0
#if 0<16
        vld1.64 {d0},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d20,#18
#if 0>0
        vadd.i64        d16,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d20,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d20,#50
        vsli.64 d25,d20,#46
        vmov    d29,d20
        vsli.64 d26,d20,#23
#if 0<16 && defined(__ARMEL__)
        vrev64.8        d0,d0
#endif
        veor    d25,d24
        vbsl    d29,d21,d22             @ Ch(e,f,g)
        vshr.u64        d24,d16,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d23
        vshr.u64        d25,d16,#34
        vsli.64 d24,d16,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d16,#39
        vadd.i64        d28,d0
        vsli.64 d25,d16,#30
        veor    d30,d16,d17
        vsli.64 d26,d16,#25
        veor    d23,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d18,d17             @ Maj(a,b,c)
        veor    d23,d26                 @ Sigma0(a)
        vadd.i64        d19,d27
        vadd.i64        d30,d27
        @ vadd.i64      d23,d30
        vshr.u64        d24,d19,#14     @ 1
#if 1<16
        vld1.64 {d1},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d19,#18
#if 1>0
        vadd.i64        d23,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d19,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d19,#50
        vsli.64 d25,d19,#46
        vmov    d29,d19
        vsli.64 d26,d19,#23
#if 1<16 && defined(__ARMEL__)
        vrev64.8        d1,d1
#endif
        veor    d25,d24
        vbsl    d29,d20,d21             @ Ch(e,f,g)
        vshr.u64        d24,d23,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d22
        vshr.u64        d25,d23,#34
        vsli.64 d24,d23,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d23,#39
        vadd.i64        d28,d1
        vsli.64 d25,d23,#30
        veor    d30,d23,d16
        vsli.64 d26,d23,#25
        veor    d22,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d17,d16             @ Maj(a,b,c)
        veor    d22,d26                 @ Sigma0(a)
        vadd.i64        d18,d27
        vadd.i64        d30,d27
        @ vadd.i64      d22,d30
        vshr.u64        d24,d18,#14     @ 2
#if 2<16
        vld1.64 {d2},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d18,#18
#if 2>0
        vadd.i64        d22,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d18,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d18,#50
        vsli.64 d25,d18,#46
        vmov    d29,d18
        vsli.64 d26,d18,#23
#if 2<16 && defined(__ARMEL__)
        vrev64.8        d2,d2
#endif
        veor    d25,d24
        vbsl    d29,d19,d20             @ Ch(e,f,g)
        vshr.u64        d24,d22,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d21
        vshr.u64        d25,d22,#34
        vsli.64 d24,d22,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d22,#39
        vadd.i64        d28,d2
        vsli.64 d25,d22,#30
        veor    d30,d22,d23
        vsli.64 d26,d22,#25
        veor    d21,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d16,d23             @ Maj(a,b,c)
        veor    d21,d26                 @ Sigma0(a)
        vadd.i64        d17,d27
        vadd.i64        d30,d27
        @ vadd.i64      d21,d30
        vshr.u64        d24,d17,#14     @ 3
#if 3<16
        vld1.64 {d3},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d17,#18
#if 3>0
        vadd.i64        d21,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d17,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d17,#50
        vsli.64 d25,d17,#46
        vmov    d29,d17
        vsli.64 d26,d17,#23
#if 3<16 && defined(__ARMEL__)
        vrev64.8        d3,d3
#endif
        veor    d25,d24
        vbsl    d29,d18,d19             @ Ch(e,f,g)
        vshr.u64        d24,d21,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d20
        vshr.u64        d25,d21,#34
        vsli.64 d24,d21,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d21,#39
        vadd.i64        d28,d3
        vsli.64 d25,d21,#30
        veor    d30,d21,d22
        vsli.64 d26,d21,#25
        veor    d20,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d23,d22             @ Maj(a,b,c)
        veor    d20,d26                 @ Sigma0(a)
        vadd.i64        d16,d27
        vadd.i64        d30,d27
        @ vadd.i64      d20,d30
        vshr.u64        d24,d16,#14     @ 4
#if 4<16
        vld1.64 {d4},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d16,#18
#if 4>0
        vadd.i64        d20,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d16,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d16,#50
        vsli.64 d25,d16,#46
        vmov    d29,d16
        vsli.64 d26,d16,#23
#if 4<16 && defined(__ARMEL__)
        vrev64.8        d4,d4
#endif
        veor    d25,d24
        vbsl    d29,d17,d18             @ Ch(e,f,g)
        vshr.u64        d24,d20,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d19
        vshr.u64        d25,d20,#34
        vsli.64 d24,d20,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d20,#39
        vadd.i64        d28,d4
        vsli.64 d25,d20,#30
        veor    d30,d20,d21
        vsli.64 d26,d20,#25
        veor    d19,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d22,d21             @ Maj(a,b,c)
        veor    d19,d26                 @ Sigma0(a)
        vadd.i64        d23,d27
        vadd.i64        d30,d27
        @ vadd.i64      d19,d30
        vshr.u64        d24,d23,#14     @ 5
#if 5<16
        vld1.64 {d5},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d23,#18
#if 5>0
        vadd.i64        d19,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d23,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d23,#50
        vsli.64 d25,d23,#46
        vmov    d29,d23
        vsli.64 d26,d23,#23
#if 5<16 && defined(__ARMEL__)
        vrev64.8        d5,d5
#endif
        veor    d25,d24
        vbsl    d29,d16,d17             @ Ch(e,f,g)
        vshr.u64        d24,d19,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d18
        vshr.u64        d25,d19,#34
        vsli.64 d24,d19,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d19,#39
        vadd.i64        d28,d5
        vsli.64 d25,d19,#30
        veor    d30,d19,d20
        vsli.64 d26,d19,#25
        veor    d18,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d21,d20             @ Maj(a,b,c)
        veor    d18,d26                 @ Sigma0(a)
        vadd.i64        d22,d27
        vadd.i64        d30,d27
        @ vadd.i64      d18,d30
        vshr.u64        d24,d22,#14     @ 6
#if 6<16
        vld1.64 {d6},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d22,#18
#if 6>0
        vadd.i64        d18,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d22,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d22,#50
        vsli.64 d25,d22,#46
        vmov    d29,d22
        vsli.64 d26,d22,#23
#if 6<16 && defined(__ARMEL__)
        vrev64.8        d6,d6
#endif
        veor    d25,d24
        vbsl    d29,d23,d16             @ Ch(e,f,g)
        vshr.u64        d24,d18,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d17
        vshr.u64        d25,d18,#34
        vsli.64 d24,d18,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d18,#39
        vadd.i64        d28,d6
        vsli.64 d25,d18,#30
        veor    d30,d18,d19
        vsli.64 d26,d18,#25
        veor    d17,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d20,d19             @ Maj(a,b,c)
        veor    d17,d26                 @ Sigma0(a)
        vadd.i64        d21,d27
        vadd.i64        d30,d27
        @ vadd.i64      d17,d30
        vshr.u64        d24,d21,#14     @ 7
#if 7<16
        vld1.64 {d7},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d21,#18
#if 7>0
        vadd.i64        d17,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d21,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d21,#50
        vsli.64 d25,d21,#46
        vmov    d29,d21
        vsli.64 d26,d21,#23
#if 7<16 && defined(__ARMEL__)
        vrev64.8        d7,d7
#endif
        veor    d25,d24
        vbsl    d29,d22,d23             @ Ch(e,f,g)
        vshr.u64        d24,d17,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d16
        vshr.u64        d25,d17,#34
        vsli.64 d24,d17,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d17,#39
        vadd.i64        d28,d7
        vsli.64 d25,d17,#30
        veor    d30,d17,d18
        vsli.64 d26,d17,#25
        veor    d16,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d19,d18             @ Maj(a,b,c)
        veor    d16,d26                 @ Sigma0(a)
        vadd.i64        d20,d27
        vadd.i64        d30,d27
        @ vadd.i64      d16,d30
        vshr.u64        d24,d20,#14     @ 8
#if 8<16
        vld1.64 {d8},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d20,#18
#if 8>0
        vadd.i64        d16,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d20,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d20,#50
        vsli.64 d25,d20,#46
        vmov    d29,d20
        vsli.64 d26,d20,#23
#if 8<16 && defined(__ARMEL__)
        vrev64.8        d8,d8
#endif
        veor    d25,d24
        vbsl    d29,d21,d22             @ Ch(e,f,g)
        vshr.u64        d24,d16,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d23
        vshr.u64        d25,d16,#34
        vsli.64 d24,d16,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d16,#39
        vadd.i64        d28,d8
        vsli.64 d25,d16,#30
        veor    d30,d16,d17
        vsli.64 d26,d16,#25
        veor    d23,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d18,d17             @ Maj(a,b,c)
        veor    d23,d26                 @ Sigma0(a)
        vadd.i64        d19,d27
        vadd.i64        d30,d27
        @ vadd.i64      d23,d30
        vshr.u64        d24,d19,#14     @ 9
#if 9<16
        vld1.64 {d9},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d19,#18
#if 9>0
        vadd.i64        d23,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d19,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d19,#50
        vsli.64 d25,d19,#46
        vmov    d29,d19
        vsli.64 d26,d19,#23
#if 9<16 && defined(__ARMEL__)
        vrev64.8        d9,d9
#endif
        veor    d25,d24
        vbsl    d29,d20,d21             @ Ch(e,f,g)
        vshr.u64        d24,d23,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d22
        vshr.u64        d25,d23,#34
        vsli.64 d24,d23,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d23,#39
        vadd.i64        d28,d9
        vsli.64 d25,d23,#30
        veor    d30,d23,d16
        vsli.64 d26,d23,#25
        veor    d22,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d17,d16             @ Maj(a,b,c)
        veor    d22,d26                 @ Sigma0(a)
        vadd.i64        d18,d27
        vadd.i64        d30,d27
        @ vadd.i64      d22,d30
        vshr.u64        d24,d18,#14     @ 10
#if 10<16
        vld1.64 {d10},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d18,#18
#if 10>0
        vadd.i64        d22,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d18,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d18,#50
        vsli.64 d25,d18,#46
        vmov    d29,d18
        vsli.64 d26,d18,#23
#if 10<16 && defined(__ARMEL__)
        vrev64.8        d10,d10
#endif
        veor    d25,d24
        vbsl    d29,d19,d20             @ Ch(e,f,g)
        vshr.u64        d24,d22,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d21
        vshr.u64        d25,d22,#34
        vsli.64 d24,d22,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d22,#39
        vadd.i64        d28,d10
        vsli.64 d25,d22,#30
        veor    d30,d22,d23
        vsli.64 d26,d22,#25
        veor    d21,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d16,d23             @ Maj(a,b,c)
        veor    d21,d26                 @ Sigma0(a)
        vadd.i64        d17,d27
        vadd.i64        d30,d27
        @ vadd.i64      d21,d30
        vshr.u64        d24,d17,#14     @ 11
#if 11<16
        vld1.64 {d11},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d17,#18
#if 11>0
        vadd.i64        d21,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d17,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d17,#50
        vsli.64 d25,d17,#46
        vmov    d29,d17
        vsli.64 d26,d17,#23
#if 11<16 && defined(__ARMEL__)
        vrev64.8        d11,d11
#endif
        veor    d25,d24
        vbsl    d29,d18,d19             @ Ch(e,f,g)
        vshr.u64        d24,d21,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d20
        vshr.u64        d25,d21,#34
        vsli.64 d24,d21,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d21,#39
        vadd.i64        d28,d11
        vsli.64 d25,d21,#30
        veor    d30,d21,d22
        vsli.64 d26,d21,#25
        veor    d20,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d23,d22             @ Maj(a,b,c)
        veor    d20,d26                 @ Sigma0(a)
        vadd.i64        d16,d27
        vadd.i64        d30,d27
        @ vadd.i64      d20,d30
        vshr.u64        d24,d16,#14     @ 12
#if 12<16
        vld1.64 {d12},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d16,#18
#if 12>0
        vadd.i64        d20,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d16,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d16,#50
        vsli.64 d25,d16,#46
        vmov    d29,d16
        vsli.64 d26,d16,#23
#if 12<16 && defined(__ARMEL__)
        vrev64.8        d12,d12
#endif
        veor    d25,d24
        vbsl    d29,d17,d18             @ Ch(e,f,g)
        vshr.u64        d24,d20,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d19
        vshr.u64        d25,d20,#34
        vsli.64 d24,d20,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d20,#39
        vadd.i64        d28,d12
        vsli.64 d25,d20,#30
        veor    d30,d20,d21
        vsli.64 d26,d20,#25
        veor    d19,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d22,d21             @ Maj(a,b,c)
        veor    d19,d26                 @ Sigma0(a)
        vadd.i64        d23,d27
        vadd.i64        d30,d27
        @ vadd.i64      d19,d30
        vshr.u64        d24,d23,#14     @ 13
#if 13<16
        vld1.64 {d13},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d23,#18
#if 13>0
        vadd.i64        d19,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d23,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d23,#50
        vsli.64 d25,d23,#46
        vmov    d29,d23
        vsli.64 d26,d23,#23
#if 13<16 && defined(__ARMEL__)
        vrev64.8        d13,d13
#endif
        veor    d25,d24
        vbsl    d29,d16,d17             @ Ch(e,f,g)
        vshr.u64        d24,d19,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d18
        vshr.u64        d25,d19,#34
        vsli.64 d24,d19,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d19,#39
        vadd.i64        d28,d13
        vsli.64 d25,d19,#30
        veor    d30,d19,d20
        vsli.64 d26,d19,#25
        veor    d18,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d21,d20             @ Maj(a,b,c)
        veor    d18,d26                 @ Sigma0(a)
        vadd.i64        d22,d27
        vadd.i64        d30,d27
        @ vadd.i64      d18,d30
        vshr.u64        d24,d22,#14     @ 14
#if 14<16
        vld1.64 {d14},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d22,#18
#if 14>0
        vadd.i64        d18,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d22,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d22,#50
        vsli.64 d25,d22,#46
        vmov    d29,d22
        vsli.64 d26,d22,#23
#if 14<16 && defined(__ARMEL__)
        vrev64.8        d14,d14
#endif
        veor    d25,d24
        vbsl    d29,d23,d16             @ Ch(e,f,g)
        vshr.u64        d24,d18,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d17
        vshr.u64        d25,d18,#34
        vsli.64 d24,d18,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d18,#39
        vadd.i64        d28,d14
        vsli.64 d25,d18,#30
        veor    d30,d18,d19
        vsli.64 d26,d18,#25
        veor    d17,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d20,d19             @ Maj(a,b,c)
        veor    d17,d26                 @ Sigma0(a)
        vadd.i64        d21,d27
        vadd.i64        d30,d27
        @ vadd.i64      d17,d30
        vshr.u64        d24,d21,#14     @ 15
#if 15<16
        vld1.64 {d15},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d21,#18
#if 15>0
        vadd.i64        d17,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d21,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d21,#50
        vsli.64 d25,d21,#46
        vmov    d29,d21
        vsli.64 d26,d21,#23
#if 15<16 && defined(__ARMEL__)
        vrev64.8        d15,d15
#endif
        veor    d25,d24
        vbsl    d29,d22,d23             @ Ch(e,f,g)
        vshr.u64        d24,d17,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d16
        vshr.u64        d25,d17,#34
        vsli.64 d24,d17,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d17,#39
        vadd.i64        d28,d15
        vsli.64 d25,d17,#30
        veor    d30,d17,d18
        vsli.64 d26,d17,#25
        veor    d16,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d19,d18             @ Maj(a,b,c)
        veor    d16,d26                 @ Sigma0(a)
        vadd.i64        d20,d27
        vadd.i64        d30,d27
        @ vadd.i64      d16,d30
        mov     r12,#4
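        @ Rounds 16..79: r12 counts four passes of 16 rounds each; every pass
        @ extends the message schedule in place (sigma0/sigma1 over earlier X
        @ words held in q0-q7) while the round function itself matches the
        @ rounds 0..15 above.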
.L16_79_neon:
        subs    r12,#1
        vshr.u64        q12,q7,#19
        vshr.u64        q13,q7,#61
        vadd.i64        d16,d30                 @ h+=Maj from the past
        vshr.u64        q15,q7,#6
        vsli.64 q12,q7,#45
        vext.8  q14,q0,q1,#8    @ X[i+1]
        vsli.64 q13,q7,#3
        veor    q15,q12
        vshr.u64        q12,q14,#1
        veor    q15,q13                         @ sigma1(X[i+14])
        vshr.u64        q13,q14,#8
        vadd.i64        q0,q15
        vshr.u64        q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8  q14,q4,q5,#8    @ X[i+9]
        veor    q15,q12
        vshr.u64        d24,d20,#14             @ from NEON_00_15
        vadd.i64        q0,q14
        vshr.u64        d25,d20,#18             @ from NEON_00_15
        veor    q15,q13                         @ sigma0(X[i+1])
        vshr.u64        d26,d20,#41             @ from NEON_00_15
        vadd.i64        q0,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d20,#50
        vsli.64 d25,d20,#46
        vmov    d29,d20
        vsli.64 d26,d20,#23
#if 16<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor    d25,d24
        vbsl    d29,d21,d22             @ Ch(e,f,g)
        vshr.u64        d24,d16,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d23
        vshr.u64        d25,d16,#34
        vsli.64 d24,d16,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d16,#39
        vadd.i64        d28,d0
        vsli.64 d25,d16,#30
        veor    d30,d16,d17
        vsli.64 d26,d16,#25
        veor    d23,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d18,d17             @ Maj(a,b,c)
        veor    d23,d26                 @ Sigma0(a)
        vadd.i64        d19,d27
        vadd.i64        d30,d27
        @ vadd.i64      d23,d30
        vshr.u64        d24,d19,#14     @ 17
#if 17<16
        vld1.64 {d1},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d19,#18
#if 17>0
        vadd.i64        d23,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d19,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d19,#50
        vsli.64 d25,d19,#46
        vmov    d29,d19
        vsli.64 d26,d19,#23
#if 17<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor    d25,d24
        vbsl    d29,d20,d21             @ Ch(e,f,g)
        vshr.u64        d24,d23,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d22
        vshr.u64        d25,d23,#34
        vsli.64 d24,d23,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d23,#39
        vadd.i64        d28,d1
        vsli.64 d25,d23,#30
        veor    d30,d23,d16
        vsli.64 d26,d23,#25
        veor    d22,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d17,d16             @ Maj(a,b,c)
        veor    d22,d26                 @ Sigma0(a)
        vadd.i64        d18,d27
        vadd.i64        d30,d27
        @ vadd.i64      d22,d30
        vshr.u64        q12,q0,#19
        vshr.u64        q13,q0,#61
        vadd.i64        d22,d30                 @ h+=Maj from the past
        vshr.u64        q15,q0,#6
        vsli.64 q12,q0,#45
        vext.8  q14,q1,q2,#8    @ X[i+1]
        vsli.64 q13,q0,#3
        veor    q15,q12
        vshr.u64        q12,q14,#1
        veor    q15,q13                         @ sigma1(X[i+14])
        vshr.u64        q13,q14,#8
        vadd.i64        q1,q15
        vshr.u64        q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8  q14,q5,q6,#8    @ X[i+9]
        veor    q15,q12
        vshr.u64        d24,d18,#14             @ from NEON_00_15
        vadd.i64        q1,q14
        vshr.u64        d25,d18,#18             @ from NEON_00_15
        veor    q15,q13                         @ sigma0(X[i+1])
        vshr.u64        d26,d18,#41             @ from NEON_00_15
        vadd.i64        q1,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d18,#50
        vsli.64 d25,d18,#46
        vmov    d29,d18
        vsli.64 d26,d18,#23
#if 18<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor    d25,d24
        vbsl    d29,d19,d20             @ Ch(e,f,g)
        vshr.u64        d24,d22,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d21
        vshr.u64        d25,d22,#34
        vsli.64 d24,d22,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d22,#39
        vadd.i64        d28,d2
        vsli.64 d25,d22,#30
        veor    d30,d22,d23
        vsli.64 d26,d22,#25
        veor    d21,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d16,d23             @ Maj(a,b,c)
        veor    d21,d26                 @ Sigma0(a)
        vadd.i64        d17,d27
        vadd.i64        d30,d27
        @ vadd.i64      d21,d30
        vshr.u64        d24,d17,#14     @ 19
#if 19<16
        vld1.64 {d3},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d17,#18
#if 19>0
        vadd.i64        d21,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d17,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d17,#50
        vsli.64 d25,d17,#46
        vmov    d29,d17
        vsli.64 d26,d17,#23
#if 19<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor    d25,d24
        vbsl    d29,d18,d19             @ Ch(e,f,g)
        vshr.u64        d24,d21,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d20
        vshr.u64        d25,d21,#34
        vsli.64 d24,d21,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d21,#39
        vadd.i64        d28,d3
        vsli.64 d25,d21,#30
        veor    d30,d21,d22
        vsli.64 d26,d21,#25
        veor    d20,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d23,d22             @ Maj(a,b,c)
        veor    d20,d26                 @ Sigma0(a)
        vadd.i64        d16,d27
        vadd.i64        d30,d27
        @ vadd.i64      d20,d30
        vshr.u64        q12,q1,#19
        vshr.u64        q13,q1,#61
        vadd.i64        d20,d30                 @ h+=Maj from the past
        vshr.u64        q15,q1,#6
        vsli.64 q12,q1,#45
        vext.8  q14,q2,q3,#8    @ X[i+1]
        vsli.64 q13,q1,#3
        veor    q15,q12
        vshr.u64        q12,q14,#1
        veor    q15,q13                         @ sigma1(X[i+14])
        vshr.u64        q13,q14,#8
        vadd.i64        q2,q15
        vshr.u64        q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8  q14,q6,q7,#8    @ X[i+9]
        veor    q15,q12
        vshr.u64        d24,d16,#14             @ from NEON_00_15
        vadd.i64        q2,q14
        vshr.u64        d25,d16,#18             @ from NEON_00_15
        veor    q15,q13                         @ sigma0(X[i+1])
        vshr.u64        d26,d16,#41             @ from NEON_00_15
        vadd.i64        q2,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d16,#50
        vsli.64 d25,d16,#46
        vmov    d29,d16
        vsli.64 d26,d16,#23
#if 20<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor    d25,d24
        vbsl    d29,d17,d18             @ Ch(e,f,g)
        vshr.u64        d24,d20,#28
        veor    d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d19
        vshr.u64        d25,d20,#34
        vsli.64 d24,d20,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d20,#39
        vadd.i64        d28,d4
        vsli.64 d25,d20,#30
        veor    d30,d20,d21
        vsli.64 d26,d20,#25
        veor    d19,d24,d25
        vadd.i64        d27,d28
        vbsl    d30,d22,d21             @ Maj(a,b,c)
        veor    d19,d26                 @ Sigma0(a)
        vadd.i64        d23,d27
        vadd.i64        d30,d27
        @ vadd.i64      d19,d30
1333         vshr.u64        d24,d23,#14     @ 21
1334 #if 21<16
1335         vld1.64 {d5},[r1]!      @ handles unaligned
1336 #endif
1337         vshr.u64        d25,d23,#18
1338 #if 21>0
1339         vadd.i64        d19,d30                 @ h+=Maj from the past
1340 #endif
1341         vshr.u64        d26,d23,#41
1342         vld1.64 {d28},[r3,:64]! @ K[i++]
1343         vsli.64 d24,d23,#50
1344         vsli.64 d25,d23,#46
1345         vmov    d29,d23
1346         vsli.64 d26,d23,#23
1347 #if 21<16 && defined(__ARMEL__)
1348         vrev64.8        ,
1349 #endif
1350         veor    d25,d24
1351         vbsl    d29,d16,d17             @ Ch(e,f,g)
1352         vshr.u64        d24,d19,#28
1353         veor    d26,d25                 @ Sigma1(e)
1354         vadd.i64        d27,d29,d18
1355         vshr.u64        d25,d19,#34
1356         vsli.64 d24,d19,#36
1357         vadd.i64        d27,d26
1358         vshr.u64        d26,d19,#39
1359         vadd.i64        d28,d5
1360         vsli.64 d25,d19,#30
1361         veor    d30,d19,d20
1362         vsli.64 d26,d19,#25
1363         veor    d18,d24,d25
1364         vadd.i64        d27,d28
1365         vbsl    d30,d21,d20             @ Maj(a,b,c)
1366         veor    d18,d26                 @ Sigma0(a)
1367         vadd.i64        d22,d27
1368         vadd.i64        d30,d27
1369         @ vadd.i64      d18,d30
1370         vshr.u64        q12,q2,#19
1371         vshr.u64        q13,q2,#61
1372         vadd.i64        d18,d30                 @ h+=Maj from the past
1373         vshr.u64        q15,q2,#6
1374         vsli.64 q12,q2,#45
1375         vext.8  q14,q3,q4,#8    @ X[i+1]
1376         vsli.64 q13,q2,#3
1377         veor    q15,q12
1378         vshr.u64        q12,q14,#1
1379         veor    q15,q13                         @ sigma1(X[i+14])
1380         vshr.u64        q13,q14,#8
1381         vadd.i64        q3,q15
1382         vshr.u64        q15,q14,#7
1383         vsli.64 q12,q14,#63
1384         vsli.64 q13,q14,#56
1385         vext.8  q14,q7,q0,#8    @ X[i+9]
1386         veor    q15,q12
1387         vshr.u64        d24,d22,#14             @ from NEON_00_15
1388         vadd.i64        q3,q14
1389         vshr.u64        d25,d22,#18             @ from NEON_00_15
1390         veor    q15,q13                         @ sigma0(X[i+1])
1391         vshr.u64        d26,d22,#41             @ from NEON_00_15
1392         vadd.i64        q3,q15
1393         vld1.64 {d28},[r3,:64]! @ K[i++]
1394         vsli.64 d24,d22,#50
1395         vsli.64 d25,d22,#46
1396         vmov    d29,d22
1397         vsli.64 d26,d22,#23
1398 #if 22<16 && defined(__ARMEL__)
1399         vrev64.8        ,
1400 #endif
1401         veor    d25,d24
1402         vbsl    d29,d23,d16             @ Ch(e,f,g)
1403         vshr.u64        d24,d18,#28
1404         veor    d26,d25                 @ Sigma1(e)
1405         vadd.i64        d27,d29,d17
1406         vshr.u64        d25,d18,#34
1407         vsli.64 d24,d18,#36
1408         vadd.i64        d27,d26
1409         vshr.u64        d26,d18,#39
1410         vadd.i64        d28,d6
1411         vsli.64 d25,d18,#30
1412         veor    d30,d18,d19
1413         vsli.64 d26,d18,#25
1414         veor    d17,d24,d25
1415         vadd.i64        d27,d28
1416         vbsl    d30,d20,d19             @ Maj(a,b,c)
1417         veor    d17,d26                 @ Sigma0(a)
1418         vadd.i64        d21,d27
1419         vadd.i64        d30,d27
1420         @ vadd.i64      d17,d30
1421         vshr.u64        d24,d21,#14     @ 23
1422 #if 23<16
1423         vld1.64 {d7},[r1]!      @ handles unaligned
1424 #endif
1425         vshr.u64        d25,d21,#18
1426 #if 23>0
1427         vadd.i64        d17,d30                 @ h+=Maj from the past
1428 #endif
1429         vshr.u64        d26,d21,#41
1430         vld1.64 {d28},[r3,:64]! @ K[i++]
1431         vsli.64 d24,d21,#50
1432         vsli.64 d25,d21,#46
1433         vmov    d29,d21
1434         vsli.64 d26,d21,#23
1435 #if 23<16 && defined(__ARMEL__)
1436         vrev64.8        ,
1437 #endif
1438         veor    d25,d24
1439         vbsl    d29,d22,d23             @ Ch(e,f,g)
1440         vshr.u64        d24,d17,#28
1441         veor    d26,d25                 @ Sigma1(e)
1442         vadd.i64        d27,d29,d16
1443         vshr.u64        d25,d17,#34
1444         vsli.64 d24,d17,#36
1445         vadd.i64        d27,d26
1446         vshr.u64        d26,d17,#39
1447         vadd.i64        d28,d7
1448         vsli.64 d25,d17,#30
1449         veor    d30,d17,d18
1450         vsli.64 d26,d17,#25
1451         veor    d16,d24,d25
1452         vadd.i64        d27,d28
1453         vbsl    d30,d19,d18             @ Maj(a,b,c)
1454         veor    d16,d26                 @ Sigma0(a)
1455         vadd.i64        d20,d27
1456         vadd.i64        d30,d27
1457         @ vadd.i64      d16,d30
	vshr.u64	q12,q3,#19
	vshr.u64	q13,q3,#61
	vadd.i64	d16,d30			@ h+=Maj from the past
	vshr.u64	q15,q3,#6
	vsli.64	q12,q3,#45
	vext.8	q14,q4,q5,#8	@ X[i+1]
	vsli.64	q13,q3,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q4,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q0,q1,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d20,#14		@ from NEON_00_15
	vadd.i64	q4,q14
	vshr.u64	d25,d20,#18		@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d20,#41		@ from NEON_00_15
	vadd.i64	q4,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 24<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d8
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 25
#if 25<16
	vld1.64	{d9},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 25>0
	vadd.i64	d23,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 25<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d9
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	q12,q4,#19
	vshr.u64	q13,q4,#61
	vadd.i64	d22,d30			@ h+=Maj from the past
	vshr.u64	q15,q4,#6
	vsli.64	q12,q4,#45
	vext.8	q14,q5,q6,#8	@ X[i+1]
	vsli.64	q13,q4,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q5,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q1,q2,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d18,#14		@ from NEON_00_15
	vadd.i64	q5,q14
	vshr.u64	d25,d18,#18		@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d18,#41		@ from NEON_00_15
	vadd.i64	q5,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 26<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d10
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23		@ Maj(a,b,c)
	veor	d21,d26			@ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14	@ 27
#if 27<16
	vld1.64	{d11},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 27>0
	vadd.i64	d21,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 27<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d18,d19		@ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d11
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22		@ Maj(a,b,c)
	veor	d20,d26			@ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	q12,q5,#19
	vshr.u64	q13,q5,#61
	vadd.i64	d20,d30			@ h+=Maj from the past
	vshr.u64	q15,q5,#6
	vsli.64	q12,q5,#45
	vext.8	q14,q6,q7,#8	@ X[i+1]
	vsli.64	q13,q5,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q6,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q2,q3,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d16,#14		@ from NEON_00_15
	vadd.i64	q6,q14
	vshr.u64	d25,d16,#18		@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d16,#41		@ from NEON_00_15
	vadd.i64	q6,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 28<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d17,d18		@ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d12
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21		@ Maj(a,b,c)
	veor	d19,d26			@ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14	@ 29
#if 29<16
	vld1.64	{d13},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 29>0
	vadd.i64	d19,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 29<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d16,d17		@ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d13
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20		@ Maj(a,b,c)
	veor	d18,d26			@ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	q12,q6,#19
	vshr.u64	q13,q6,#61
	vadd.i64	d18,d30			@ h+=Maj from the past
	vshr.u64	q15,q6,#6
	vsli.64	q12,q6,#45
	vext.8	q14,q7,q0,#8	@ X[i+1]
	vsli.64	q13,q6,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q7,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q3,q4,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d22,#14		@ from NEON_00_15
	vadd.i64	q7,q14
	vshr.u64	d25,d22,#18		@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d22,#41		@ from NEON_00_15
	vadd.i64	q7,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 30<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d23,d16		@ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d14
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19		@ Maj(a,b,c)
	veor	d17,d26			@ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14	@ 31
#if 31<16
	vld1.64	{d15},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 31>0
	vadd.i64	d17,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 31<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d22,d23		@ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d15
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18		@ Maj(a,b,c)
	veor	d16,d26			@ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	bne	.L16_79_neon
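	@ Fell through after all 80 rounds of this block: d16-d23 hold the
	@ working variables a..h, with one Maj() addition still deferred.
	@ Reload the saved hash state from [r0] into d24-d31, accumulate it
	@ into a..h, store the result back, and loop until r1 reaches r2.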
	vadd.i64	d16,d30		@ h+=Maj from the past
	vldmia	r0,{d24,d25,d26,d27,d28,d29,d30,d31}	@ load context to temp
	vadd.i64	q8,q12		@ vectorized accumulate
	vadd.i64	q9,q13
	vadd.i64	q10,q14
	vadd.i64	q11,q15
	vstmia	r0,{d16,d17,d18,d19,d20,d21,d22,d23}	@ save context
	teq	r1,r2
	sub	r3,#640	@ rewind K512
	bne	.Loop_neon

	VFP_ABI_POP
	bx	lr				@ .word 0xe12fff1e
.size	zfs_sha512_block_neon,.-zfs_sha512_block_neon
#endif // #if __ARM_ARCH__ >= 7
#endif // #if defined(__arm__)