@ third_party/boringssl/linux-arm/crypto/sha/sha512-armv4.S
1 #include "arm_arch.h"
2 #ifdef __ARMEL__
3 # define LO 0
4 # define HI 4
5 # define WORD64(hi0,lo0,hi1,lo1)        .word   lo0,hi0, lo1,hi1
6 #else
7 # define HI 0
8 # define LO 4
9 # define WORD64(hi0,lo0,hi1,lo1)        .word   hi0,lo0, hi1,lo1
10 #endif
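@ WORD64() emits each 64-bit constant as two 32-bit .word halves in memory
@ order, so the K512 round-constant table below reads correctly on both
@ little- and big-endian targets; LO/HI give the byte offset of each half.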
12 .text
13 .code   32
14 .type   K512,%object
15 .align  5
16 K512:
17 WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
18 WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
19 WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
20 WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
21 WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
22 WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
23 WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
24 WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
25 WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
26 WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
27 WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
28 WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
29 WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
30 WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
31 WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
32 WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
33 WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
34 WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
35 WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
36 WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
37 WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
38 WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
39 WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
40 WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
41 WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
42 WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
43 WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
44 WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
45 WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
46 WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
47 WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
48 WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
49 WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
50 WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
51 WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
52 WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
53 WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
54 WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
55 WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
56 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
57 .size   K512,.-K512
58 #if __ARM_MAX_ARCH__>=7
59 .LOPENSSL_armcap:
60 .word   OPENSSL_armcap_P-sha512_block_data_order
61 .skip   32-4
62 #else
63 .skip   32
64 #endif
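@ sha512_block_data_order: r0 -> eight 64-bit hash state words, r1 -> input,
@ r2 = number of 128-byte blocks; the add below converts r2 into an
@ end-of-input pointer (r2 = r1 + r2*128).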
66 .global sha512_block_data_order
67 .type   sha512_block_data_order,%function
68 sha512_block_data_order:
69         sub     r3,pc,#8                @ sha512_block_data_order
70         add     r2,r1,r2,lsl#7  @ len to point at the end of inp
71 #if __ARM_MAX_ARCH__>=7
72         ldr     r12,.LOPENSSL_armcap
73         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
74         tst     r12,#1
75         bne     .LNEON
76 #endif
77         stmdb   sp!,{r4-r12,lr}
78         sub     r14,r3,#672             @ K512
79         sub     sp,sp,#9*8
81         ldr     r7,[r0,#32+LO]
82         ldr     r8,[r0,#32+HI]
83         ldr     r9, [r0,#48+LO]
84         ldr     r10, [r0,#48+HI]
85         ldr     r11, [r0,#56+LO]
86         ldr     r12, [r0,#56+HI]
87 .Loop:
88         str     r9, [sp,#48+0]
89         str     r10, [sp,#48+4]
90         str     r11, [sp,#56+0]
91         str     r12, [sp,#56+4]
92         ldr     r5,[r0,#0+LO]
93         ldr     r6,[r0,#0+HI]
94         ldr     r3,[r0,#8+LO]
95         ldr     r4,[r0,#8+HI]
96         ldr     r9, [r0,#16+LO]
97         ldr     r10, [r0,#16+HI]
98         ldr     r11, [r0,#24+LO]
99         ldr     r12, [r0,#24+HI]
100         str     r3,[sp,#8+0]
101         str     r4,[sp,#8+4]
102         str     r9, [sp,#16+0]
103         str     r10, [sp,#16+4]
104         str     r11, [sp,#24+0]
105         str     r12, [sp,#24+4]
106         ldr     r3,[r0,#40+LO]
107         ldr     r4,[r0,#40+HI]
108         str     r3,[sp,#40+0]
109         str     r4,[sp,#40+4]
111 .L00_15:
112 #if __ARM_ARCH__<7
113         ldrb    r3,[r1,#7]
114         ldrb    r9, [r1,#6]
115         ldrb    r10, [r1,#5]
116         ldrb    r11, [r1,#4]
117         ldrb    r4,[r1,#3]
118         ldrb    r12, [r1,#2]
119         orr     r3,r3,r9,lsl#8
120         ldrb    r9, [r1,#1]
121         orr     r3,r3,r10,lsl#16
122         ldrb    r10, [r1],#8
123         orr     r3,r3,r11,lsl#24
124         orr     r4,r4,r12,lsl#8
125         orr     r4,r4,r9,lsl#16
126         orr     r4,r4,r10,lsl#24
127 #else
128         ldr     r3,[r1,#4]
129         ldr     r4,[r1],#8
130 #ifdef __ARMEL__
131         rev     r3,r3
132         rev     r4,r4
133 #endif
134 #endif
135         @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
136         @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
137         @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
138         mov     r9,r7,lsr#14
139         str     r3,[sp,#64+0]
140         mov     r10,r8,lsr#14
141         str     r4,[sp,#64+4]
142         eor     r9,r9,r8,lsl#18
143         ldr     r11,[sp,#56+0]  @ h.lo
144         eor     r10,r10,r7,lsl#18
145         ldr     r12,[sp,#56+4]  @ h.hi
146         eor     r9,r9,r7,lsr#18
147         eor     r10,r10,r8,lsr#18
148         eor     r9,r9,r8,lsl#14
149         eor     r10,r10,r7,lsl#14
150         eor     r9,r9,r8,lsr#9
151         eor     r10,r10,r7,lsr#9
152         eor     r9,r9,r7,lsl#23
153         eor     r10,r10,r8,lsl#23       @ Sigma1(e)
154         adds    r3,r3,r9
155         ldr     r9,[sp,#40+0]   @ f.lo
156         adc     r4,r4,r10               @ T += Sigma1(e)
157         ldr     r10,[sp,#40+4]  @ f.hi
158         adds    r3,r3,r11
159         ldr     r11,[sp,#48+0]  @ g.lo
160         adc     r4,r4,r12               @ T += h
161         ldr     r12,[sp,#48+4]  @ g.hi
163         eor     r9,r9,r11
164         str     r7,[sp,#32+0]
165         eor     r10,r10,r12
166         str     r8,[sp,#32+4]
167         and     r9,r9,r7
168         str     r5,[sp,#0+0]
169         and     r10,r10,r8
170         str     r6,[sp,#0+4]
171         eor     r9,r9,r11
172         ldr     r11,[r14,#LO]   @ K[i].lo
173         eor     r10,r10,r12             @ Ch(e,f,g)
174         ldr     r12,[r14,#HI]   @ K[i].hi
176         adds    r3,r3,r9
177         ldr     r7,[sp,#24+0]   @ d.lo
178         adc     r4,r4,r10               @ T += Ch(e,f,g)
179         ldr     r8,[sp,#24+4]   @ d.hi
180         adds    r3,r3,r11
181         and     r9,r11,#0xff
182         adc     r4,r4,r12               @ T += K[i]
183         adds    r7,r7,r3
184         ldr     r11,[sp,#8+0]   @ b.lo
185         adc     r8,r8,r4                @ d += T
186         teq     r9,#148
188         ldr     r12,[sp,#16+0]  @ c.lo
189         orreq   r14,r14,#1
190         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
191         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
192         @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
193         mov     r9,r5,lsr#28
194         mov     r10,r6,lsr#28
195         eor     r9,r9,r6,lsl#4
196         eor     r10,r10,r5,lsl#4
197         eor     r9,r9,r6,lsr#2
198         eor     r10,r10,r5,lsr#2
199         eor     r9,r9,r5,lsl#30
200         eor     r10,r10,r6,lsl#30
201         eor     r9,r9,r6,lsr#7
202         eor     r10,r10,r5,lsr#7
203         eor     r9,r9,r5,lsl#25
204         eor     r10,r10,r6,lsl#25       @ Sigma0(a)
205         adds    r3,r3,r9
206         and     r9,r5,r11
207         adc     r4,r4,r10               @ T += Sigma0(a)
209         ldr     r10,[sp,#8+4]   @ b.hi
210         orr     r5,r5,r11
211         ldr     r11,[sp,#16+4]  @ c.hi
212         and     r5,r5,r12
213         and     r12,r6,r10
214         orr     r6,r6,r10
215         orr     r5,r5,r9                @ Maj(a,b,c).lo
216         and     r6,r6,r11
217         adds    r5,r5,r3
218         orr     r6,r6,r12               @ Maj(a,b,c).hi
219         sub     sp,sp,#8
220         adc     r6,r6,r4                @ h += T
221         tst     r14,#1
222         add     r14,r14,#8
223         tst     r14,#1
224         beq     .L00_15
225         ldr     r9,[sp,#184+0]
226         ldr     r10,[sp,#184+4]
227         bic     r14,r14,#1
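@ Rounds 16..79: the 16-entry message schedule kept on the stack is extended
@ in place with sigma0/sigma1 (see the comments below); each round otherwise
@ follows the same pattern as .L00_15 above.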
228 .L16_79:
229         @ sigma0(x)     (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
230         @ LO            lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
231         @ HI            hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
232         mov     r3,r9,lsr#1
233         ldr     r11,[sp,#80+0]
234         mov     r4,r10,lsr#1
235         ldr     r12,[sp,#80+4]
236         eor     r3,r3,r10,lsl#31
237         eor     r4,r4,r9,lsl#31
238         eor     r3,r3,r9,lsr#8
239         eor     r4,r4,r10,lsr#8
240         eor     r3,r3,r10,lsl#24
241         eor     r4,r4,r9,lsl#24
242         eor     r3,r3,r9,lsr#7
243         eor     r4,r4,r10,lsr#7
244         eor     r3,r3,r10,lsl#25
246         @ sigma1(x)     (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
247         @ LO            lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
248         @ HI            hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
249         mov     r9,r11,lsr#19
250         mov     r10,r12,lsr#19
251         eor     r9,r9,r12,lsl#13
252         eor     r10,r10,r11,lsl#13
253         eor     r9,r9,r12,lsr#29
254         eor     r10,r10,r11,lsr#29
255         eor     r9,r9,r11,lsl#3
256         eor     r10,r10,r12,lsl#3
257         eor     r9,r9,r11,lsr#6
258         eor     r10,r10,r12,lsr#6
259         ldr     r11,[sp,#120+0]
260         eor     r9,r9,r12,lsl#26
262         ldr     r12,[sp,#120+4]
263         adds    r3,r3,r9
264         ldr     r9,[sp,#192+0]
265         adc     r4,r4,r10
267         ldr     r10,[sp,#192+4]
268         adds    r3,r3,r11
269         adc     r4,r4,r12
270         adds    r3,r3,r9
271         adc     r4,r4,r10
272         @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
273         @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
274         @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
275         mov     r9,r7,lsr#14
276         str     r3,[sp,#64+0]
277         mov     r10,r8,lsr#14
278         str     r4,[sp,#64+4]
279         eor     r9,r9,r8,lsl#18
280         ldr     r11,[sp,#56+0]  @ h.lo
281         eor     r10,r10,r7,lsl#18
282         ldr     r12,[sp,#56+4]  @ h.hi
283         eor     r9,r9,r7,lsr#18
284         eor     r10,r10,r8,lsr#18
285         eor     r9,r9,r8,lsl#14
286         eor     r10,r10,r7,lsl#14
287         eor     r9,r9,r8,lsr#9
288         eor     r10,r10,r7,lsr#9
289         eor     r9,r9,r7,lsl#23
290         eor     r10,r10,r8,lsl#23       @ Sigma1(e)
291         adds    r3,r3,r9
292         ldr     r9,[sp,#40+0]   @ f.lo
293         adc     r4,r4,r10               @ T += Sigma1(e)
294         ldr     r10,[sp,#40+4]  @ f.hi
295         adds    r3,r3,r11
296         ldr     r11,[sp,#48+0]  @ g.lo
297         adc     r4,r4,r12               @ T += h
298         ldr     r12,[sp,#48+4]  @ g.hi
300         eor     r9,r9,r11
301         str     r7,[sp,#32+0]
302         eor     r10,r10,r12
303         str     r8,[sp,#32+4]
304         and     r9,r9,r7
305         str     r5,[sp,#0+0]
306         and     r10,r10,r8
307         str     r6,[sp,#0+4]
308         eor     r9,r9,r11
309         ldr     r11,[r14,#LO]   @ K[i].lo
310         eor     r10,r10,r12             @ Ch(e,f,g)
311         ldr     r12,[r14,#HI]   @ K[i].hi
313         adds    r3,r3,r9
314         ldr     r7,[sp,#24+0]   @ d.lo
315         adc     r4,r4,r10               @ T += Ch(e,f,g)
316         ldr     r8,[sp,#24+4]   @ d.hi
317         adds    r3,r3,r11
318         and     r9,r11,#0xff
319         adc     r4,r4,r12               @ T += K[i]
320         adds    r7,r7,r3
321         ldr     r11,[sp,#8+0]   @ b.lo
322         adc     r8,r8,r4                @ d += T
323         teq     r9,#23
325         ldr     r12,[sp,#16+0]  @ c.lo
326         orreq   r14,r14,#1
327         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
328         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
329         @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
330         mov     r9,r5,lsr#28
331         mov     r10,r6,lsr#28
332         eor     r9,r9,r6,lsl#4
333         eor     r10,r10,r5,lsl#4
334         eor     r9,r9,r6,lsr#2
335         eor     r10,r10,r5,lsr#2
336         eor     r9,r9,r5,lsl#30
337         eor     r10,r10,r6,lsl#30
338         eor     r9,r9,r6,lsr#7
339         eor     r10,r10,r5,lsr#7
340         eor     r9,r9,r5,lsl#25
341         eor     r10,r10,r6,lsl#25       @ Sigma0(a)
342         adds    r3,r3,r9
343         and     r9,r5,r11
344         adc     r4,r4,r10               @ T += Sigma0(a)
346         ldr     r10,[sp,#8+4]   @ b.hi
347         orr     r5,r5,r11
348         ldr     r11,[sp,#16+4]  @ c.hi
349         and     r5,r5,r12
350         and     r12,r6,r10
351         orr     r6,r6,r10
352         orr     r5,r5,r9                @ Maj(a,b,c).lo
353         and     r6,r6,r11
354         adds    r5,r5,r3
355         orr     r6,r6,r12               @ Maj(a,b,c).hi
356         sub     sp,sp,#8
357         adc     r6,r6,r4                @ h += T
358         tst     r14,#1
359         add     r14,r14,#8
360         ldreq   r9,[sp,#184+0]
361         ldreq   r10,[sp,#184+4]
362         beq     .L16_79
363         bic     r14,r14,#1
365         ldr     r3,[sp,#8+0]
366         ldr     r4,[sp,#8+4]
367         ldr     r9, [r0,#0+LO]
368         ldr     r10, [r0,#0+HI]
369         ldr     r11, [r0,#8+LO]
370         ldr     r12, [r0,#8+HI]
371         adds    r9,r5,r9
372         str     r9, [r0,#0+LO]
373         adc     r10,r6,r10
374         str     r10, [r0,#0+HI]
375         adds    r11,r3,r11
376         str     r11, [r0,#8+LO]
377         adc     r12,r4,r12
378         str     r12, [r0,#8+HI]
380         ldr     r5,[sp,#16+0]
381         ldr     r6,[sp,#16+4]
382         ldr     r3,[sp,#24+0]
383         ldr     r4,[sp,#24+4]
384         ldr     r9, [r0,#16+LO]
385         ldr     r10, [r0,#16+HI]
386         ldr     r11, [r0,#24+LO]
387         ldr     r12, [r0,#24+HI]
388         adds    r9,r5,r9
389         str     r9, [r0,#16+LO]
390         adc     r10,r6,r10
391         str     r10, [r0,#16+HI]
392         adds    r11,r3,r11
393         str     r11, [r0,#24+LO]
394         adc     r12,r4,r12
395         str     r12, [r0,#24+HI]
397         ldr     r3,[sp,#40+0]
398         ldr     r4,[sp,#40+4]
399         ldr     r9, [r0,#32+LO]
400         ldr     r10, [r0,#32+HI]
401         ldr     r11, [r0,#40+LO]
402         ldr     r12, [r0,#40+HI]
403         adds    r7,r7,r9
404         str     r7,[r0,#32+LO]
405         adc     r8,r8,r10
406         str     r8,[r0,#32+HI]
407         adds    r11,r3,r11
408         str     r11, [r0,#40+LO]
409         adc     r12,r4,r12
410         str     r12, [r0,#40+HI]
412         ldr     r5,[sp,#48+0]
413         ldr     r6,[sp,#48+4]
414         ldr     r3,[sp,#56+0]
415         ldr     r4,[sp,#56+4]
416         ldr     r9, [r0,#48+LO]
417         ldr     r10, [r0,#48+HI]
418         ldr     r11, [r0,#56+LO]
419         ldr     r12, [r0,#56+HI]
420         adds    r9,r5,r9
421         str     r9, [r0,#48+LO]
422         adc     r10,r6,r10
423         str     r10, [r0,#48+HI]
424         adds    r11,r3,r11
425         str     r11, [r0,#56+LO]
426         adc     r12,r4,r12
427         str     r12, [r0,#56+HI]
429         add     sp,sp,#640
430         sub     r14,r14,#640
432         teq     r1,r2
433         bne     .Loop
435         add     sp,sp,#8*9              @ destroy frame
436 #if __ARM_ARCH__>=5
437         ldmia   sp!,{r4-r12,pc}
438 #else
439         ldmia   sp!,{r4-r12,lr}
440         tst     lr,#1
441         moveq   pc,lr                   @ be binary compatible with V4, yet
442         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
443 #endif
444 #if __ARM_MAX_ARCH__>=7
445 .arch   armv7-a
446 .fpu    neon
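@ NEON code path, entered when bit 0 of OPENSSL_armcap_P (the NEON capability
@ flag) is set; the hash state lives in d16-d23 and the 16 message words in
@ d0-d15, one 64-bit lane per register.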
448 .align  4
449 .LNEON:
450         dmb                             @ errata #451034 on early Cortex A8
451         vstmdb  sp!,{d8-d15}            @ ABI specification says so
452         sub     r3,r3,#672              @ K512
453         vldmia  r0,{d16-d23}            @ load context
454 .Loop_neon:
455         vshr.u64        d24,d20,#14     @ 0
456 #if 0<16
457         vld1.64         {d0},[r1]!      @ handles unaligned
458 #endif
459         vshr.u64        d25,d20,#18
460 #if 0>0
461          vadd.i64       d16,d30                 @ h+=Maj from the past
462 #endif
463         vshr.u64        d26,d20,#41
464         vld1.64         {d28},[r3,:64]! @ K[i++]
465         vsli.64         d24,d20,#50
466         vsli.64         d25,d20,#46
467         vmov            d29,d20
468         vsli.64         d26,d20,#23
469 #if 0<16 && defined(__ARMEL__)
470         vrev64.8        d0,d0
471 #endif
472         veor            d25,d24
473         vbsl            d29,d21,d22             @ Ch(e,f,g)
474         vshr.u64        d24,d16,#28
475         veor            d26,d25                 @ Sigma1(e)
476         vadd.i64        d27,d29,d23
477         vshr.u64        d25,d16,#34
478         vsli.64         d24,d16,#36
479         vadd.i64        d27,d26
480         vshr.u64        d26,d16,#39
481         vadd.i64        d28,d0
482         vsli.64         d25,d16,#30
483         veor            d30,d16,d17
484         vsli.64         d26,d16,#25
485         veor            d23,d24,d25
486         vadd.i64        d27,d28
487         vbsl            d30,d18,d17             @ Maj(a,b,c)
488         veor            d23,d26                 @ Sigma0(a)
489         vadd.i64        d19,d27
490         vadd.i64        d30,d27
491         @ vadd.i64      d23,d30
492         vshr.u64        d24,d19,#14     @ 1
493 #if 1<16
494         vld1.64         {d1},[r1]!      @ handles unaligned
495 #endif
496         vshr.u64        d25,d19,#18
497 #if 1>0
498          vadd.i64       d23,d30                 @ h+=Maj from the past
499 #endif
500         vshr.u64        d26,d19,#41
501         vld1.64         {d28},[r3,:64]! @ K[i++]
502         vsli.64         d24,d19,#50
503         vsli.64         d25,d19,#46
504         vmov            d29,d19
505         vsli.64         d26,d19,#23
506 #if 1<16 && defined(__ARMEL__)
507         vrev64.8        d1,d1
508 #endif
509         veor            d25,d24
510         vbsl            d29,d20,d21             @ Ch(e,f,g)
511         vshr.u64        d24,d23,#28
512         veor            d26,d25                 @ Sigma1(e)
513         vadd.i64        d27,d29,d22
514         vshr.u64        d25,d23,#34
515         vsli.64         d24,d23,#36
516         vadd.i64        d27,d26
517         vshr.u64        d26,d23,#39
518         vadd.i64        d28,d1
519         vsli.64         d25,d23,#30
520         veor            d30,d23,d16
521         vsli.64         d26,d23,#25
522         veor            d22,d24,d25
523         vadd.i64        d27,d28
524         vbsl            d30,d17,d16             @ Maj(a,b,c)
525         veor            d22,d26                 @ Sigma0(a)
526         vadd.i64        d18,d27
527         vadd.i64        d30,d27
528         @ vadd.i64      d22,d30
529         vshr.u64        d24,d18,#14     @ 2
530 #if 2<16
531         vld1.64         {d2},[r1]!      @ handles unaligned
532 #endif
533         vshr.u64        d25,d18,#18
534 #if 2>0
535          vadd.i64       d22,d30                 @ h+=Maj from the past
536 #endif
537         vshr.u64        d26,d18,#41
538         vld1.64         {d28},[r3,:64]! @ K[i++]
539         vsli.64         d24,d18,#50
540         vsli.64         d25,d18,#46
541         vmov            d29,d18
542         vsli.64         d26,d18,#23
543 #if 2<16 && defined(__ARMEL__)
544         vrev64.8        d2,d2
545 #endif
546         veor            d25,d24
547         vbsl            d29,d19,d20             @ Ch(e,f,g)
548         vshr.u64        d24,d22,#28
549         veor            d26,d25                 @ Sigma1(e)
550         vadd.i64        d27,d29,d21
551         vshr.u64        d25,d22,#34
552         vsli.64         d24,d22,#36
553         vadd.i64        d27,d26
554         vshr.u64        d26,d22,#39
555         vadd.i64        d28,d2
556         vsli.64         d25,d22,#30
557         veor            d30,d22,d23
558         vsli.64         d26,d22,#25
559         veor            d21,d24,d25
560         vadd.i64        d27,d28
561         vbsl            d30,d16,d23             @ Maj(a,b,c)
562         veor            d21,d26                 @ Sigma0(a)
563         vadd.i64        d17,d27
564         vadd.i64        d30,d27
565         @ vadd.i64      d21,d30
566         vshr.u64        d24,d17,#14     @ 3
567 #if 3<16
568         vld1.64         {d3},[r1]!      @ handles unaligned
569 #endif
570         vshr.u64        d25,d17,#18
571 #if 3>0
572          vadd.i64       d21,d30                 @ h+=Maj from the past
573 #endif
574         vshr.u64        d26,d17,#41
575         vld1.64         {d28},[r3,:64]! @ K[i++]
576         vsli.64         d24,d17,#50
577         vsli.64         d25,d17,#46
578         vmov            d29,d17
579         vsli.64         d26,d17,#23
580 #if 3<16 && defined(__ARMEL__)
581         vrev64.8        d3,d3
582 #endif
583         veor            d25,d24
584         vbsl            d29,d18,d19             @ Ch(e,f,g)
585         vshr.u64        d24,d21,#28
586         veor            d26,d25                 @ Sigma1(e)
587         vadd.i64        d27,d29,d20
588         vshr.u64        d25,d21,#34
589         vsli.64         d24,d21,#36
590         vadd.i64        d27,d26
591         vshr.u64        d26,d21,#39
592         vadd.i64        d28,d3
593         vsli.64         d25,d21,#30
594         veor            d30,d21,d22
595         vsli.64         d26,d21,#25
596         veor            d20,d24,d25
597         vadd.i64        d27,d28
598         vbsl            d30,d23,d22             @ Maj(a,b,c)
599         veor            d20,d26                 @ Sigma0(a)
600         vadd.i64        d16,d27
601         vadd.i64        d30,d27
602         @ vadd.i64      d20,d30
603         vshr.u64        d24,d16,#14     @ 4
604 #if 4<16
605         vld1.64         {d4},[r1]!      @ handles unaligned
606 #endif
607         vshr.u64        d25,d16,#18
608 #if 4>0
609          vadd.i64       d20,d30                 @ h+=Maj from the past
610 #endif
611         vshr.u64        d26,d16,#41
612         vld1.64         {d28},[r3,:64]! @ K[i++]
613         vsli.64         d24,d16,#50
614         vsli.64         d25,d16,#46
615         vmov            d29,d16
616         vsli.64         d26,d16,#23
617 #if 4<16 && defined(__ARMEL__)
618         vrev64.8        d4,d4
619 #endif
620         veor            d25,d24
621         vbsl            d29,d17,d18             @ Ch(e,f,g)
622         vshr.u64        d24,d20,#28
623         veor            d26,d25                 @ Sigma1(e)
624         vadd.i64        d27,d29,d19
625         vshr.u64        d25,d20,#34
626         vsli.64         d24,d20,#36
627         vadd.i64        d27,d26
628         vshr.u64        d26,d20,#39
629         vadd.i64        d28,d4
630         vsli.64         d25,d20,#30
631         veor            d30,d20,d21
632         vsli.64         d26,d20,#25
633         veor            d19,d24,d25
634         vadd.i64        d27,d28
635         vbsl            d30,d22,d21             @ Maj(a,b,c)
636         veor            d19,d26                 @ Sigma0(a)
637         vadd.i64        d23,d27
638         vadd.i64        d30,d27
639         @ vadd.i64      d19,d30
640         vshr.u64        d24,d23,#14     @ 5
641 #if 5<16
642         vld1.64         {d5},[r1]!      @ handles unaligned
643 #endif
644         vshr.u64        d25,d23,#18
645 #if 5>0
646          vadd.i64       d19,d30                 @ h+=Maj from the past
647 #endif
648         vshr.u64        d26,d23,#41
649         vld1.64         {d28},[r3,:64]! @ K[i++]
650         vsli.64         d24,d23,#50
651         vsli.64         d25,d23,#46
652         vmov            d29,d23
653         vsli.64         d26,d23,#23
654 #if 5<16 && defined(__ARMEL__)
655         vrev64.8        d5,d5
656 #endif
657         veor            d25,d24
658         vbsl            d29,d16,d17             @ Ch(e,f,g)
659         vshr.u64        d24,d19,#28
660         veor            d26,d25                 @ Sigma1(e)
661         vadd.i64        d27,d29,d18
662         vshr.u64        d25,d19,#34
663         vsli.64         d24,d19,#36
664         vadd.i64        d27,d26
665         vshr.u64        d26,d19,#39
666         vadd.i64        d28,d5
667         vsli.64         d25,d19,#30
668         veor            d30,d19,d20
669         vsli.64         d26,d19,#25
670         veor            d18,d24,d25
671         vadd.i64        d27,d28
672         vbsl            d30,d21,d20             @ Maj(a,b,c)
673         veor            d18,d26                 @ Sigma0(a)
674         vadd.i64        d22,d27
675         vadd.i64        d30,d27
676         @ vadd.i64      d18,d30
677         vshr.u64        d24,d22,#14     @ 6
678 #if 6<16
679         vld1.64         {d6},[r1]!      @ handles unaligned
680 #endif
681         vshr.u64        d25,d22,#18
682 #if 6>0
683          vadd.i64       d18,d30                 @ h+=Maj from the past
684 #endif
685         vshr.u64        d26,d22,#41
686         vld1.64         {d28},[r3,:64]! @ K[i++]
687         vsli.64         d24,d22,#50
688         vsli.64         d25,d22,#46
689         vmov            d29,d22
690         vsli.64         d26,d22,#23
691 #if 6<16 && defined(__ARMEL__)
692         vrev64.8        d6,d6
693 #endif
694         veor            d25,d24
695         vbsl            d29,d23,d16             @ Ch(e,f,g)
696         vshr.u64        d24,d18,#28
697         veor            d26,d25                 @ Sigma1(e)
698         vadd.i64        d27,d29,d17
699         vshr.u64        d25,d18,#34
700         vsli.64         d24,d18,#36
701         vadd.i64        d27,d26
702         vshr.u64        d26,d18,#39
703         vadd.i64        d28,d6
704         vsli.64         d25,d18,#30
705         veor            d30,d18,d19
706         vsli.64         d26,d18,#25
707         veor            d17,d24,d25
708         vadd.i64        d27,d28
709         vbsl            d30,d20,d19             @ Maj(a,b,c)
710         veor            d17,d26                 @ Sigma0(a)
711         vadd.i64        d21,d27
712         vadd.i64        d30,d27
713         @ vadd.i64      d17,d30
714         vshr.u64        d24,d21,#14     @ 7
715 #if 7<16
716         vld1.64         {d7},[r1]!      @ handles unaligned
717 #endif
718         vshr.u64        d25,d21,#18
719 #if 7>0
720          vadd.i64       d17,d30                 @ h+=Maj from the past
721 #endif
722         vshr.u64        d26,d21,#41
723         vld1.64         {d28},[r3,:64]! @ K[i++]
724         vsli.64         d24,d21,#50
725         vsli.64         d25,d21,#46
726         vmov            d29,d21
727         vsli.64         d26,d21,#23
728 #if 7<16 && defined(__ARMEL__)
729         vrev64.8        d7,d7
730 #endif
731         veor            d25,d24
732         vbsl            d29,d22,d23             @ Ch(e,f,g)
733         vshr.u64        d24,d17,#28
734         veor            d26,d25                 @ Sigma1(e)
735         vadd.i64        d27,d29,d16
736         vshr.u64        d25,d17,#34
737         vsli.64         d24,d17,#36
738         vadd.i64        d27,d26
739         vshr.u64        d26,d17,#39
740         vadd.i64        d28,d7
741         vsli.64         d25,d17,#30
742         veor            d30,d17,d18
743         vsli.64         d26,d17,#25
744         veor            d16,d24,d25
745         vadd.i64        d27,d28
746         vbsl            d30,d19,d18             @ Maj(a,b,c)
747         veor            d16,d26                 @ Sigma0(a)
748         vadd.i64        d20,d27
749         vadd.i64        d30,d27
750         @ vadd.i64      d16,d30
751         vshr.u64        d24,d20,#14     @ 8
752 #if 8<16
753         vld1.64         {d8},[r1]!      @ handles unaligned
754 #endif
755         vshr.u64        d25,d20,#18
756 #if 8>0
757          vadd.i64       d16,d30                 @ h+=Maj from the past
758 #endif
759         vshr.u64        d26,d20,#41
760         vld1.64         {d28},[r3,:64]! @ K[i++]
761         vsli.64         d24,d20,#50
762         vsli.64         d25,d20,#46
763         vmov            d29,d20
764         vsli.64         d26,d20,#23
765 #if 8<16 && defined(__ARMEL__)
766         vrev64.8        d8,d8
767 #endif
768         veor            d25,d24
769         vbsl            d29,d21,d22             @ Ch(e,f,g)
770         vshr.u64        d24,d16,#28
771         veor            d26,d25                 @ Sigma1(e)
772         vadd.i64        d27,d29,d23
773         vshr.u64        d25,d16,#34
774         vsli.64         d24,d16,#36
775         vadd.i64        d27,d26
776         vshr.u64        d26,d16,#39
777         vadd.i64        d28,d8
778         vsli.64         d25,d16,#30
779         veor            d30,d16,d17
780         vsli.64         d26,d16,#25
781         veor            d23,d24,d25
782         vadd.i64        d27,d28
783         vbsl            d30,d18,d17             @ Maj(a,b,c)
784         veor            d23,d26                 @ Sigma0(a)
785         vadd.i64        d19,d27
786         vadd.i64        d30,d27
787         @ vadd.i64      d23,d30
788         vshr.u64        d24,d19,#14     @ 9
789 #if 9<16
790         vld1.64         {d9},[r1]!      @ handles unaligned
791 #endif
792         vshr.u64        d25,d19,#18
793 #if 9>0
794          vadd.i64       d23,d30                 @ h+=Maj from the past
795 #endif
796         vshr.u64        d26,d19,#41
797         vld1.64         {d28},[r3,:64]! @ K[i++]
798         vsli.64         d24,d19,#50
799         vsli.64         d25,d19,#46
800         vmov            d29,d19
801         vsli.64         d26,d19,#23
802 #if 9<16 && defined(__ARMEL__)
803         vrev64.8        d9,d9
804 #endif
805         veor            d25,d24
806         vbsl            d29,d20,d21             @ Ch(e,f,g)
807         vshr.u64        d24,d23,#28
808         veor            d26,d25                 @ Sigma1(e)
809         vadd.i64        d27,d29,d22
810         vshr.u64        d25,d23,#34
811         vsli.64         d24,d23,#36
812         vadd.i64        d27,d26
813         vshr.u64        d26,d23,#39
814         vadd.i64        d28,d9
815         vsli.64         d25,d23,#30
816         veor            d30,d23,d16
817         vsli.64         d26,d23,#25
818         veor            d22,d24,d25
819         vadd.i64        d27,d28
820         vbsl            d30,d17,d16             @ Maj(a,b,c)
821         veor            d22,d26                 @ Sigma0(a)
822         vadd.i64        d18,d27
823         vadd.i64        d30,d27
824         @ vadd.i64      d22,d30
825         vshr.u64        d24,d18,#14     @ 10
826 #if 10<16
827         vld1.64         {d10},[r1]!     @ handles unaligned
828 #endif
829         vshr.u64        d25,d18,#18
830 #if 10>0
831          vadd.i64       d22,d30                 @ h+=Maj from the past
832 #endif
833         vshr.u64        d26,d18,#41
834         vld1.64         {d28},[r3,:64]! @ K[i++]
835         vsli.64         d24,d18,#50
836         vsli.64         d25,d18,#46
837         vmov            d29,d18
838         vsli.64         d26,d18,#23
839 #if 10<16 && defined(__ARMEL__)
840         vrev64.8        d10,d10
841 #endif
842         veor            d25,d24
843         vbsl            d29,d19,d20             @ Ch(e,f,g)
844         vshr.u64        d24,d22,#28
845         veor            d26,d25                 @ Sigma1(e)
846         vadd.i64        d27,d29,d21
847         vshr.u64        d25,d22,#34
848         vsli.64         d24,d22,#36
849         vadd.i64        d27,d26
850         vshr.u64        d26,d22,#39
851         vadd.i64        d28,d10
852         vsli.64         d25,d22,#30
853         veor            d30,d22,d23
854         vsli.64         d26,d22,#25
855         veor            d21,d24,d25
856         vadd.i64        d27,d28
857         vbsl            d30,d16,d23             @ Maj(a,b,c)
858         veor            d21,d26                 @ Sigma0(a)
859         vadd.i64        d17,d27
860         vadd.i64        d30,d27
861         @ vadd.i64      d21,d30
862         vshr.u64        d24,d17,#14     @ 11
863 #if 11<16
864         vld1.64         {d11},[r1]!     @ handles unaligned
865 #endif
866         vshr.u64        d25,d17,#18
867 #if 11>0
868          vadd.i64       d21,d30                 @ h+=Maj from the past
869 #endif
870         vshr.u64        d26,d17,#41
871         vld1.64         {d28},[r3,:64]! @ K[i++]
872         vsli.64         d24,d17,#50
873         vsli.64         d25,d17,#46
874         vmov            d29,d17
875         vsli.64         d26,d17,#23
876 #if 11<16 && defined(__ARMEL__)
877         vrev64.8        d11,d11
878 #endif
879         veor            d25,d24
880         vbsl            d29,d18,d19             @ Ch(e,f,g)
881         vshr.u64        d24,d21,#28
882         veor            d26,d25                 @ Sigma1(e)
883         vadd.i64        d27,d29,d20
884         vshr.u64        d25,d21,#34
885         vsli.64         d24,d21,#36
886         vadd.i64        d27,d26
887         vshr.u64        d26,d21,#39
888         vadd.i64        d28,d11
889         vsli.64         d25,d21,#30
890         veor            d30,d21,d22
891         vsli.64         d26,d21,#25
892         veor            d20,d24,d25
893         vadd.i64        d27,d28
894         vbsl            d30,d23,d22             @ Maj(a,b,c)
895         veor            d20,d26                 @ Sigma0(a)
896         vadd.i64        d16,d27
897         vadd.i64        d30,d27
898         @ vadd.i64      d20,d30
899         vshr.u64        d24,d16,#14     @ 12
900 #if 12<16
901         vld1.64         {d12},[r1]!     @ handles unaligned
902 #endif
903         vshr.u64        d25,d16,#18
904 #if 12>0
905          vadd.i64       d20,d30                 @ h+=Maj from the past
906 #endif
907         vshr.u64        d26,d16,#41
908         vld1.64         {d28},[r3,:64]! @ K[i++]
909         vsli.64         d24,d16,#50
910         vsli.64         d25,d16,#46
911         vmov            d29,d16
912         vsli.64         d26,d16,#23
913 #if 12<16 && defined(__ARMEL__)
914         vrev64.8        d12,d12
915 #endif
916         veor            d25,d24
917         vbsl            d29,d17,d18             @ Ch(e,f,g)
918         vshr.u64        d24,d20,#28
919         veor            d26,d25                 @ Sigma1(e)
920         vadd.i64        d27,d29,d19
921         vshr.u64        d25,d20,#34
922         vsli.64         d24,d20,#36
923         vadd.i64        d27,d26
924         vshr.u64        d26,d20,#39
925         vadd.i64        d28,d12
926         vsli.64         d25,d20,#30
927         veor            d30,d20,d21
928         vsli.64         d26,d20,#25
929         veor            d19,d24,d25
930         vadd.i64        d27,d28
931         vbsl            d30,d22,d21             @ Maj(a,b,c)
932         veor            d19,d26                 @ Sigma0(a)
933         vadd.i64        d23,d27
934         vadd.i64        d30,d27
935         @ vadd.i64      d19,d30
936         vshr.u64        d24,d23,#14     @ 13
937 #if 13<16
938         vld1.64         {d13},[r1]!     @ handles unaligned
939 #endif
940         vshr.u64        d25,d23,#18
941 #if 13>0
942          vadd.i64       d19,d30                 @ h+=Maj from the past
943 #endif
944         vshr.u64        d26,d23,#41
945         vld1.64         {d28},[r3,:64]! @ K[i++]
946         vsli.64         d24,d23,#50
947         vsli.64         d25,d23,#46
948         vmov            d29,d23
949         vsli.64         d26,d23,#23
950 #if 13<16 && defined(__ARMEL__)
951         vrev64.8        d13,d13
952 #endif
953         veor            d25,d24
954         vbsl            d29,d16,d17             @ Ch(e,f,g)
955         vshr.u64        d24,d19,#28
956         veor            d26,d25                 @ Sigma1(e)
957         vadd.i64        d27,d29,d18
958         vshr.u64        d25,d19,#34
959         vsli.64         d24,d19,#36
960         vadd.i64        d27,d26
961         vshr.u64        d26,d19,#39
962         vadd.i64        d28,d13
963         vsli.64         d25,d19,#30
964         veor            d30,d19,d20
965         vsli.64         d26,d19,#25
966         veor            d18,d24,d25
967         vadd.i64        d27,d28
968         vbsl            d30,d21,d20             @ Maj(a,b,c)
969         veor            d18,d26                 @ Sigma0(a)
970         vadd.i64        d22,d27
971         vadd.i64        d30,d27
972         @ vadd.i64      d18,d30
973         vshr.u64        d24,d22,#14     @ 14
974 #if 14<16
975         vld1.64         {d14},[r1]!     @ handles unaligned
976 #endif
977         vshr.u64        d25,d22,#18
978 #if 14>0
979          vadd.i64       d18,d30                 @ h+=Maj from the past
980 #endif
981         vshr.u64        d26,d22,#41
982         vld1.64         {d28},[r3,:64]! @ K[i++]
983         vsli.64         d24,d22,#50
984         vsli.64         d25,d22,#46
985         vmov            d29,d22
986         vsli.64         d26,d22,#23
987 #if 14<16 && defined(__ARMEL__)
988         vrev64.8        d14,d14
989 #endif
990         veor            d25,d24
991         vbsl            d29,d23,d16             @ Ch(e,f,g)
992         vshr.u64        d24,d18,#28
993         veor            d26,d25                 @ Sigma1(e)
994         vadd.i64        d27,d29,d17
995         vshr.u64        d25,d18,#34
996         vsli.64         d24,d18,#36
997         vadd.i64        d27,d26
998         vshr.u64        d26,d18,#39
999         vadd.i64        d28,d14
1000         vsli.64         d25,d18,#30
1001         veor            d30,d18,d19
1002         vsli.64         d26,d18,#25
1003         veor            d17,d24,d25
1004         vadd.i64        d27,d28
1005         vbsl            d30,d20,d19             @ Maj(a,b,c)
1006         veor            d17,d26                 @ Sigma0(a)
1007         vadd.i64        d21,d27
1008         vadd.i64        d30,d27
1009         @ vadd.i64      d17,d30
1010         vshr.u64        d24,d21,#14     @ 15
1011 #if 15<16
1012         vld1.64         {d15},[r1]!     @ handles unaligned
1013 #endif
1014         vshr.u64        d25,d21,#18
1015 #if 15>0
1016          vadd.i64       d17,d30                 @ h+=Maj from the past
1017 #endif
1018         vshr.u64        d26,d21,#41
1019         vld1.64         {d28},[r3,:64]! @ K[i++]
1020         vsli.64         d24,d21,#50
1021         vsli.64         d25,d21,#46
1022         vmov            d29,d21
1023         vsli.64         d26,d21,#23
1024 #if 15<16 && defined(__ARMEL__)
1025         vrev64.8        d15,d15
1026 #endif
1027         veor            d25,d24
1028         vbsl            d29,d22,d23             @ Ch(e,f,g)
1029         vshr.u64        d24,d17,#28
1030         veor            d26,d25                 @ Sigma1(e)
1031         vadd.i64        d27,d29,d16
1032         vshr.u64        d25,d17,#34
1033         vsli.64         d24,d17,#36
1034         vadd.i64        d27,d26
1035         vshr.u64        d26,d17,#39
1036         vadd.i64        d28,d15
1037         vsli.64         d25,d17,#30
1038         veor            d30,d17,d18
1039         vsli.64         d26,d17,#25
1040         veor            d16,d24,d25
1041         vadd.i64        d27,d28
1042         vbsl            d30,d19,d18             @ Maj(a,b,c)
1043         veor            d16,d26                 @ Sigma0(a)
1044         vadd.i64        d20,d27
1045         vadd.i64        d30,d27
1046         @ vadd.i64      d16,d30
1047         mov             r12,#4
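@ The schedule/compression loop below runs four times (r12 counter), sixteen
@ rounds per pass, covering rounds 16..79.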
1048 .L16_79_neon:
1049         subs            r12,#1
1050         vshr.u64        q12,q7,#19
1051         vshr.u64        q13,q7,#61
1052          vadd.i64       d16,d30                 @ h+=Maj from the past
1053         vshr.u64        q15,q7,#6
1054         vsli.64         q12,q7,#45
1055         vext.8          q14,q0,q1,#8    @ X[i+1]
1056         vsli.64         q13,q7,#3
1057         veor            q15,q12
1058         vshr.u64        q12,q14,#1
1059         veor            q15,q13                         @ sigma1(X[i+14])
1060         vshr.u64        q13,q14,#8
1061         vadd.i64        q0,q15
1062         vshr.u64        q15,q14,#7
1063         vsli.64         q12,q14,#63
1064         vsli.64         q13,q14,#56
1065         vext.8          q14,q4,q5,#8    @ X[i+9]
1066         veor            q15,q12
1067         vshr.u64        d24,d20,#14             @ from NEON_00_15
1068         vadd.i64        q0,q14
1069         vshr.u64        d25,d20,#18             @ from NEON_00_15
1070         veor            q15,q13                         @ sigma0(X[i+1])
1071         vshr.u64        d26,d20,#41             @ from NEON_00_15
1072         vadd.i64        q0,q15
1073         vld1.64         {d28},[r3,:64]! @ K[i++]
1074         vsli.64         d24,d20,#50
1075         vsli.64         d25,d20,#46
1076         vmov            d29,d20
1077         vsli.64         d26,d20,#23
1078 #if 16<16 && defined(__ARMEL__)
1079         vrev64.8        ,
1080 #endif
1081         veor            d25,d24
1082         vbsl            d29,d21,d22             @ Ch(e,f,g)
1083         vshr.u64        d24,d16,#28
1084         veor            d26,d25                 @ Sigma1(e)
1085         vadd.i64        d27,d29,d23
1086         vshr.u64        d25,d16,#34
1087         vsli.64         d24,d16,#36
1088         vadd.i64        d27,d26
1089         vshr.u64        d26,d16,#39
1090         vadd.i64        d28,d0
1091         vsli.64         d25,d16,#30
1092         veor            d30,d16,d17
1093         vsli.64         d26,d16,#25
1094         veor            d23,d24,d25
1095         vadd.i64        d27,d28
1096         vbsl            d30,d18,d17             @ Maj(a,b,c)
1097         veor            d23,d26                 @ Sigma0(a)
1098         vadd.i64        d19,d27
1099         vadd.i64        d30,d27
1100         @ vadd.i64      d23,d30
1101         vshr.u64        d24,d19,#14     @ 17
1102 #if 17<16
1103         vld1.64         {d1},[r1]!      @ handles unaligned
1104 #endif
1105         vshr.u64        d25,d19,#18
1106 #if 17>0
1107          vadd.i64       d23,d30                 @ h+=Maj from the past
1108 #endif
1109         vshr.u64        d26,d19,#41
1110         vld1.64         {d28},[r3,:64]! @ K[i++]
1111         vsli.64         d24,d19,#50
1112         vsli.64         d25,d19,#46
1113         vmov            d29,d19
1114         vsli.64         d26,d19,#23
1115 #if 17<16 && defined(__ARMEL__)
1116         vrev64.8        ,
1117 #endif
1118         veor            d25,d24
1119         vbsl            d29,d20,d21             @ Ch(e,f,g)
1120         vshr.u64        d24,d23,#28
1121         veor            d26,d25                 @ Sigma1(e)
1122         vadd.i64        d27,d29,d22
1123         vshr.u64        d25,d23,#34
1124         vsli.64         d24,d23,#36
1125         vadd.i64        d27,d26
1126         vshr.u64        d26,d23,#39
1127         vadd.i64        d28,d1
1128         vsli.64         d25,d23,#30
1129         veor            d30,d23,d16
1130         vsli.64         d26,d23,#25
1131         veor            d22,d24,d25
1132         vadd.i64        d27,d28
1133         vbsl            d30,d17,d16             @ Maj(a,b,c)
1134         veor            d22,d26                 @ Sigma0(a)
1135         vadd.i64        d18,d27
1136         vadd.i64        d30,d27
1137         @ vadd.i64      d22,d30
1138         vshr.u64        q12,q0,#19
1139         vshr.u64        q13,q0,#61
1140          vadd.i64       d22,d30                 @ h+=Maj from the past
1141         vshr.u64        q15,q0,#6
1142         vsli.64         q12,q0,#45
1143         vext.8          q14,q1,q2,#8    @ X[i+1]
1144         vsli.64         q13,q0,#3
1145         veor            q15,q12
1146         vshr.u64        q12,q14,#1
1147         veor            q15,q13                         @ sigma1(X[i+14])
1148         vshr.u64        q13,q14,#8
1149         vadd.i64        q1,q15
1150         vshr.u64        q15,q14,#7
1151         vsli.64         q12,q14,#63
1152         vsli.64         q13,q14,#56
1153         vext.8          q14,q5,q6,#8    @ X[i+9]
1154         veor            q15,q12
1155         vshr.u64        d24,d18,#14             @ from NEON_00_15
1156         vadd.i64        q1,q14
1157         vshr.u64        d25,d18,#18             @ from NEON_00_15
1158         veor            q15,q13                         @ sigma0(X[i+1])
1159         vshr.u64        d26,d18,#41             @ from NEON_00_15
1160         vadd.i64        q1,q15
1161         vld1.64         {d28},[r3,:64]! @ K[i++]
1162         vsli.64         d24,d18,#50
1163         vsli.64         d25,d18,#46
1164         vmov            d29,d18
1165         vsli.64         d26,d18,#23
1166 #if 18<16 && defined(__ARMEL__)
1167         vrev64.8        ,
1168 #endif
1169         veor            d25,d24
1170         vbsl            d29,d19,d20             @ Ch(e,f,g)
1171         vshr.u64        d24,d22,#28
1172         veor            d26,d25                 @ Sigma1(e)
1173         vadd.i64        d27,d29,d21
1174         vshr.u64        d25,d22,#34
1175         vsli.64         d24,d22,#36
1176         vadd.i64        d27,d26
1177         vshr.u64        d26,d22,#39
1178         vadd.i64        d28,d2
1179         vsli.64         d25,d22,#30
1180         veor            d30,d22,d23
1181         vsli.64         d26,d22,#25
1182         veor            d21,d24,d25
1183         vadd.i64        d27,d28
1184         vbsl            d30,d16,d23             @ Maj(a,b,c)
1185         veor            d21,d26                 @ Sigma0(a)
1186         vadd.i64        d17,d27
1187         vadd.i64        d30,d27
1188         @ vadd.i64      d21,d30
1189         vshr.u64        d24,d17,#14     @ 19
1190 #if 19<16
1191         vld1.64         {d3},[r1]!      @ handles unaligned
1192 #endif
1193         vshr.u64        d25,d17,#18
1194 #if 19>0
1195          vadd.i64       d21,d30                 @ h+=Maj from the past
1196 #endif
1197         vshr.u64        d26,d17,#41
1198         vld1.64         {d28},[r3,:64]! @ K[i++]
1199         vsli.64         d24,d17,#50
1200         vsli.64         d25,d17,#46
1201         vmov            d29,d17
1202         vsli.64         d26,d17,#23
1203 #if 19<16 && defined(__ARMEL__)
1204         vrev64.8        ,
1205 #endif
1206         veor            d25,d24
1207         vbsl            d29,d18,d19             @ Ch(e,f,g)
1208         vshr.u64        d24,d21,#28
1209         veor            d26,d25                 @ Sigma1(e)
1210         vadd.i64        d27,d29,d20
1211         vshr.u64        d25,d21,#34
1212         vsli.64         d24,d21,#36
1213         vadd.i64        d27,d26
1214         vshr.u64        d26,d21,#39
1215         vadd.i64        d28,d3
1216         vsli.64         d25,d21,#30
1217         veor            d30,d21,d22
1218         vsli.64         d26,d21,#25
1219         veor            d20,d24,d25
1220         vadd.i64        d27,d28
1221         vbsl            d30,d23,d22             @ Maj(a,b,c)
1222         veor            d20,d26                 @ Sigma0(a)
1223         vadd.i64        d16,d27
1224         vadd.i64        d30,d27
1225         @ vadd.i64      d20,d30
1226         vshr.u64        q12,q1,#19
1227         vshr.u64        q13,q1,#61
1228          vadd.i64       d20,d30                 @ h+=Maj from the past
1229         vshr.u64        q15,q1,#6
1230         vsli.64         q12,q1,#45
1231         vext.8          q14,q2,q3,#8    @ X[i+1]
1232         vsli.64         q13,q1,#3
1233         veor            q15,q12
1234         vshr.u64        q12,q14,#1
1235         veor            q15,q13                         @ sigma1(X[i+14])
1236         vshr.u64        q13,q14,#8
1237         vadd.i64        q2,q15
1238         vshr.u64        q15,q14,#7
1239         vsli.64         q12,q14,#63
1240         vsli.64         q13,q14,#56
1241         vext.8          q14,q6,q7,#8    @ X[i+9]
1242         veor            q15,q12
1243         vshr.u64        d24,d16,#14             @ from NEON_00_15
1244         vadd.i64        q2,q14
1245         vshr.u64        d25,d16,#18             @ from NEON_00_15
1246         veor            q15,q13                         @ sigma0(X[i+1])
1247         vshr.u64        d26,d16,#41             @ from NEON_00_15
1248         vadd.i64        q2,q15
1249         vld1.64         {d28},[r3,:64]! @ K[i++]
1250         vsli.64         d24,d16,#50
1251         vsli.64         d25,d16,#46
1252         vmov            d29,d16
1253         vsli.64         d26,d16,#23
1254 #if 20<16 && defined(__ARMEL__)
1255         vrev64.8        ,
1256 #endif
1257         veor            d25,d24
1258         vbsl            d29,d17,d18             @ Ch(e,f,g)
1259         vshr.u64        d24,d20,#28
1260         veor            d26,d25                 @ Sigma1(e)
1261         vadd.i64        d27,d29,d19
1262         vshr.u64        d25,d20,#34
1263         vsli.64         d24,d20,#36
1264         vadd.i64        d27,d26
1265         vshr.u64        d26,d20,#39
1266         vadd.i64        d28,d4
1267         vsli.64         d25,d20,#30
1268         veor            d30,d20,d21
1269         vsli.64         d26,d20,#25
1270         veor            d19,d24,d25
1271         vadd.i64        d27,d28
1272         vbsl            d30,d22,d21             @ Maj(a,b,c)
1273         veor            d19,d26                 @ Sigma0(a)
1274         vadd.i64        d23,d27
1275         vadd.i64        d30,d27
1276         @ vadd.i64      d19,d30
1277         vshr.u64        d24,d23,#14     @ 21
1278 #if 21<16
1279         vld1.64         {d5},[r1]!      @ handles unaligned
1280 #endif
1281         vshr.u64        d25,d23,#18
1282 #if 21>0
1283          vadd.i64       d19,d30                 @ h+=Maj from the past
1284 #endif
1285         vshr.u64        d26,d23,#41
1286         vld1.64         {d28},[r3,:64]! @ K[i++]
1287         vsli.64         d24,d23,#50
1288         vsli.64         d25,d23,#46
1289         vmov            d29,d23
1290         vsli.64         d26,d23,#23
1291 #if 21<16 && defined(__ARMEL__)
1292         vrev64.8        ,
1293 #endif
1294         veor            d25,d24
1295         vbsl            d29,d16,d17             @ Ch(e,f,g)
1296         vshr.u64        d24,d19,#28
1297         veor            d26,d25                 @ Sigma1(e)
1298         vadd.i64        d27,d29,d18
1299         vshr.u64        d25,d19,#34
1300         vsli.64         d24,d19,#36
1301         vadd.i64        d27,d26
1302         vshr.u64        d26,d19,#39
1303         vadd.i64        d28,d5
1304         vsli.64         d25,d19,#30
1305         veor            d30,d19,d20
1306         vsli.64         d26,d19,#25
1307         veor            d18,d24,d25
1308         vadd.i64        d27,d28
1309         vbsl            d30,d21,d20             @ Maj(a,b,c)
1310         veor            d18,d26                 @ Sigma0(a)
1311         vadd.i64        d22,d27
1312         vadd.i64        d30,d27
1313         @ vadd.i64      d18,d30
1314         vshr.u64        q12,q2,#19
1315         vshr.u64        q13,q2,#61
1316          vadd.i64       d18,d30                 @ h+=Maj from the past
1317         vshr.u64        q15,q2,#6
1318         vsli.64         q12,q2,#45
1319         vext.8          q14,q3,q4,#8    @ X[i+1]
1320         vsli.64         q13,q2,#3
1321         veor            q15,q12
1322         vshr.u64        q12,q14,#1
1323         veor            q15,q13                         @ sigma1(X[i+14])
1324         vshr.u64        q13,q14,#8
1325         vadd.i64        q3,q15
1326         vshr.u64        q15,q14,#7
1327         vsli.64         q12,q14,#63
1328         vsli.64         q13,q14,#56
1329         vext.8          q14,q7,q0,#8    @ X[i+9]
1330         veor            q15,q12
1331         vshr.u64        d24,d22,#14             @ from NEON_00_15
1332         vadd.i64        q3,q14
1333         vshr.u64        d25,d22,#18             @ from NEON_00_15
1334         veor            q15,q13                         @ sigma0(X[i+1])
1335         vshr.u64        d26,d22,#41             @ from NEON_00_15
1336         vadd.i64        q3,q15
1337         vld1.64         {d28},[r3,:64]! @ K[i++]
1338         vsli.64         d24,d22,#50
1339         vsli.64         d25,d22,#46
1340         vmov            d29,d22
1341         vsli.64         d26,d22,#23
1342 #if 22<16 && defined(__ARMEL__)
1343         vrev64.8        ,
1344 #endif
1345         veor            d25,d24
1346         vbsl            d29,d23,d16             @ Ch(e,f,g)
1347         vshr.u64        d24,d18,#28
1348         veor            d26,d25                 @ Sigma1(e)
1349         vadd.i64        d27,d29,d17
1350         vshr.u64        d25,d18,#34
1351         vsli.64         d24,d18,#36
1352         vadd.i64        d27,d26
1353         vshr.u64        d26,d18,#39
1354         vadd.i64        d28,d6
1355         vsli.64         d25,d18,#30
1356         veor            d30,d18,d19
1357         vsli.64         d26,d18,#25
1358         veor            d17,d24,d25
1359         vadd.i64        d27,d28
1360         vbsl            d30,d20,d19             @ Maj(a,b,c)
1361         veor            d17,d26                 @ Sigma0(a)
1362         vadd.i64        d21,d27
1363         vadd.i64        d30,d27
1364         @ vadd.i64      d17,d30
1365         vshr.u64        d24,d21,#14     @ 23
1366 #if 23<16
1367         vld1.64         {d7},[r1]!      @ handles unaligned
1368 #endif
1369         vshr.u64        d25,d21,#18
1370 #if 23>0
1371          vadd.i64       d17,d30                 @ h+=Maj from the past
1372 #endif
1373         vshr.u64        d26,d21,#41
1374         vld1.64         {d28},[r3,:64]! @ K[i++]
1375         vsli.64         d24,d21,#50
1376         vsli.64         d25,d21,#46
1377         vmov            d29,d21
1378         vsli.64         d26,d21,#23
1379 #if 23<16 && defined(__ARMEL__)
1380         vrev64.8        ,
1381 #endif
1382         veor            d25,d24
1383         vbsl            d29,d22,d23             @ Ch(e,f,g)
1384         vshr.u64        d24,d17,#28
1385         veor            d26,d25                 @ Sigma1(e)
1386         vadd.i64        d27,d29,d16
1387         vshr.u64        d25,d17,#34
1388         vsli.64         d24,d17,#36
1389         vadd.i64        d27,d26
1390         vshr.u64        d26,d17,#39
1391         vadd.i64        d28,d7
1392         vsli.64         d25,d17,#30
1393         veor            d30,d17,d18
1394         vsli.64         d26,d17,#25
1395         veor            d16,d24,d25
1396         vadd.i64        d27,d28
1397         vbsl            d30,d19,d18             @ Maj(a,b,c)
1398         veor            d16,d26                 @ Sigma0(a)
1399         vadd.i64        d20,d27
1400         vadd.i64        d30,d27
1401         @ vadd.i64      d16,d30
        vshr.u64        q12,q3,#19
        vshr.u64        q13,q3,#61
         vadd.i64       d16,d30                 @ h+=Maj from the past
        vshr.u64        q15,q3,#6
        vsli.64         q12,q3,#45
        vext.8          q14,q4,q5,#8    @ X[i+1]
        vsli.64         q13,q3,#3
        veor            q15,q12
        vshr.u64        q12,q14,#1
        veor            q15,q13                         @ sigma1(X[i+14])
        vshr.u64        q13,q14,#8
        vadd.i64        q4,q15
        vshr.u64        q15,q14,#7
        vsli.64         q12,q14,#63
        vsli.64         q13,q14,#56
        vext.8          q14,q0,q1,#8    @ X[i+9]
        veor            q15,q12
        vshr.u64        d24,d20,#14             @ from NEON_00_15
        vadd.i64        q4,q14
        vshr.u64        d25,d20,#18             @ from NEON_00_15
        veor            q15,q13                         @ sigma0(X[i+1])
        vshr.u64        d26,d20,#41             @ from NEON_00_15
        vadd.i64        q4,q15
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d20,#50
        vsli.64         d25,d20,#46
        vmov            d29,d20
        vsli.64         d26,d20,#23
#if 24<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d21,d22             @ Ch(e,f,g)
        vshr.u64        d24,d16,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d23
        vshr.u64        d25,d16,#34
        vsli.64         d24,d16,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d16,#39
        vadd.i64        d28,d8
        vsli.64         d25,d16,#30
        veor            d30,d16,d17
        vsli.64         d26,d16,#25
        veor            d23,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d18,d17             @ Maj(a,b,c)
        veor            d23,d26                 @ Sigma0(a)
        vadd.i64        d19,d27
        vadd.i64        d30,d27
        @ vadd.i64      d23,d30
        vshr.u64        d24,d19,#14     @ 25
#if 25<16
        vld1.64         {d9},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d19,#18
#if 25>0
         vadd.i64       d23,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d19,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d19,#50
        vsli.64         d25,d19,#46
        vmov            d29,d19
        vsli.64         d26,d19,#23
#if 25<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d20,d21             @ Ch(e,f,g)
        vshr.u64        d24,d23,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d22
        vshr.u64        d25,d23,#34
        vsli.64         d24,d23,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d23,#39
        vadd.i64        d28,d9
        vsli.64         d25,d23,#30
        veor            d30,d23,d16
        vsli.64         d26,d23,#25
        veor            d22,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d17,d16             @ Maj(a,b,c)
        veor            d22,d26                 @ Sigma0(a)
        vadd.i64        d18,d27
        vadd.i64        d30,d27
        @ vadd.i64      d22,d30
        vshr.u64        q12,q4,#19
        vshr.u64        q13,q4,#61
         vadd.i64       d22,d30                 @ h+=Maj from the past
        vshr.u64        q15,q4,#6
        vsli.64         q12,q4,#45
        vext.8          q14,q5,q6,#8    @ X[i+1]
        vsli.64         q13,q4,#3
        veor            q15,q12
        vshr.u64        q12,q14,#1
        veor            q15,q13                         @ sigma1(X[i+14])
        vshr.u64        q13,q14,#8
        vadd.i64        q5,q15
        vshr.u64        q15,q14,#7
        vsli.64         q12,q14,#63
        vsli.64         q13,q14,#56
        vext.8          q14,q1,q2,#8    @ X[i+9]
        veor            q15,q12
        vshr.u64        d24,d18,#14             @ from NEON_00_15
        vadd.i64        q5,q14
        vshr.u64        d25,d18,#18             @ from NEON_00_15
        veor            q15,q13                         @ sigma0(X[i+1])
        vshr.u64        d26,d18,#41             @ from NEON_00_15
        vadd.i64        q5,q15
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d18,#50
        vsli.64         d25,d18,#46
        vmov            d29,d18
        vsli.64         d26,d18,#23
#if 26<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d19,d20             @ Ch(e,f,g)
        vshr.u64        d24,d22,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d21
        vshr.u64        d25,d22,#34
        vsli.64         d24,d22,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d22,#39
        vadd.i64        d28,d10
        vsli.64         d25,d22,#30
        veor            d30,d22,d23
        vsli.64         d26,d22,#25
        veor            d21,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d16,d23             @ Maj(a,b,c)
        veor            d21,d26                 @ Sigma0(a)
        vadd.i64        d17,d27
        vadd.i64        d30,d27
        @ vadd.i64      d21,d30
        vshr.u64        d24,d17,#14     @ 27
#if 27<16
        vld1.64         {d11},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d17,#18
#if 27>0
         vadd.i64       d21,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d17,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d17,#50
        vsli.64         d25,d17,#46
        vmov            d29,d17
        vsli.64         d26,d17,#23
#if 27<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d18,d19             @ Ch(e,f,g)
        vshr.u64        d24,d21,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d20
        vshr.u64        d25,d21,#34
        vsli.64         d24,d21,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d21,#39
        vadd.i64        d28,d11
        vsli.64         d25,d21,#30
        veor            d30,d21,d22
        vsli.64         d26,d21,#25
        veor            d20,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d23,d22             @ Maj(a,b,c)
        veor            d20,d26                 @ Sigma0(a)
        vadd.i64        d16,d27
        vadd.i64        d30,d27
        @ vadd.i64      d20,d30
        vshr.u64        q12,q5,#19
        vshr.u64        q13,q5,#61
         vadd.i64       d20,d30                 @ h+=Maj from the past
        vshr.u64        q15,q5,#6
        vsli.64         q12,q5,#45
        vext.8          q14,q6,q7,#8    @ X[i+1]
        vsli.64         q13,q5,#3
        veor            q15,q12
        vshr.u64        q12,q14,#1
        veor            q15,q13                         @ sigma1(X[i+14])
        vshr.u64        q13,q14,#8
        vadd.i64        q6,q15
        vshr.u64        q15,q14,#7
        vsli.64         q12,q14,#63
        vsli.64         q13,q14,#56
        vext.8          q14,q2,q3,#8    @ X[i+9]
        veor            q15,q12
        vshr.u64        d24,d16,#14             @ from NEON_00_15
        vadd.i64        q6,q14
        vshr.u64        d25,d16,#18             @ from NEON_00_15
        veor            q15,q13                         @ sigma0(X[i+1])
        vshr.u64        d26,d16,#41             @ from NEON_00_15
        vadd.i64        q6,q15
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d16,#50
        vsli.64         d25,d16,#46
        vmov            d29,d16
        vsli.64         d26,d16,#23
#if 28<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d17,d18             @ Ch(e,f,g)
        vshr.u64        d24,d20,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d19
        vshr.u64        d25,d20,#34
        vsli.64         d24,d20,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d20,#39
        vadd.i64        d28,d12
        vsli.64         d25,d20,#30
        veor            d30,d20,d21
        vsli.64         d26,d20,#25
        veor            d19,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d22,d21             @ Maj(a,b,c)
        veor            d19,d26                 @ Sigma0(a)
        vadd.i64        d23,d27
        vadd.i64        d30,d27
        @ vadd.i64      d19,d30
        vshr.u64        d24,d23,#14     @ 29
#if 29<16
        vld1.64         {d13},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d23,#18
#if 29>0
         vadd.i64       d19,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d23,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d23,#50
        vsli.64         d25,d23,#46
        vmov            d29,d23
        vsli.64         d26,d23,#23
#if 29<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d16,d17             @ Ch(e,f,g)
        vshr.u64        d24,d19,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d18
        vshr.u64        d25,d19,#34
        vsli.64         d24,d19,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d19,#39
        vadd.i64        d28,d13
        vsli.64         d25,d19,#30
        veor            d30,d19,d20
        vsli.64         d26,d19,#25
        veor            d18,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d21,d20             @ Maj(a,b,c)
        veor            d18,d26                 @ Sigma0(a)
        vadd.i64        d22,d27
        vadd.i64        d30,d27
        @ vadd.i64      d18,d30
        vshr.u64        q12,q6,#19
        vshr.u64        q13,q6,#61
         vadd.i64       d18,d30                 @ h+=Maj from the past
        vshr.u64        q15,q6,#6
        vsli.64         q12,q6,#45
        vext.8          q14,q7,q0,#8    @ X[i+1]
        vsli.64         q13,q6,#3
        veor            q15,q12
        vshr.u64        q12,q14,#1
        veor            q15,q13                         @ sigma1(X[i+14])
        vshr.u64        q13,q14,#8
        vadd.i64        q7,q15
        vshr.u64        q15,q14,#7
        vsli.64         q12,q14,#63
        vsli.64         q13,q14,#56
        vext.8          q14,q3,q4,#8    @ X[i+9]
        veor            q15,q12
        vshr.u64        d24,d22,#14             @ from NEON_00_15
        vadd.i64        q7,q14
        vshr.u64        d25,d22,#18             @ from NEON_00_15
        veor            q15,q13                         @ sigma0(X[i+1])
        vshr.u64        d26,d22,#41             @ from NEON_00_15
        vadd.i64        q7,q15
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d22,#50
        vsli.64         d25,d22,#46
        vmov            d29,d22
        vsli.64         d26,d22,#23
#if 30<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d23,d16             @ Ch(e,f,g)
        vshr.u64        d24,d18,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d17
        vshr.u64        d25,d18,#34
        vsli.64         d24,d18,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d18,#39
        vadd.i64        d28,d14
        vsli.64         d25,d18,#30
        veor            d30,d18,d19
        vsli.64         d26,d18,#25
        veor            d17,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d20,d19             @ Maj(a,b,c)
        veor            d17,d26                 @ Sigma0(a)
        vadd.i64        d21,d27
        vadd.i64        d30,d27
        @ vadd.i64      d17,d30
        vshr.u64        d24,d21,#14     @ 31
#if 31<16
        vld1.64         {d15},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d21,#18
#if 31>0
         vadd.i64       d17,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d21,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d21,#50
        vsli.64         d25,d21,#46
        vmov            d29,d21
        vsli.64         d26,d21,#23
#if 31<16 && defined(__ARMEL__)
        vrev64.8        ,
#endif
        veor            d25,d24
        vbsl            d29,d22,d23             @ Ch(e,f,g)
        vshr.u64        d24,d17,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d16
        vshr.u64        d25,d17,#34
        vsli.64         d24,d17,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d17,#39
        vadd.i64        d28,d15
        vsli.64         d25,d17,#30
        veor            d30,d17,d18
        vsli.64         d26,d17,#25
        veor            d16,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d19,d18             @ Maj(a,b,c)
        veor            d16,d26                 @ Sigma0(a)
        vadd.i64        d20,d27
        vadd.i64        d30,d27
        @ vadd.i64      d16,d30
        bne             .L16_79_neon
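@ End of the 16..79 round loop: fold in the last deferred Maj term, reload
@ the previous hash state from [r0] into q12-q15, add the working variables
@ a..h held in q8-q11 (d16-d23), store the updated state back to [r0],
@ rewind the K512 pointer in r3, and loop while the input pointer r1 has
@ not yet reached the end pointer r2.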
         vadd.i64       d16,d30         @ h+=Maj from the past
        vldmia          r0,{d24-d31}    @ load context to temp
        vadd.i64        q8,q12          @ vectorized accumulate
        vadd.i64        q9,q13
        vadd.i64        q10,q14
        vadd.i64        q11,q15
        vstmia          r0,{d16-d23}    @ save context
        teq             r1,r2
        sub             r3,#640 @ rewind K512
        bne             .Loop_neon

        vldmia  sp!,{d8-d15}            @ epilogue
        bx      lr                              @ .word 0xe12fff1e
#endif
.size   sha512_block_data_order,.-sha512_block_data_order
.asciz  "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
.align  2
#if __ARM_MAX_ARCH__>=7
.comm   OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
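@ Expected calling convention (an assumption based on the usual
@ OpenSSL/BoringSSL interface for this routine; the C declaration lives
@ in the accompanying C sources, not in this generated file):
@   void sha512_block_data_order(uint64_t state[8],
@                                const void *in, size_t num_blocks);
@ i.e. r0 = 8-word hash state, r1 = input, r2 = number of 128-byte blocks.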