etc/services - sync with NetBSD-8
[minix.git] / crypto / external / bsd / openssl / lib / libcrypto / arch / arm / sha256-armv4.S
blob 1fc5daf5f12ffce7d143f0069cbf578638007393
1 #include "arm_arch.h"
2 #include "arm_asm.h"
4 .text
5 .code   32
@ K256: the 64 SHA-256 round constants (first 32 bits of the fractional
@ parts of the cube roots of the first 64 primes, per FIPS 180-4 §4.2.2).
@ The compression loop locates this table PC-relatively from the function
@ entry point ("sub r14,r3,#256+32" in the prologue), so it must remain
@ at this fixed distance immediately before sha256_block_data_order.
@ Do not reorder or edit these values.
7 .type   K256,%object
8 .align  5
9 K256:
10 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
11 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
12 .word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
13 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
14 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
15 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
16 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
17 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
18 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
19 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
20 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
21 .word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
22 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
23 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
24 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
25 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
26 .size   K256,.-K256
27 .word   0                               @ terminator
@ Link-time offset from sha256_block_data_order to the OPENSSL_armcap_P
@ capability word.  At run time the prologue adds the function's own
@ address (computed into r3 from pc) to this offset to read the CPU
@ feature flags position-independently — see the
@ "ldr r12,.LOPENSSL_armcap / ldr r12,[r3,r12]" pair in the prologue.
@ Only emitted when the build may target ARMv7+ (NEON/ARMv8 paths).
28 #if __ARM_MAX_ARCH__>=7
29 .LOPENSSL_armcap:
30 .word   OPENSSL_armcap_P-sha256_block_data_order
31 #endif
32 .align  5
34 .global sha256_block_data_order
35 .type   sha256_block_data_order,%function
36 sha256_block_data_order:
37         sub     r3,pc,#8                @ sha256_block_data_order
38         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
39 #if __ARM_MAX_ARCH__>=7
40         ldr     r12,.LOPENSSL_armcap
41         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
42         tst     r12,#ARMV8_SHA256
43         bne     .LARMv8
44         tst     r12,#ARMV7_NEON
45         bne     .LNEON
46 #endif
47         stmdb   sp!,{r0,r1,r2,r4-r11,lr}
48         ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
49         sub     r14,r3,#256+32  @ K256
50         sub     sp,sp,#16*4             @ alloca(X[16])
51 .Loop:
52 # if __ARM_ARCH__>=7
53         ldr     r2,[r1],#4
54 # else
55         ldrb    r2,[r1,#3]
56 # endif
57         eor     r3,r5,r6                @ magic
58         eor     r12,r12,r12
59 #if __ARM_ARCH__>=7
60         @ ldr   r2,[r1],#4                      @ 0
61 # if 0==15
62         str     r1,[sp,#17*4]                   @ make room for r1
63 # endif
64         eor     r0,r8,r8,ror#5
65         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
66         eor     r0,r0,r8,ror#19 @ Sigma1(e)
67         rev     r2,r2
68 #else
69         @ ldrb  r2,[r1,#3]                      @ 0
70         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
71         ldrb    r12,[r1,#2]
72         ldrb    r0,[r1,#1]
73         orr     r2,r2,r12,lsl#8
74         ldrb    r12,[r1],#4
75         orr     r2,r2,r0,lsl#16
76 # if 0==15
77         str     r1,[sp,#17*4]                   @ make room for r1
78 # endif
79         eor     r0,r8,r8,ror#5
80         orr     r2,r2,r12,lsl#24
81         eor     r0,r0,r8,ror#19 @ Sigma1(e)
82 #endif
83         ldr     r12,[r14],#4                    @ *K256++
84         add     r11,r11,r2                      @ h+=X[i]
85         str     r2,[sp,#0*4]
86         eor     r2,r9,r10
87         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
88         and     r2,r2,r8
89         add     r11,r11,r12                     @ h+=K256[i]
90         eor     r2,r2,r10                       @ Ch(e,f,g)
91         eor     r0,r4,r4,ror#11
92         add     r11,r11,r2                      @ h+=Ch(e,f,g)
93 #if 0==31
94         and     r12,r12,#0xff
95         cmp     r12,#0xf2                       @ done?
96 #endif
97 #if 0<15
98 # if __ARM_ARCH__>=7
99         ldr     r2,[r1],#4                      @ prefetch
100 # else
101         ldrb    r2,[r1,#3]
102 # endif
103         eor     r12,r4,r5                       @ a^b, b^c in next round
104 #else
105         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
106         eor     r12,r4,r5                       @ a^b, b^c in next round
107         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
108 #endif
109         eor     r0,r0,r4,ror#20 @ Sigma0(a)
110         and     r3,r3,r12                       @ (b^c)&=(a^b)
111         add     r7,r7,r11                       @ d+=h
112         eor     r3,r3,r5                        @ Maj(a,b,c)
113         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
114         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
115 #if __ARM_ARCH__>=7
116         @ ldr   r2,[r1],#4                      @ 1
117 # if 1==15
118         str     r1,[sp,#17*4]                   @ make room for r1
119 # endif
120         eor     r0,r7,r7,ror#5
121         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
122         eor     r0,r0,r7,ror#19 @ Sigma1(e)
123         rev     r2,r2
124 #else
125         @ ldrb  r2,[r1,#3]                      @ 1
126         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
127         ldrb    r3,[r1,#2]
128         ldrb    r0,[r1,#1]
129         orr     r2,r2,r3,lsl#8
130         ldrb    r3,[r1],#4
131         orr     r2,r2,r0,lsl#16
132 # if 1==15
133         str     r1,[sp,#17*4]                   @ make room for r1
134 # endif
135         eor     r0,r7,r7,ror#5
136         orr     r2,r2,r3,lsl#24
137         eor     r0,r0,r7,ror#19 @ Sigma1(e)
138 #endif
139         ldr     r3,[r14],#4                     @ *K256++
140         add     r10,r10,r2                      @ h+=X[i]
141         str     r2,[sp,#1*4]
142         eor     r2,r8,r9
143         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
144         and     r2,r2,r7
145         add     r10,r10,r3                      @ h+=K256[i]
146         eor     r2,r2,r9                        @ Ch(e,f,g)
147         eor     r0,r11,r11,ror#11
148         add     r10,r10,r2                      @ h+=Ch(e,f,g)
149 #if 1==31
150         and     r3,r3,#0xff
151         cmp     r3,#0xf2                        @ done?
152 #endif
153 #if 1<15
154 # if __ARM_ARCH__>=7
155         ldr     r2,[r1],#4                      @ prefetch
156 # else
157         ldrb    r2,[r1,#3]
158 # endif
159         eor     r3,r11,r4                       @ a^b, b^c in next round
160 #else
161         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
162         eor     r3,r11,r4                       @ a^b, b^c in next round
163         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
164 #endif
165         eor     r0,r0,r11,ror#20        @ Sigma0(a)
166         and     r12,r12,r3                      @ (b^c)&=(a^b)
167         add     r6,r6,r10                       @ d+=h
168         eor     r12,r12,r4                      @ Maj(a,b,c)
169         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
170         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
171 #if __ARM_ARCH__>=7
172         @ ldr   r2,[r1],#4                      @ 2
173 # if 2==15
174         str     r1,[sp,#17*4]                   @ make room for r1
175 # endif
176         eor     r0,r6,r6,ror#5
177         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
178         eor     r0,r0,r6,ror#19 @ Sigma1(e)
179         rev     r2,r2
180 #else
181         @ ldrb  r2,[r1,#3]                      @ 2
182         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
183         ldrb    r12,[r1,#2]
184         ldrb    r0,[r1,#1]
185         orr     r2,r2,r12,lsl#8
186         ldrb    r12,[r1],#4
187         orr     r2,r2,r0,lsl#16
188 # if 2==15
189         str     r1,[sp,#17*4]                   @ make room for r1
190 # endif
191         eor     r0,r6,r6,ror#5
192         orr     r2,r2,r12,lsl#24
193         eor     r0,r0,r6,ror#19 @ Sigma1(e)
194 #endif
195         ldr     r12,[r14],#4                    @ *K256++
196         add     r9,r9,r2                        @ h+=X[i]
197         str     r2,[sp,#2*4]
198         eor     r2,r7,r8
199         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
200         and     r2,r2,r6
201         add     r9,r9,r12                       @ h+=K256[i]
202         eor     r2,r2,r8                        @ Ch(e,f,g)
203         eor     r0,r10,r10,ror#11
204         add     r9,r9,r2                        @ h+=Ch(e,f,g)
205 #if 2==31
206         and     r12,r12,#0xff
207         cmp     r12,#0xf2                       @ done?
208 #endif
209 #if 2<15
210 # if __ARM_ARCH__>=7
211         ldr     r2,[r1],#4                      @ prefetch
212 # else
213         ldrb    r2,[r1,#3]
214 # endif
215         eor     r12,r10,r11                     @ a^b, b^c in next round
216 #else
217         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
218         eor     r12,r10,r11                     @ a^b, b^c in next round
219         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
220 #endif
221         eor     r0,r0,r10,ror#20        @ Sigma0(a)
222         and     r3,r3,r12                       @ (b^c)&=(a^b)
223         add     r5,r5,r9                        @ d+=h
224         eor     r3,r3,r11                       @ Maj(a,b,c)
225         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
226         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
227 #if __ARM_ARCH__>=7
228         @ ldr   r2,[r1],#4                      @ 3
229 # if 3==15
230         str     r1,[sp,#17*4]                   @ make room for r1
231 # endif
232         eor     r0,r5,r5,ror#5
233         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
234         eor     r0,r0,r5,ror#19 @ Sigma1(e)
235         rev     r2,r2
236 #else
237         @ ldrb  r2,[r1,#3]                      @ 3
238         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
239         ldrb    r3,[r1,#2]
240         ldrb    r0,[r1,#1]
241         orr     r2,r2,r3,lsl#8
242         ldrb    r3,[r1],#4
243         orr     r2,r2,r0,lsl#16
244 # if 3==15
245         str     r1,[sp,#17*4]                   @ make room for r1
246 # endif
247         eor     r0,r5,r5,ror#5
248         orr     r2,r2,r3,lsl#24
249         eor     r0,r0,r5,ror#19 @ Sigma1(e)
250 #endif
251         ldr     r3,[r14],#4                     @ *K256++
252         add     r8,r8,r2                        @ h+=X[i]
253         str     r2,[sp,#3*4]
254         eor     r2,r6,r7
255         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
256         and     r2,r2,r5
257         add     r8,r8,r3                        @ h+=K256[i]
258         eor     r2,r2,r7                        @ Ch(e,f,g)
259         eor     r0,r9,r9,ror#11
260         add     r8,r8,r2                        @ h+=Ch(e,f,g)
261 #if 3==31
262         and     r3,r3,#0xff
263         cmp     r3,#0xf2                        @ done?
264 #endif
265 #if 3<15
266 # if __ARM_ARCH__>=7
267         ldr     r2,[r1],#4                      @ prefetch
268 # else
269         ldrb    r2,[r1,#3]
270 # endif
271         eor     r3,r9,r10                       @ a^b, b^c in next round
272 #else
273         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
274         eor     r3,r9,r10                       @ a^b, b^c in next round
275         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
276 #endif
277         eor     r0,r0,r9,ror#20 @ Sigma0(a)
278         and     r12,r12,r3                      @ (b^c)&=(a^b)
279         add     r4,r4,r8                        @ d+=h
280         eor     r12,r12,r10                     @ Maj(a,b,c)
281         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
282         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
283 #if __ARM_ARCH__>=7
284         @ ldr   r2,[r1],#4                      @ 4
285 # if 4==15
286         str     r1,[sp,#17*4]                   @ make room for r1
287 # endif
288         eor     r0,r4,r4,ror#5
289         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
290         eor     r0,r0,r4,ror#19 @ Sigma1(e)
291         rev     r2,r2
292 #else
293         @ ldrb  r2,[r1,#3]                      @ 4
294         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
295         ldrb    r12,[r1,#2]
296         ldrb    r0,[r1,#1]
297         orr     r2,r2,r12,lsl#8
298         ldrb    r12,[r1],#4
299         orr     r2,r2,r0,lsl#16
300 # if 4==15
301         str     r1,[sp,#17*4]                   @ make room for r1
302 # endif
303         eor     r0,r4,r4,ror#5
304         orr     r2,r2,r12,lsl#24
305         eor     r0,r0,r4,ror#19 @ Sigma1(e)
306 #endif
307         ldr     r12,[r14],#4                    @ *K256++
308         add     r7,r7,r2                        @ h+=X[i]
309         str     r2,[sp,#4*4]
310         eor     r2,r5,r6
311         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
312         and     r2,r2,r4
313         add     r7,r7,r12                       @ h+=K256[i]
314         eor     r2,r2,r6                        @ Ch(e,f,g)
315         eor     r0,r8,r8,ror#11
316         add     r7,r7,r2                        @ h+=Ch(e,f,g)
317 #if 4==31
318         and     r12,r12,#0xff
319         cmp     r12,#0xf2                       @ done?
320 #endif
321 #if 4<15
322 # if __ARM_ARCH__>=7
323         ldr     r2,[r1],#4                      @ prefetch
324 # else
325         ldrb    r2,[r1,#3]
326 # endif
327         eor     r12,r8,r9                       @ a^b, b^c in next round
328 #else
329         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
330         eor     r12,r8,r9                       @ a^b, b^c in next round
331         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
332 #endif
333         eor     r0,r0,r8,ror#20 @ Sigma0(a)
334         and     r3,r3,r12                       @ (b^c)&=(a^b)
335         add     r11,r11,r7                      @ d+=h
336         eor     r3,r3,r9                        @ Maj(a,b,c)
337         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
338         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
339 #if __ARM_ARCH__>=7
340         @ ldr   r2,[r1],#4                      @ 5
341 # if 5==15
342         str     r1,[sp,#17*4]                   @ make room for r1
343 # endif
344         eor     r0,r11,r11,ror#5
345         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
346         eor     r0,r0,r11,ror#19        @ Sigma1(e)
347         rev     r2,r2
348 #else
349         @ ldrb  r2,[r1,#3]                      @ 5
350         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
351         ldrb    r3,[r1,#2]
352         ldrb    r0,[r1,#1]
353         orr     r2,r2,r3,lsl#8
354         ldrb    r3,[r1],#4
355         orr     r2,r2,r0,lsl#16
356 # if 5==15
357         str     r1,[sp,#17*4]                   @ make room for r1
358 # endif
359         eor     r0,r11,r11,ror#5
360         orr     r2,r2,r3,lsl#24
361         eor     r0,r0,r11,ror#19        @ Sigma1(e)
362 #endif
363         ldr     r3,[r14],#4                     @ *K256++
364         add     r6,r6,r2                        @ h+=X[i]
365         str     r2,[sp,#5*4]
366         eor     r2,r4,r5
367         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
368         and     r2,r2,r11
369         add     r6,r6,r3                        @ h+=K256[i]
370         eor     r2,r2,r5                        @ Ch(e,f,g)
371         eor     r0,r7,r7,ror#11
372         add     r6,r6,r2                        @ h+=Ch(e,f,g)
373 #if 5==31
374         and     r3,r3,#0xff
375         cmp     r3,#0xf2                        @ done?
376 #endif
377 #if 5<15
378 # if __ARM_ARCH__>=7
379         ldr     r2,[r1],#4                      @ prefetch
380 # else
381         ldrb    r2,[r1,#3]
382 # endif
383         eor     r3,r7,r8                        @ a^b, b^c in next round
384 #else
385         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
386         eor     r3,r7,r8                        @ a^b, b^c in next round
387         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
388 #endif
389         eor     r0,r0,r7,ror#20 @ Sigma0(a)
390         and     r12,r12,r3                      @ (b^c)&=(a^b)
391         add     r10,r10,r6                      @ d+=h
392         eor     r12,r12,r8                      @ Maj(a,b,c)
393         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
394         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
395 #if __ARM_ARCH__>=7
396         @ ldr   r2,[r1],#4                      @ 6
397 # if 6==15
398         str     r1,[sp,#17*4]                   @ make room for r1
399 # endif
400         eor     r0,r10,r10,ror#5
401         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
402         eor     r0,r0,r10,ror#19        @ Sigma1(e)
403         rev     r2,r2
404 #else
405         @ ldrb  r2,[r1,#3]                      @ 6
406         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
407         ldrb    r12,[r1,#2]
408         ldrb    r0,[r1,#1]
409         orr     r2,r2,r12,lsl#8
410         ldrb    r12,[r1],#4
411         orr     r2,r2,r0,lsl#16
412 # if 6==15
413         str     r1,[sp,#17*4]                   @ make room for r1
414 # endif
415         eor     r0,r10,r10,ror#5
416         orr     r2,r2,r12,lsl#24
417         eor     r0,r0,r10,ror#19        @ Sigma1(e)
418 #endif
419         ldr     r12,[r14],#4                    @ *K256++
420         add     r5,r5,r2                        @ h+=X[i]
421         str     r2,[sp,#6*4]
422         eor     r2,r11,r4
423         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
424         and     r2,r2,r10
425         add     r5,r5,r12                       @ h+=K256[i]
426         eor     r2,r2,r4                        @ Ch(e,f,g)
427         eor     r0,r6,r6,ror#11
428         add     r5,r5,r2                        @ h+=Ch(e,f,g)
429 #if 6==31
430         and     r12,r12,#0xff
431         cmp     r12,#0xf2                       @ done?
432 #endif
433 #if 6<15
434 # if __ARM_ARCH__>=7
435         ldr     r2,[r1],#4                      @ prefetch
436 # else
437         ldrb    r2,[r1,#3]
438 # endif
439         eor     r12,r6,r7                       @ a^b, b^c in next round
440 #else
441         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
442         eor     r12,r6,r7                       @ a^b, b^c in next round
443         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
444 #endif
445         eor     r0,r0,r6,ror#20 @ Sigma0(a)
446         and     r3,r3,r12                       @ (b^c)&=(a^b)
447         add     r9,r9,r5                        @ d+=h
448         eor     r3,r3,r7                        @ Maj(a,b,c)
449         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
450         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
451 #if __ARM_ARCH__>=7
452         @ ldr   r2,[r1],#4                      @ 7
453 # if 7==15
454         str     r1,[sp,#17*4]                   @ make room for r1
455 # endif
456         eor     r0,r9,r9,ror#5
457         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
458         eor     r0,r0,r9,ror#19 @ Sigma1(e)
459         rev     r2,r2
460 #else
461         @ ldrb  r2,[r1,#3]                      @ 7
462         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
463         ldrb    r3,[r1,#2]
464         ldrb    r0,[r1,#1]
465         orr     r2,r2,r3,lsl#8
466         ldrb    r3,[r1],#4
467         orr     r2,r2,r0,lsl#16
468 # if 7==15
469         str     r1,[sp,#17*4]                   @ make room for r1
470 # endif
471         eor     r0,r9,r9,ror#5
472         orr     r2,r2,r3,lsl#24
473         eor     r0,r0,r9,ror#19 @ Sigma1(e)
474 #endif
475         ldr     r3,[r14],#4                     @ *K256++
476         add     r4,r4,r2                        @ h+=X[i]
477         str     r2,[sp,#7*4]
478         eor     r2,r10,r11
479         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
480         and     r2,r2,r9
481         add     r4,r4,r3                        @ h+=K256[i]
482         eor     r2,r2,r11                       @ Ch(e,f,g)
483         eor     r0,r5,r5,ror#11
484         add     r4,r4,r2                        @ h+=Ch(e,f,g)
485 #if 7==31
486         and     r3,r3,#0xff
487         cmp     r3,#0xf2                        @ done?
488 #endif
489 #if 7<15
490 # if __ARM_ARCH__>=7
491         ldr     r2,[r1],#4                      @ prefetch
492 # else
493         ldrb    r2,[r1,#3]
494 # endif
495         eor     r3,r5,r6                        @ a^b, b^c in next round
496 #else
497         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
498         eor     r3,r5,r6                        @ a^b, b^c in next round
499         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
500 #endif
501         eor     r0,r0,r5,ror#20 @ Sigma0(a)
502         and     r12,r12,r3                      @ (b^c)&=(a^b)
503         add     r8,r8,r4                        @ d+=h
504         eor     r12,r12,r6                      @ Maj(a,b,c)
505         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
506         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
507 #if __ARM_ARCH__>=7
508         @ ldr   r2,[r1],#4                      @ 8
509 # if 8==15
510         str     r1,[sp,#17*4]                   @ make room for r1
511 # endif
512         eor     r0,r8,r8,ror#5
513         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
514         eor     r0,r0,r8,ror#19 @ Sigma1(e)
515         rev     r2,r2
516 #else
517         @ ldrb  r2,[r1,#3]                      @ 8
518         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
519         ldrb    r12,[r1,#2]
520         ldrb    r0,[r1,#1]
521         orr     r2,r2,r12,lsl#8
522         ldrb    r12,[r1],#4
523         orr     r2,r2,r0,lsl#16
524 # if 8==15
525         str     r1,[sp,#17*4]                   @ make room for r1
526 # endif
527         eor     r0,r8,r8,ror#5
528         orr     r2,r2,r12,lsl#24
529         eor     r0,r0,r8,ror#19 @ Sigma1(e)
530 #endif
531         ldr     r12,[r14],#4                    @ *K256++
532         add     r11,r11,r2                      @ h+=X[i]
533         str     r2,[sp,#8*4]
534         eor     r2,r9,r10
535         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
536         and     r2,r2,r8
537         add     r11,r11,r12                     @ h+=K256[i]
538         eor     r2,r2,r10                       @ Ch(e,f,g)
539         eor     r0,r4,r4,ror#11
540         add     r11,r11,r2                      @ h+=Ch(e,f,g)
541 #if 8==31
542         and     r12,r12,#0xff
543         cmp     r12,#0xf2                       @ done?
544 #endif
545 #if 8<15
546 # if __ARM_ARCH__>=7
547         ldr     r2,[r1],#4                      @ prefetch
548 # else
549         ldrb    r2,[r1,#3]
550 # endif
551         eor     r12,r4,r5                       @ a^b, b^c in next round
552 #else
553         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
554         eor     r12,r4,r5                       @ a^b, b^c in next round
555         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
556 #endif
557         eor     r0,r0,r4,ror#20 @ Sigma0(a)
558         and     r3,r3,r12                       @ (b^c)&=(a^b)
559         add     r7,r7,r11                       @ d+=h
560         eor     r3,r3,r5                        @ Maj(a,b,c)
561         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
562         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
563 #if __ARM_ARCH__>=7
564         @ ldr   r2,[r1],#4                      @ 9
565 # if 9==15
566         str     r1,[sp,#17*4]                   @ make room for r1
567 # endif
568         eor     r0,r7,r7,ror#5
569         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
570         eor     r0,r0,r7,ror#19 @ Sigma1(e)
571         rev     r2,r2
572 #else
573         @ ldrb  r2,[r1,#3]                      @ 9
574         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
575         ldrb    r3,[r1,#2]
576         ldrb    r0,[r1,#1]
577         orr     r2,r2,r3,lsl#8
578         ldrb    r3,[r1],#4
579         orr     r2,r2,r0,lsl#16
580 # if 9==15
581         str     r1,[sp,#17*4]                   @ make room for r1
582 # endif
583         eor     r0,r7,r7,ror#5
584         orr     r2,r2,r3,lsl#24
585         eor     r0,r0,r7,ror#19 @ Sigma1(e)
586 #endif
587         ldr     r3,[r14],#4                     @ *K256++
588         add     r10,r10,r2                      @ h+=X[i]
589         str     r2,[sp,#9*4]
590         eor     r2,r8,r9
591         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
592         and     r2,r2,r7
593         add     r10,r10,r3                      @ h+=K256[i]
594         eor     r2,r2,r9                        @ Ch(e,f,g)
595         eor     r0,r11,r11,ror#11
596         add     r10,r10,r2                      @ h+=Ch(e,f,g)
597 #if 9==31
598         and     r3,r3,#0xff
599         cmp     r3,#0xf2                        @ done?
600 #endif
601 #if 9<15
602 # if __ARM_ARCH__>=7
603         ldr     r2,[r1],#4                      @ prefetch
604 # else
605         ldrb    r2,[r1,#3]
606 # endif
607         eor     r3,r11,r4                       @ a^b, b^c in next round
608 #else
609         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
610         eor     r3,r11,r4                       @ a^b, b^c in next round
611         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
612 #endif
613         eor     r0,r0,r11,ror#20        @ Sigma0(a)
614         and     r12,r12,r3                      @ (b^c)&=(a^b)
615         add     r6,r6,r10                       @ d+=h
616         eor     r12,r12,r4                      @ Maj(a,b,c)
617         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
618         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
619 #if __ARM_ARCH__>=7
620         @ ldr   r2,[r1],#4                      @ 10
621 # if 10==15
622         str     r1,[sp,#17*4]                   @ make room for r1
623 # endif
624         eor     r0,r6,r6,ror#5
625         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
626         eor     r0,r0,r6,ror#19 @ Sigma1(e)
627         rev     r2,r2
628 #else
629         @ ldrb  r2,[r1,#3]                      @ 10
630         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
631         ldrb    r12,[r1,#2]
632         ldrb    r0,[r1,#1]
633         orr     r2,r2,r12,lsl#8
634         ldrb    r12,[r1],#4
635         orr     r2,r2,r0,lsl#16
636 # if 10==15
637         str     r1,[sp,#17*4]                   @ make room for r1
638 # endif
639         eor     r0,r6,r6,ror#5
640         orr     r2,r2,r12,lsl#24
641         eor     r0,r0,r6,ror#19 @ Sigma1(e)
642 #endif
643         ldr     r12,[r14],#4                    @ *K256++
644         add     r9,r9,r2                        @ h+=X[i]
645         str     r2,[sp,#10*4]
646         eor     r2,r7,r8
647         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
648         and     r2,r2,r6
649         add     r9,r9,r12                       @ h+=K256[i]
650         eor     r2,r2,r8                        @ Ch(e,f,g)
651         eor     r0,r10,r10,ror#11
652         add     r9,r9,r2                        @ h+=Ch(e,f,g)
653 #if 10==31
654         and     r12,r12,#0xff
655         cmp     r12,#0xf2                       @ done?
656 #endif
657 #if 10<15
658 # if __ARM_ARCH__>=7
659         ldr     r2,[r1],#4                      @ prefetch
660 # else
661         ldrb    r2,[r1,#3]
662 # endif
663         eor     r12,r10,r11                     @ a^b, b^c in next round
664 #else
665         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
666         eor     r12,r10,r11                     @ a^b, b^c in next round
667         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
668 #endif
669         eor     r0,r0,r10,ror#20        @ Sigma0(a)
670         and     r3,r3,r12                       @ (b^c)&=(a^b)
671         add     r5,r5,r9                        @ d+=h
672         eor     r3,r3,r11                       @ Maj(a,b,c)
673         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
674         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
675 #if __ARM_ARCH__>=7
676         @ ldr   r2,[r1],#4                      @ 11
677 # if 11==15
678         str     r1,[sp,#17*4]                   @ make room for r1
679 # endif
680         eor     r0,r5,r5,ror#5
681         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
682         eor     r0,r0,r5,ror#19 @ Sigma1(e)
683         rev     r2,r2
684 #else
685         @ ldrb  r2,[r1,#3]                      @ 11
686         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
687         ldrb    r3,[r1,#2]
688         ldrb    r0,[r1,#1]
689         orr     r2,r2,r3,lsl#8
690         ldrb    r3,[r1],#4
691         orr     r2,r2,r0,lsl#16
692 # if 11==15
693         str     r1,[sp,#17*4]                   @ make room for r1
694 # endif
695         eor     r0,r5,r5,ror#5
696         orr     r2,r2,r3,lsl#24
697         eor     r0,r0,r5,ror#19 @ Sigma1(e)
698 #endif
699         ldr     r3,[r14],#4                     @ *K256++
700         add     r8,r8,r2                        @ h+=X[i]
701         str     r2,[sp,#11*4]
702         eor     r2,r6,r7
703         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
704         and     r2,r2,r5
705         add     r8,r8,r3                        @ h+=K256[i]
706         eor     r2,r2,r7                        @ Ch(e,f,g)
707         eor     r0,r9,r9,ror#11
708         add     r8,r8,r2                        @ h+=Ch(e,f,g)
709 #if 11==31
710         and     r3,r3,#0xff
711         cmp     r3,#0xf2                        @ done?
712 #endif
713 #if 11<15
714 # if __ARM_ARCH__>=7
715         ldr     r2,[r1],#4                      @ prefetch
716 # else
717         ldrb    r2,[r1,#3]
718 # endif
719         eor     r3,r9,r10                       @ a^b, b^c in next round
720 #else
721         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
722         eor     r3,r9,r10                       @ a^b, b^c in next round
723         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
724 #endif
725         eor     r0,r0,r9,ror#20 @ Sigma0(a)
726         and     r12,r12,r3                      @ (b^c)&=(a^b)
727         add     r4,r4,r8                        @ d+=h
728         eor     r12,r12,r10                     @ Maj(a,b,c)
729         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
730         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
731 #if __ARM_ARCH__>=7
732         @ ldr   r2,[r1],#4                      @ 12
733 # if 12==15
734         str     r1,[sp,#17*4]                   @ make room for r1
735 # endif
736         eor     r0,r4,r4,ror#5
737         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
738         eor     r0,r0,r4,ror#19 @ Sigma1(e)
739         rev     r2,r2
740 #else
741         @ ldrb  r2,[r1,#3]                      @ 12
742         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
743         ldrb    r12,[r1,#2]
744         ldrb    r0,[r1,#1]
745         orr     r2,r2,r12,lsl#8
746         ldrb    r12,[r1],#4
747         orr     r2,r2,r0,lsl#16
748 # if 12==15
749         str     r1,[sp,#17*4]                   @ make room for r1
750 # endif
751         eor     r0,r4,r4,ror#5
752         orr     r2,r2,r12,lsl#24
753         eor     r0,r0,r4,ror#19 @ Sigma1(e)
754 #endif
755         ldr     r12,[r14],#4                    @ *K256++
756         add     r7,r7,r2                        @ h+=X[i]
757         str     r2,[sp,#12*4]
758         eor     r2,r5,r6
759         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
760         and     r2,r2,r4
761         add     r7,r7,r12                       @ h+=K256[i]
762         eor     r2,r2,r6                        @ Ch(e,f,g)
763         eor     r0,r8,r8,ror#11
764         add     r7,r7,r2                        @ h+=Ch(e,f,g)
765 #if 12==31
766         and     r12,r12,#0xff
767         cmp     r12,#0xf2                       @ done?
768 #endif
769 #if 12<15
770 # if __ARM_ARCH__>=7
771         ldr     r2,[r1],#4                      @ prefetch
772 # else
773         ldrb    r2,[r1,#3]
774 # endif
775         eor     r12,r8,r9                       @ a^b, b^c in next round
776 #else
777         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
778         eor     r12,r8,r9                       @ a^b, b^c in next round
779         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
780 #endif
781         eor     r0,r0,r8,ror#20 @ Sigma0(a)
782         and     r3,r3,r12                       @ (b^c)&=(a^b)
783         add     r11,r11,r7                      @ d+=h
784         eor     r3,r3,r9                        @ Maj(a,b,c)
785         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
786         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
787 #if __ARM_ARCH__>=7
788         @ ldr   r2,[r1],#4                      @ 13
789 # if 13==15
790         str     r1,[sp,#17*4]                   @ make room for r1
791 # endif
792         eor     r0,r11,r11,ror#5
793         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
794         eor     r0,r0,r11,ror#19        @ Sigma1(e)
795         rev     r2,r2
796 #else
797         @ ldrb  r2,[r1,#3]                      @ 13
798         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
799         ldrb    r3,[r1,#2]
800         ldrb    r0,[r1,#1]
801         orr     r2,r2,r3,lsl#8
802         ldrb    r3,[r1],#4
803         orr     r2,r2,r0,lsl#16
804 # if 13==15
805         str     r1,[sp,#17*4]                   @ make room for r1
806 # endif
807         eor     r0,r11,r11,ror#5
808         orr     r2,r2,r3,lsl#24
809         eor     r0,r0,r11,ror#19        @ Sigma1(e)
810 #endif
811         ldr     r3,[r14],#4                     @ *K256++
812         add     r6,r6,r2                        @ h+=X[i]
813         str     r2,[sp,#13*4]
814         eor     r2,r4,r5
815         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
816         and     r2,r2,r11
817         add     r6,r6,r3                        @ h+=K256[i]
818         eor     r2,r2,r5                        @ Ch(e,f,g)
819         eor     r0,r7,r7,ror#11
820         add     r6,r6,r2                        @ h+=Ch(e,f,g)
821 #if 13==31
822         and     r3,r3,#0xff
823         cmp     r3,#0xf2                        @ done?
824 #endif
825 #if 13<15
826 # if __ARM_ARCH__>=7
827         ldr     r2,[r1],#4                      @ prefetch
828 # else
829         ldrb    r2,[r1,#3]
830 # endif
831         eor     r3,r7,r8                        @ a^b, b^c in next round
832 #else
833         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
834         eor     r3,r7,r8                        @ a^b, b^c in next round
835         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
836 #endif
837         eor     r0,r0,r7,ror#20 @ Sigma0(a)
838         and     r12,r12,r3                      @ (b^c)&=(a^b)
839         add     r10,r10,r6                      @ d+=h
840         eor     r12,r12,r8                      @ Maj(a,b,c)
841         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
842         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
843 #if __ARM_ARCH__>=7
844         @ ldr   r2,[r1],#4                      @ 14
845 # if 14==15
846         str     r1,[sp,#17*4]                   @ make room for r1
847 # endif
848         eor     r0,r10,r10,ror#5
849         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
850         eor     r0,r0,r10,ror#19        @ Sigma1(e)
851         rev     r2,r2
852 #else
853         @ ldrb  r2,[r1,#3]                      @ 14
854         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
855         ldrb    r12,[r1,#2]
856         ldrb    r0,[r1,#1]
857         orr     r2,r2,r12,lsl#8
858         ldrb    r12,[r1],#4
859         orr     r2,r2,r0,lsl#16
860 # if 14==15
861         str     r1,[sp,#17*4]                   @ make room for r1
862 # endif
863         eor     r0,r10,r10,ror#5
864         orr     r2,r2,r12,lsl#24
865         eor     r0,r0,r10,ror#19        @ Sigma1(e)
866 #endif
867         ldr     r12,[r14],#4                    @ *K256++
868         add     r5,r5,r2                        @ h+=X[i]
869         str     r2,[sp,#14*4]
870         eor     r2,r11,r4
871         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
872         and     r2,r2,r10
873         add     r5,r5,r12                       @ h+=K256[i]
874         eor     r2,r2,r4                        @ Ch(e,f,g)
875         eor     r0,r6,r6,ror#11
876         add     r5,r5,r2                        @ h+=Ch(e,f,g)
877 #if 14==31
878         and     r12,r12,#0xff
879         cmp     r12,#0xf2                       @ done?
880 #endif
881 #if 14<15
882 # if __ARM_ARCH__>=7
883         ldr     r2,[r1],#4                      @ prefetch
884 # else
885         ldrb    r2,[r1,#3]
886 # endif
887         eor     r12,r6,r7                       @ a^b, b^c in next round
888 #else
889         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
890         eor     r12,r6,r7                       @ a^b, b^c in next round
891         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
892 #endif
893         eor     r0,r0,r6,ror#20 @ Sigma0(a)
894         and     r3,r3,r12                       @ (b^c)&=(a^b)
895         add     r9,r9,r5                        @ d+=h
896         eor     r3,r3,r7                        @ Maj(a,b,c)
897         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
898         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
899 #if __ARM_ARCH__>=7
900         @ ldr   r2,[r1],#4                      @ 15
901 # if 15==15
902         str     r1,[sp,#17*4]                   @ make room for r1
903 # endif
904         eor     r0,r9,r9,ror#5
905         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
906         eor     r0,r0,r9,ror#19 @ Sigma1(e)
907         rev     r2,r2
908 #else
909         @ ldrb  r2,[r1,#3]                      @ 15
910         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
911         ldrb    r3,[r1,#2]
912         ldrb    r0,[r1,#1]
913         orr     r2,r2,r3,lsl#8
914         ldrb    r3,[r1],#4
915         orr     r2,r2,r0,lsl#16
916 # if 15==15
917         str     r1,[sp,#17*4]                   @ make room for r1
918 # endif
919         eor     r0,r9,r9,ror#5
920         orr     r2,r2,r3,lsl#24
921         eor     r0,r0,r9,ror#19 @ Sigma1(e)
922 #endif
923         ldr     r3,[r14],#4                     @ *K256++
924         add     r4,r4,r2                        @ h+=X[i]
925         str     r2,[sp,#15*4]
926         eor     r2,r10,r11
927         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
928         and     r2,r2,r9
929         add     r4,r4,r3                        @ h+=K256[i]
930         eor     r2,r2,r11                       @ Ch(e,f,g)
931         eor     r0,r5,r5,ror#11
932         add     r4,r4,r2                        @ h+=Ch(e,f,g)
933 #if 15==31
934         and     r3,r3,#0xff
935         cmp     r3,#0xf2                        @ done?
936 #endif
937 #if 15<15
938 # if __ARM_ARCH__>=7
939         ldr     r2,[r1],#4                      @ prefetch
940 # else
941         ldrb    r2,[r1,#3]
942 # endif
943         eor     r3,r5,r6                        @ a^b, b^c in next round
944 #else
945         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
946         eor     r3,r5,r6                        @ a^b, b^c in next round
947         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
948 #endif
949         eor     r0,r0,r5,ror#20 @ Sigma0(a)
950         and     r12,r12,r3                      @ (b^c)&=(a^b)
951         add     r8,r8,r4                        @ d+=h
952         eor     r12,r12,r6                      @ Maj(a,b,c)
953         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
954         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
955 .Lrounds_16_xx:
956         @ ldr   r2,[sp,#1*4]            @ 16
957         @ ldr   r1,[sp,#14*4]
958         mov     r0,r2,ror#7
959         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
960         mov     r12,r1,ror#17
961         eor     r0,r0,r2,ror#18
962         eor     r12,r12,r1,ror#19
963         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
964         ldr     r2,[sp,#0*4]
965         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
966         ldr     r1,[sp,#9*4]
968         add     r12,r12,r0
969         eor     r0,r8,r8,ror#5  @ from BODY_00_15
970         add     r2,r2,r12
971         eor     r0,r0,r8,ror#19 @ Sigma1(e)
972         add     r2,r2,r1                        @ X[i]
973         ldr     r12,[r14],#4                    @ *K256++
974         add     r11,r11,r2                      @ h+=X[i]
975         str     r2,[sp,#0*4]
976         eor     r2,r9,r10
977         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
978         and     r2,r2,r8
979         add     r11,r11,r12                     @ h+=K256[i]
980         eor     r2,r2,r10                       @ Ch(e,f,g)
981         eor     r0,r4,r4,ror#11
982         add     r11,r11,r2                      @ h+=Ch(e,f,g)
983 #if 16==31
984         and     r12,r12,#0xff
985         cmp     r12,#0xf2                       @ done?
986 #endif
987 #if 16<15
988 # if __ARM_ARCH__>=7
989         ldr     r2,[r1],#4                      @ prefetch
990 # else
991         ldrb    r2,[r1,#3]
992 # endif
993         eor     r12,r4,r5                       @ a^b, b^c in next round
994 #else
995         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
996         eor     r12,r4,r5                       @ a^b, b^c in next round
997         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
998 #endif
999         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1000         and     r3,r3,r12                       @ (b^c)&=(a^b)
1001         add     r7,r7,r11                       @ d+=h
1002         eor     r3,r3,r5                        @ Maj(a,b,c)
1003         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1004         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1005         @ ldr   r2,[sp,#2*4]            @ 17
1006         @ ldr   r1,[sp,#15*4]
1007         mov     r0,r2,ror#7
1008         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1009         mov     r3,r1,ror#17
1010         eor     r0,r0,r2,ror#18
1011         eor     r3,r3,r1,ror#19
1012         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1013         ldr     r2,[sp,#1*4]
1014         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1015         ldr     r1,[sp,#10*4]
1017         add     r3,r3,r0
1018         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1019         add     r2,r2,r3
1020         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1021         add     r2,r2,r1                        @ X[i]
1022         ldr     r3,[r14],#4                     @ *K256++
1023         add     r10,r10,r2                      @ h+=X[i]
1024         str     r2,[sp,#1*4]
1025         eor     r2,r8,r9
1026         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1027         and     r2,r2,r7
1028         add     r10,r10,r3                      @ h+=K256[i]
1029         eor     r2,r2,r9                        @ Ch(e,f,g)
1030         eor     r0,r11,r11,ror#11
1031         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1032 #if 17==31
1033         and     r3,r3,#0xff
1034         cmp     r3,#0xf2                        @ done?
1035 #endif
1036 #if 17<15
1037 # if __ARM_ARCH__>=7
1038         ldr     r2,[r1],#4                      @ prefetch
1039 # else
1040         ldrb    r2,[r1,#3]
1041 # endif
1042         eor     r3,r11,r4                       @ a^b, b^c in next round
1043 #else
1044         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
1045         eor     r3,r11,r4                       @ a^b, b^c in next round
1046         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
1047 #endif
1048         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1049         and     r12,r12,r3                      @ (b^c)&=(a^b)
1050         add     r6,r6,r10                       @ d+=h
1051         eor     r12,r12,r4                      @ Maj(a,b,c)
1052         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1053         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1054         @ ldr   r2,[sp,#3*4]            @ 18
1055         @ ldr   r1,[sp,#0*4]
1056         mov     r0,r2,ror#7
1057         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1058         mov     r12,r1,ror#17
1059         eor     r0,r0,r2,ror#18
1060         eor     r12,r12,r1,ror#19
1061         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1062         ldr     r2,[sp,#2*4]
1063         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1064         ldr     r1,[sp,#11*4]
1066         add     r12,r12,r0
1067         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1068         add     r2,r2,r12
1069         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1070         add     r2,r2,r1                        @ X[i]
1071         ldr     r12,[r14],#4                    @ *K256++
1072         add     r9,r9,r2                        @ h+=X[i]
1073         str     r2,[sp,#2*4]
1074         eor     r2,r7,r8
1075         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1076         and     r2,r2,r6
1077         add     r9,r9,r12                       @ h+=K256[i]
1078         eor     r2,r2,r8                        @ Ch(e,f,g)
1079         eor     r0,r10,r10,ror#11
1080         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1081 #if 18==31
1082         and     r12,r12,#0xff
1083         cmp     r12,#0xf2                       @ done?
1084 #endif
1085 #if 18<15
1086 # if __ARM_ARCH__>=7
1087         ldr     r2,[r1],#4                      @ prefetch
1088 # else
1089         ldrb    r2,[r1,#3]
1090 # endif
1091         eor     r12,r10,r11                     @ a^b, b^c in next round
1092 #else
1093         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
1094         eor     r12,r10,r11                     @ a^b, b^c in next round
1095         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
1096 #endif
1097         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1098         and     r3,r3,r12                       @ (b^c)&=(a^b)
1099         add     r5,r5,r9                        @ d+=h
1100         eor     r3,r3,r11                       @ Maj(a,b,c)
1101         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1102         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1103         @ ldr   r2,[sp,#4*4]            @ 19
1104         @ ldr   r1,[sp,#1*4]
1105         mov     r0,r2,ror#7
1106         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1107         mov     r3,r1,ror#17
1108         eor     r0,r0,r2,ror#18
1109         eor     r3,r3,r1,ror#19
1110         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1111         ldr     r2,[sp,#3*4]
1112         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1113         ldr     r1,[sp,#12*4]
1115         add     r3,r3,r0
1116         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1117         add     r2,r2,r3
1118         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1119         add     r2,r2,r1                        @ X[i]
1120         ldr     r3,[r14],#4                     @ *K256++
1121         add     r8,r8,r2                        @ h+=X[i]
1122         str     r2,[sp,#3*4]
1123         eor     r2,r6,r7
1124         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1125         and     r2,r2,r5
1126         add     r8,r8,r3                        @ h+=K256[i]
1127         eor     r2,r2,r7                        @ Ch(e,f,g)
1128         eor     r0,r9,r9,ror#11
1129         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1130 #if 19==31
1131         and     r3,r3,#0xff
1132         cmp     r3,#0xf2                        @ done?
1133 #endif
1134 #if 19<15
1135 # if __ARM_ARCH__>=7
1136         ldr     r2,[r1],#4                      @ prefetch
1137 # else
1138         ldrb    r2,[r1,#3]
1139 # endif
1140         eor     r3,r9,r10                       @ a^b, b^c in next round
1141 #else
1142         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
1143         eor     r3,r9,r10                       @ a^b, b^c in next round
1144         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
1145 #endif
1146         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1147         and     r12,r12,r3                      @ (b^c)&=(a^b)
1148         add     r4,r4,r8                        @ d+=h
1149         eor     r12,r12,r10                     @ Maj(a,b,c)
1150         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1151         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1152         @ ldr   r2,[sp,#5*4]            @ 20
1153         @ ldr   r1,[sp,#2*4]
1154         mov     r0,r2,ror#7
1155         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1156         mov     r12,r1,ror#17
1157         eor     r0,r0,r2,ror#18
1158         eor     r12,r12,r1,ror#19
1159         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1160         ldr     r2,[sp,#4*4]
1161         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1162         ldr     r1,[sp,#13*4]
1164         add     r12,r12,r0
1165         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1166         add     r2,r2,r12
1167         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1168         add     r2,r2,r1                        @ X[i]
1169         ldr     r12,[r14],#4                    @ *K256++
1170         add     r7,r7,r2                        @ h+=X[i]
1171         str     r2,[sp,#4*4]
1172         eor     r2,r5,r6
1173         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1174         and     r2,r2,r4
1175         add     r7,r7,r12                       @ h+=K256[i]
1176         eor     r2,r2,r6                        @ Ch(e,f,g)
1177         eor     r0,r8,r8,ror#11
1178         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1179 #if 20==31
1180         and     r12,r12,#0xff
1181         cmp     r12,#0xf2                       @ done?
1182 #endif
1183 #if 20<15
1184 # if __ARM_ARCH__>=7
1185         ldr     r2,[r1],#4                      @ prefetch
1186 # else
1187         ldrb    r2,[r1,#3]
1188 # endif
1189         eor     r12,r8,r9                       @ a^b, b^c in next round
1190 #else
1191         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
1192         eor     r12,r8,r9                       @ a^b, b^c in next round
1193         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
1194 #endif
1195         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1196         and     r3,r3,r12                       @ (b^c)&=(a^b)
1197         add     r11,r11,r7                      @ d+=h
1198         eor     r3,r3,r9                        @ Maj(a,b,c)
1199         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1200         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1201         @ ldr   r2,[sp,#6*4]            @ 21
1202         @ ldr   r1,[sp,#3*4]
1203         mov     r0,r2,ror#7
1204         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1205         mov     r3,r1,ror#17
1206         eor     r0,r0,r2,ror#18
1207         eor     r3,r3,r1,ror#19
1208         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1209         ldr     r2,[sp,#5*4]
1210         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1211         ldr     r1,[sp,#14*4]
1213         add     r3,r3,r0
1214         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1215         add     r2,r2,r3
1216         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1217         add     r2,r2,r1                        @ X[i]
1218         ldr     r3,[r14],#4                     @ *K256++
1219         add     r6,r6,r2                        @ h+=X[i]
1220         str     r2,[sp,#5*4]
1221         eor     r2,r4,r5
1222         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1223         and     r2,r2,r11
1224         add     r6,r6,r3                        @ h+=K256[i]
1225         eor     r2,r2,r5                        @ Ch(e,f,g)
1226         eor     r0,r7,r7,ror#11
1227         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1228 #if 21==31
1229         and     r3,r3,#0xff
1230         cmp     r3,#0xf2                        @ done?
1231 #endif
1232 #if 21<15
1233 # if __ARM_ARCH__>=7
1234         ldr     r2,[r1],#4                      @ prefetch
1235 # else
1236         ldrb    r2,[r1,#3]
1237 # endif
1238         eor     r3,r7,r8                        @ a^b, b^c in next round
1239 #else
1240         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
1241         eor     r3,r7,r8                        @ a^b, b^c in next round
1242         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
1243 #endif
1244         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1245         and     r12,r12,r3                      @ (b^c)&=(a^b)
1246         add     r10,r10,r6                      @ d+=h
1247         eor     r12,r12,r8                      @ Maj(a,b,c)
1248         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1249         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1250         @ ldr   r2,[sp,#7*4]            @ 22
1251         @ ldr   r1,[sp,#4*4]
1252         mov     r0,r2,ror#7
1253         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1254         mov     r12,r1,ror#17
1255         eor     r0,r0,r2,ror#18
1256         eor     r12,r12,r1,ror#19
1257         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1258         ldr     r2,[sp,#6*4]
1259         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1260         ldr     r1,[sp,#15*4]
1262         add     r12,r12,r0
1263         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1264         add     r2,r2,r12
1265         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1266         add     r2,r2,r1                        @ X[i]
1267         ldr     r12,[r14],#4                    @ *K256++
1268         add     r5,r5,r2                        @ h+=X[i]
1269         str     r2,[sp,#6*4]
1270         eor     r2,r11,r4
1271         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1272         and     r2,r2,r10
1273         add     r5,r5,r12                       @ h+=K256[i]
1274         eor     r2,r2,r4                        @ Ch(e,f,g)
1275         eor     r0,r6,r6,ror#11
1276         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1277 #if 22==31
1278         and     r12,r12,#0xff
1279         cmp     r12,#0xf2                       @ done?
1280 #endif
1281 #if 22<15
1282 # if __ARM_ARCH__>=7
1283         ldr     r2,[r1],#4                      @ prefetch
1284 # else
1285         ldrb    r2,[r1,#3]
1286 # endif
1287         eor     r12,r6,r7                       @ a^b, b^c in next round
1288 #else
1289         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
1290         eor     r12,r6,r7                       @ a^b, b^c in next round
1291         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
1292 #endif
1293         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1294         and     r3,r3,r12                       @ (b^c)&=(a^b)
1295         add     r9,r9,r5                        @ d+=h
1296         eor     r3,r3,r7                        @ Maj(a,b,c)
1297         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1298         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1299         @ ldr   r2,[sp,#8*4]            @ 23
1300         @ ldr   r1,[sp,#5*4]
1301         mov     r0,r2,ror#7
1302         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1303         mov     r3,r1,ror#17
1304         eor     r0,r0,r2,ror#18
1305         eor     r3,r3,r1,ror#19
1306         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1307         ldr     r2,[sp,#7*4]
1308         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1309         ldr     r1,[sp,#0*4]
1311         add     r3,r3,r0
1312         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1313         add     r2,r2,r3
1314         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1315         add     r2,r2,r1                        @ X[i]
1316         ldr     r3,[r14],#4                     @ *K256++
1317         add     r4,r4,r2                        @ h+=X[i]
1318         str     r2,[sp,#7*4]
1319         eor     r2,r10,r11
1320         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1321         and     r2,r2,r9
1322         add     r4,r4,r3                        @ h+=K256[i]
1323         eor     r2,r2,r11                       @ Ch(e,f,g)
1324         eor     r0,r5,r5,ror#11
1325         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1326 #if 23==31
1327         and     r3,r3,#0xff
1328         cmp     r3,#0xf2                        @ done?
1329 #endif
1330 #if 23<15
1331 # if __ARM_ARCH__>=7
1332         ldr     r2,[r1],#4                      @ prefetch
1333 # else
1334         ldrb    r2,[r1,#3]
1335 # endif
1336         eor     r3,r5,r6                        @ a^b, b^c in next round
1337 #else
1338         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
1339         eor     r3,r5,r6                        @ a^b, b^c in next round
1340         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
1341 #endif
1342         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1343         and     r12,r12,r3                      @ (b^c)&=(a^b)
1344         add     r8,r8,r4                        @ d+=h
1345         eor     r12,r12,r6                      @ Maj(a,b,c)
1346         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1347         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1348         @ ldr   r2,[sp,#9*4]            @ 24
1349         @ ldr   r1,[sp,#6*4]
1350         mov     r0,r2,ror#7
1351         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1352         mov     r12,r1,ror#17
1353         eor     r0,r0,r2,ror#18
1354         eor     r12,r12,r1,ror#19
1355         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1356         ldr     r2,[sp,#8*4]
1357         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1358         ldr     r1,[sp,#1*4]
1360         add     r12,r12,r0
1361         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1362         add     r2,r2,r12
1363         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1364         add     r2,r2,r1                        @ X[i]
1365         ldr     r12,[r14],#4                    @ *K256++
1366         add     r11,r11,r2                      @ h+=X[i]
1367         str     r2,[sp,#8*4]
1368         eor     r2,r9,r10
1369         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1370         and     r2,r2,r8
1371         add     r11,r11,r12                     @ h+=K256[i]
1372         eor     r2,r2,r10                       @ Ch(e,f,g)
1373         eor     r0,r4,r4,ror#11
1374         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1375 #if 24==31
1376         and     r12,r12,#0xff
1377         cmp     r12,#0xf2                       @ done?
1378 #endif
1379 #if 24<15
1380 # if __ARM_ARCH__>=7
1381         ldr     r2,[r1],#4                      @ prefetch
1382 # else
1383         ldrb    r2,[r1,#3]
1384 # endif
1385         eor     r12,r4,r5                       @ a^b, b^c in next round
1386 #else
1387         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
1388         eor     r12,r4,r5                       @ a^b, b^c in next round
1389         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
1390 #endif
1391         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1392         and     r3,r3,r12                       @ (b^c)&=(a^b)
1393         add     r7,r7,r11                       @ d+=h
1394         eor     r3,r3,r5                        @ Maj(a,b,c)
1395         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1396         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1397         @ ldr   r2,[sp,#10*4]           @ 25
1398         @ ldr   r1,[sp,#7*4]
1399         mov     r0,r2,ror#7
1400         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1401         mov     r3,r1,ror#17
1402         eor     r0,r0,r2,ror#18
1403         eor     r3,r3,r1,ror#19
1404         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1405         ldr     r2,[sp,#9*4]
1406         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1407         ldr     r1,[sp,#2*4]
1409         add     r3,r3,r0
1410         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1411         add     r2,r2,r3
1412         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1413         add     r2,r2,r1                        @ X[i]
1414         ldr     r3,[r14],#4                     @ *K256++
1415         add     r10,r10,r2                      @ h+=X[i]
1416         str     r2,[sp,#9*4]
1417         eor     r2,r8,r9
1418         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1419         and     r2,r2,r7
1420         add     r10,r10,r3                      @ h+=K256[i]
1421         eor     r2,r2,r9                        @ Ch(e,f,g)
1422         eor     r0,r11,r11,ror#11
1423         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1424 #if 25==31
1425         and     r3,r3,#0xff
1426         cmp     r3,#0xf2                        @ done?
1427 #endif
1428 #if 25<15
1429 # if __ARM_ARCH__>=7
1430         ldr     r2,[r1],#4                      @ prefetch
1431 # else
1432         ldrb    r2,[r1,#3]
1433 # endif
1434         eor     r3,r11,r4                       @ a^b, b^c in next round
1435 #else
1436         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
1437         eor     r3,r11,r4                       @ a^b, b^c in next round
1438         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
1439 #endif
1440         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1441         and     r12,r12,r3                      @ (b^c)&=(a^b)
1442         add     r6,r6,r10                       @ d+=h
1443         eor     r12,r12,r4                      @ Maj(a,b,c)
1444         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1445         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1446         @ ldr   r2,[sp,#11*4]           @ 26
1447         @ ldr   r1,[sp,#8*4]
1448         mov     r0,r2,ror#7
1449         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1450         mov     r12,r1,ror#17
1451         eor     r0,r0,r2,ror#18
1452         eor     r12,r12,r1,ror#19
1453         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1454         ldr     r2,[sp,#10*4]
1455         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1456         ldr     r1,[sp,#3*4]
1458         add     r12,r12,r0
1459         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1460         add     r2,r2,r12
1461         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1462         add     r2,r2,r1                        @ X[i]
1463         ldr     r12,[r14],#4                    @ *K256++
1464         add     r9,r9,r2                        @ h+=X[i]
1465         str     r2,[sp,#10*4]
1466         eor     r2,r7,r8
1467         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1468         and     r2,r2,r6
1469         add     r9,r9,r12                       @ h+=K256[i]
1470         eor     r2,r2,r8                        @ Ch(e,f,g)
1471         eor     r0,r10,r10,ror#11
1472         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1473 #if 26==31
1474         and     r12,r12,#0xff
1475         cmp     r12,#0xf2                       @ done?
1476 #endif
1477 #if 26<15
1478 # if __ARM_ARCH__>=7
1479         ldr     r2,[r1],#4                      @ prefetch
1480 # else
1481         ldrb    r2,[r1,#3]
1482 # endif
1483         eor     r12,r10,r11                     @ a^b, b^c in next round
1484 #else
1485         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
1486         eor     r12,r10,r11                     @ a^b, b^c in next round
1487         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
1488 #endif
1489         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1490         and     r3,r3,r12                       @ (b^c)&=(a^b)
1491         add     r5,r5,r9                        @ d+=h
1492         eor     r3,r3,r11                       @ Maj(a,b,c)
1493         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1494         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1495         @ ldr   r2,[sp,#12*4]           @ 27
1496         @ ldr   r1,[sp,#9*4]
1497         mov     r0,r2,ror#7
1498         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1499         mov     r3,r1,ror#17
1500         eor     r0,r0,r2,ror#18
1501         eor     r3,r3,r1,ror#19
1502         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1503         ldr     r2,[sp,#11*4]
1504         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1505         ldr     r1,[sp,#4*4]
1507         add     r3,r3,r0
1508         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1509         add     r2,r2,r3
1510         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1511         add     r2,r2,r1                        @ X[i]
1512         ldr     r3,[r14],#4                     @ *K256++
1513         add     r8,r8,r2                        @ h+=X[i]
1514         str     r2,[sp,#11*4]
1515         eor     r2,r6,r7
1516         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1517         and     r2,r2,r5
1518         add     r8,r8,r3                        @ h+=K256[i]
1519         eor     r2,r2,r7                        @ Ch(e,f,g)
1520         eor     r0,r9,r9,ror#11
1521         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1522 #if 27==31
1523         and     r3,r3,#0xff
1524         cmp     r3,#0xf2                        @ done?
1525 #endif
1526 #if 27<15
1527 # if __ARM_ARCH__>=7
1528         ldr     r2,[r1],#4                      @ prefetch
1529 # else
1530         ldrb    r2,[r1,#3]
1531 # endif
1532         eor     r3,r9,r10                       @ a^b, b^c in next round
1533 #else
1534         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
1535         eor     r3,r9,r10                       @ a^b, b^c in next round
1536         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
1537 #endif
1538         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1539         and     r12,r12,r3                      @ (b^c)&=(a^b)
1540         add     r4,r4,r8                        @ d+=h
1541         eor     r12,r12,r10                     @ Maj(a,b,c)
1542         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1543         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1544         @ ldr   r2,[sp,#13*4]           @ 28
1545         @ ldr   r1,[sp,#10*4]
1546         mov     r0,r2,ror#7
1547         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1548         mov     r12,r1,ror#17
1549         eor     r0,r0,r2,ror#18
1550         eor     r12,r12,r1,ror#19
1551         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1552         ldr     r2,[sp,#12*4]
1553         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1554         ldr     r1,[sp,#5*4]
1556         add     r12,r12,r0
1557         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1558         add     r2,r2,r12
1559         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1560         add     r2,r2,r1                        @ X[i]
1561         ldr     r12,[r14],#4                    @ *K256++
1562         add     r7,r7,r2                        @ h+=X[i]
1563         str     r2,[sp,#12*4]
1564         eor     r2,r5,r6
1565         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1566         and     r2,r2,r4
1567         add     r7,r7,r12                       @ h+=K256[i]
1568         eor     r2,r2,r6                        @ Ch(e,f,g)
1569         eor     r0,r8,r8,ror#11
1570         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1571 #if 28==31
1572         and     r12,r12,#0xff
1573         cmp     r12,#0xf2                       @ done?
1574 #endif
1575 #if 28<15
1576 # if __ARM_ARCH__>=7
1577         ldr     r2,[r1],#4                      @ prefetch
1578 # else
1579         ldrb    r2,[r1,#3]
1580 # endif
1581         eor     r12,r8,r9                       @ a^b, b^c in next round
1582 #else
1583         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
1584         eor     r12,r8,r9                       @ a^b, b^c in next round
1585         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
1586 #endif
1587         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1588         and     r3,r3,r12                       @ (b^c)&=(a^b)
1589         add     r11,r11,r7                      @ d+=h
1590         eor     r3,r3,r9                        @ Maj(a,b,c)
1591         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1592         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1593         @ ldr   r2,[sp,#14*4]           @ 29
1594         @ ldr   r1,[sp,#11*4]
1595         mov     r0,r2,ror#7
1596         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1597         mov     r3,r1,ror#17
1598         eor     r0,r0,r2,ror#18
1599         eor     r3,r3,r1,ror#19
1600         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1601         ldr     r2,[sp,#13*4]
1602         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1603         ldr     r1,[sp,#6*4]
1605         add     r3,r3,r0
1606         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1607         add     r2,r2,r3
1608         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1609         add     r2,r2,r1                        @ X[i]
1610         ldr     r3,[r14],#4                     @ *K256++
1611         add     r6,r6,r2                        @ h+=X[i]
1612         str     r2,[sp,#13*4]
1613         eor     r2,r4,r5
1614         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1615         and     r2,r2,r11
1616         add     r6,r6,r3                        @ h+=K256[i]
1617         eor     r2,r2,r5                        @ Ch(e,f,g)
1618         eor     r0,r7,r7,ror#11
1619         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1620 #if 29==31
1621         and     r3,r3,#0xff
1622         cmp     r3,#0xf2                        @ done?
1623 #endif
1624 #if 29<15
1625 # if __ARM_ARCH__>=7
1626         ldr     r2,[r1],#4                      @ prefetch
1627 # else
1628         ldrb    r2,[r1,#3]
1629 # endif
1630         eor     r3,r7,r8                        @ a^b, b^c in next round
1631 #else
1632         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
1633         eor     r3,r7,r8                        @ a^b, b^c in next round
1634         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
1635 #endif
1636         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1637         and     r12,r12,r3                      @ (b^c)&=(a^b)
1638         add     r10,r10,r6                      @ d+=h
1639         eor     r12,r12,r8                      @ Maj(a,b,c)
1640         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1641         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1642         @ ldr   r2,[sp,#15*4]           @ 30
1643         @ ldr   r1,[sp,#12*4]
1644         mov     r0,r2,ror#7
1645         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1646         mov     r12,r1,ror#17
1647         eor     r0,r0,r2,ror#18
1648         eor     r12,r12,r1,ror#19
1649         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1650         ldr     r2,[sp,#14*4]
1651         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1652         ldr     r1,[sp,#7*4]
1654         add     r12,r12,r0
1655         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1656         add     r2,r2,r12
1657         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1658         add     r2,r2,r1                        @ X[i]
1659         ldr     r12,[r14],#4                    @ *K256++
1660         add     r5,r5,r2                        @ h+=X[i]
1661         str     r2,[sp,#14*4]
1662         eor     r2,r11,r4
1663         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1664         and     r2,r2,r10
1665         add     r5,r5,r12                       @ h+=K256[i]
1666         eor     r2,r2,r4                        @ Ch(e,f,g)
1667         eor     r0,r6,r6,ror#11
1668         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1669 #if 30==31
1670         and     r12,r12,#0xff
1671         cmp     r12,#0xf2                       @ done?
1672 #endif
1673 #if 30<15
1674 # if __ARM_ARCH__>=7
1675         ldr     r2,[r1],#4                      @ prefetch
1676 # else
1677         ldrb    r2,[r1,#3]
1678 # endif
1679         eor     r12,r6,r7                       @ a^b, b^c in next round
1680 #else
1681         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
1682         eor     r12,r6,r7                       @ a^b, b^c in next round
1683         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
1684 #endif
1685         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1686         and     r3,r3,r12                       @ (b^c)&=(a^b)
1687         add     r9,r9,r5                        @ d+=h
1688         eor     r3,r3,r7                        @ Maj(a,b,c)
1689         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1690         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1691         @ ldr   r2,[sp,#0*4]            @ 31
1692         @ ldr   r1,[sp,#13*4]
1693         mov     r0,r2,ror#7
1694         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1695         mov     r3,r1,ror#17
1696         eor     r0,r0,r2,ror#18
1697         eor     r3,r3,r1,ror#19
1698         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1699         ldr     r2,[sp,#15*4]
1700         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1701         ldr     r1,[sp,#8*4]
1703         add     r3,r3,r0
1704         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1705         add     r2,r2,r3
1706         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1707         add     r2,r2,r1                        @ X[i]
1708         ldr     r3,[r14],#4                     @ *K256++
1709         add     r4,r4,r2                        @ h+=X[i]
1710         str     r2,[sp,#15*4]
1711         eor     r2,r10,r11
1712         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1713         and     r2,r2,r9
1714         add     r4,r4,r3                        @ h+=K256[i]
1715         eor     r2,r2,r11                       @ Ch(e,f,g)
1716         eor     r0,r5,r5,ror#11
1717         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1718 #if 31==31
1719         and     r3,r3,#0xff
1720         cmp     r3,#0xf2                        @ done?
1721 #endif
1722 #if 31<15
1723 # if __ARM_ARCH__>=7
1724         ldr     r2,[r1],#4                      @ prefetch
1725 # else
1726         ldrb    r2,[r1,#3]
1727 # endif
1728         eor     r3,r5,r6                        @ a^b, b^c in next round
1729 #else
1730         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1731         eor     r3,r5,r6                        @ a^b, b^c in next round
1732         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1733 #endif
1734         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1735         and     r12,r12,r3                      @ (b^c)&=(a^b)
1736         add     r8,r8,r4                        @ d+=h
1737         eor     r12,r12,r6                      @ Maj(a,b,c)
1738         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1739         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1740         ldreq   r3,[sp,#16*4]           @ pull ctx
1741         bne     .Lrounds_16_xx
1743         add     r4,r4,r12               @ h+=Maj(a,b,c) from the past
1744         ldr     r0,[r3,#0]
1745         ldr     r2,[r3,#4]
1746         ldr     r12,[r3,#8]
1747         add     r4,r4,r0
1748         ldr     r0,[r3,#12]
1749         add     r5,r5,r2
1750         ldr     r2,[r3,#16]
1751         add     r6,r6,r12
1752         ldr     r12,[r3,#20]
1753         add     r7,r7,r0
1754         ldr     r0,[r3,#24]
1755         add     r8,r8,r2
1756         ldr     r2,[r3,#28]
1757         add     r9,r9,r12
1758         ldr     r1,[sp,#17*4]           @ pull inp
1759         ldr     r12,[sp,#18*4]          @ pull inp+len
1760         add     r10,r10,r0
1761         add     r11,r11,r2
1762         stmia   r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1763         cmp     r1,r12
1764         sub     r14,r14,#256    @ rewind Ktbl
1765         bne     .Loop
1767         add     sp,sp,#19*4     @ destroy frame
1768 #if __ARM_ARCH__>=5
1769         ldmia   sp!,{r4-r11,pc}
1770 #else
1771         ldmia   sp!,{r4-r11,lr}
1772         tst     lr,#1
1773         moveq   pc,lr                   @ be binary compatible with V4, yet
1774         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
1775 #endif
1776 .size   sha256_block_data_order,.-sha256_block_data_order
1777 #if __ARM_MAX_ARCH__>=7
1778 .arch   armv7-a
1779 .fpu    neon
1781 .type   sha256_block_data_order_neon,%function
1782 .align  4
1783 sha256_block_data_order_neon:
1784 .LNEON:
1785         stmdb   sp!,{r4-r12,lr}
1787         mov     r12,sp
1788         sub     sp,sp,#16*4+16          @ alloca
1789         sub     r14,r3,#256+32  @ K256
1790         bic     sp,sp,#15               @ align for 128-bit stores
1792         vld1.8          {q0},[r1]!
1793         vld1.8          {q1},[r1]!
1794         vld1.8          {q2},[r1]!
1795         vld1.8          {q3},[r1]!
1796         vld1.32         {q8},[r14,:128]!
1797         vld1.32         {q9},[r14,:128]!
1798         vld1.32         {q10},[r14,:128]!
1799         vld1.32         {q11},[r14,:128]!
1800         vrev32.8        q0,q0           @ yes, even on
1801         str             r0,[sp,#64]
1802         vrev32.8        q1,q1           @ big-endian
1803         str             r1,[sp,#68]
1804         mov             r1,sp
1805         vrev32.8        q2,q2
1806         str             r2,[sp,#72]
1807         vrev32.8        q3,q3
1808         str             r12,[sp,#76]            @ save original sp
1809         vadd.i32        q8,q8,q0
1810         vadd.i32        q9,q9,q1
1811         vst1.32         {q8},[r1,:128]!
1812         vadd.i32        q10,q10,q2
1813         vst1.32         {q9},[r1,:128]!
1814         vadd.i32        q11,q11,q3
1815         vst1.32         {q10},[r1,:128]!
1816         vst1.32         {q11},[r1,:128]!
1818         ldmia           r0,{r4-r11}
1819         sub             r1,r1,#64
1820         ldr             r2,[sp,#0]
1821         eor             r12,r12,r12
1822         eor             r3,r5,r6
1823         b               .L_00_48
1825 .align  4
1826 .L_00_48:
1827         vext.8  q8,q0,q1,#4
1828         add     r11,r11,r2
1829         eor     r2,r9,r10
1830         eor     r0,r8,r8,ror#5
1831         vext.8  q9,q2,q3,#4
1832         add     r4,r4,r12
1833         and     r2,r2,r8
1834         eor     r12,r0,r8,ror#19
1835         vshr.u32        q10,q8,#7
1836         eor     r0,r4,r4,ror#11
1837         eor     r2,r2,r10
1838         vadd.i32        q0,q0,q9
1839         add     r11,r11,r12,ror#6
1840         eor     r12,r4,r5
1841         vshr.u32        q9,q8,#3
1842         eor     r0,r0,r4,ror#20
1843         add     r11,r11,r2
1844         vsli.32 q10,q8,#25
1845         ldr     r2,[sp,#4]
1846         and     r3,r3,r12
1847         vshr.u32        q11,q8,#18
1848         add     r7,r7,r11
1849         add     r11,r11,r0,ror#2
1850         eor     r3,r3,r5
1851         veor    q9,q9,q10
1852         add     r10,r10,r2
1853         vsli.32 q11,q8,#14
1854         eor     r2,r8,r9
1855         eor     r0,r7,r7,ror#5
1856         vshr.u32        d24,d7,#17
1857         add     r11,r11,r3
1858         and     r2,r2,r7
1859         veor    q9,q9,q11
1860         eor     r3,r0,r7,ror#19
1861         eor     r0,r11,r11,ror#11
1862         vsli.32 d24,d7,#15
1863         eor     r2,r2,r9
1864         add     r10,r10,r3,ror#6
1865         vshr.u32        d25,d7,#10
1866         eor     r3,r11,r4
1867         eor     r0,r0,r11,ror#20
1868         vadd.i32        q0,q0,q9
1869         add     r10,r10,r2
1870         ldr     r2,[sp,#8]
1871         veor    d25,d25,d24
1872         and     r12,r12,r3
1873         add     r6,r6,r10
1874         vshr.u32        d24,d7,#19
1875         add     r10,r10,r0,ror#2
1876         eor     r12,r12,r4
1877         vsli.32 d24,d7,#13
1878         add     r9,r9,r2
1879         eor     r2,r7,r8
1880         veor    d25,d25,d24
1881         eor     r0,r6,r6,ror#5
1882         add     r10,r10,r12
1883         vadd.i32        d0,d0,d25
1884         and     r2,r2,r6
1885         eor     r12,r0,r6,ror#19
1886         vshr.u32        d24,d0,#17
1887         eor     r0,r10,r10,ror#11
1888         eor     r2,r2,r8
1889         vsli.32 d24,d0,#15
1890         add     r9,r9,r12,ror#6
1891         eor     r12,r10,r11
1892         vshr.u32        d25,d0,#10
1893         eor     r0,r0,r10,ror#20
1894         add     r9,r9,r2
1895         veor    d25,d25,d24
1896         ldr     r2,[sp,#12]
1897         and     r3,r3,r12
1898         vshr.u32        d24,d0,#19
1899         add     r5,r5,r9
1900         add     r9,r9,r0,ror#2
1901         eor     r3,r3,r11
1902         vld1.32 {q8},[r14,:128]!
1903         add     r8,r8,r2
1904         vsli.32 d24,d0,#13
1905         eor     r2,r6,r7
1906         eor     r0,r5,r5,ror#5
1907         veor    d25,d25,d24
1908         add     r9,r9,r3
1909         and     r2,r2,r5
1910         vadd.i32        d1,d1,d25
1911         eor     r3,r0,r5,ror#19
1912         eor     r0,r9,r9,ror#11
1913         vadd.i32        q8,q8,q0
1914         eor     r2,r2,r7
1915         add     r8,r8,r3,ror#6
1916         eor     r3,r9,r10
1917         eor     r0,r0,r9,ror#20
1918         add     r8,r8,r2
1919         ldr     r2,[sp,#16]
1920         and     r12,r12,r3
1921         add     r4,r4,r8
1922         vst1.32 {q8},[r1,:128]!
1923         add     r8,r8,r0,ror#2
1924         eor     r12,r12,r10
1925         vext.8  q8,q1,q2,#4
1926         add     r7,r7,r2
1927         eor     r2,r5,r6
1928         eor     r0,r4,r4,ror#5
1929         vext.8  q9,q3,q0,#4
1930         add     r8,r8,r12
1931         and     r2,r2,r4
1932         eor     r12,r0,r4,ror#19
1933         vshr.u32        q10,q8,#7
1934         eor     r0,r8,r8,ror#11
1935         eor     r2,r2,r6
1936         vadd.i32        q1,q1,q9
1937         add     r7,r7,r12,ror#6
1938         eor     r12,r8,r9
1939         vshr.u32        q9,q8,#3
1940         eor     r0,r0,r8,ror#20
1941         add     r7,r7,r2
1942         vsli.32 q10,q8,#25
1943         ldr     r2,[sp,#20]
1944         and     r3,r3,r12
1945         vshr.u32        q11,q8,#18
1946         add     r11,r11,r7
1947         add     r7,r7,r0,ror#2
1948         eor     r3,r3,r9
1949         veor    q9,q9,q10
1950         add     r6,r6,r2
1951         vsli.32 q11,q8,#14
1952         eor     r2,r4,r5
1953         eor     r0,r11,r11,ror#5
1954         vshr.u32        d24,d1,#17
1955         add     r7,r7,r3
1956         and     r2,r2,r11
1957         veor    q9,q9,q11
1958         eor     r3,r0,r11,ror#19
1959         eor     r0,r7,r7,ror#11
1960         vsli.32 d24,d1,#15
1961         eor     r2,r2,r5
1962         add     r6,r6,r3,ror#6
1963         vshr.u32        d25,d1,#10
1964         eor     r3,r7,r8
1965         eor     r0,r0,r7,ror#20
1966         vadd.i32        q1,q1,q9
1967         add     r6,r6,r2
1968         ldr     r2,[sp,#24]
1969         veor    d25,d25,d24
1970         and     r12,r12,r3
1971         add     r10,r10,r6
1972         vshr.u32        d24,d1,#19
1973         add     r6,r6,r0,ror#2
1974         eor     r12,r12,r8
1975         vsli.32 d24,d1,#13
1976         add     r5,r5,r2
1977         eor     r2,r11,r4
1978         veor    d25,d25,d24
1979         eor     r0,r10,r10,ror#5
1980         add     r6,r6,r12
1981         vadd.i32        d2,d2,d25
1982         and     r2,r2,r10
1983         eor     r12,r0,r10,ror#19
1984         vshr.u32        d24,d2,#17
1985         eor     r0,r6,r6,ror#11
1986         eor     r2,r2,r4
1987         vsli.32 d24,d2,#15
1988         add     r5,r5,r12,ror#6
1989         eor     r12,r6,r7
1990         vshr.u32        d25,d2,#10
1991         eor     r0,r0,r6,ror#20
1992         add     r5,r5,r2
1993         veor    d25,d25,d24
1994         ldr     r2,[sp,#28]
1995         and     r3,r3,r12
1996         vshr.u32        d24,d2,#19
1997         add     r9,r9,r5
1998         add     r5,r5,r0,ror#2
1999         eor     r3,r3,r7
2000         vld1.32 {q8},[r14,:128]!
2001         add     r4,r4,r2
2002         vsli.32 d24,d2,#13
2003         eor     r2,r10,r11
2004         eor     r0,r9,r9,ror#5
2005         veor    d25,d25,d24
2006         add     r5,r5,r3
2007         and     r2,r2,r9
2008         vadd.i32        d3,d3,d25
2009         eor     r3,r0,r9,ror#19
2010         eor     r0,r5,r5,ror#11
2011         vadd.i32        q8,q8,q1
2012         eor     r2,r2,r11
2013         add     r4,r4,r3,ror#6
2014         eor     r3,r5,r6
2015         eor     r0,r0,r5,ror#20
2016         add     r4,r4,r2
2017         ldr     r2,[sp,#32]
2018         and     r12,r12,r3
2019         add     r8,r8,r4
2020         vst1.32 {q8},[r1,:128]!
2021         add     r4,r4,r0,ror#2
2022         eor     r12,r12,r6
2023         vext.8  q8,q2,q3,#4
2024         add     r11,r11,r2
2025         eor     r2,r9,r10
2026         eor     r0,r8,r8,ror#5
2027         vext.8  q9,q0,q1,#4
2028         add     r4,r4,r12
2029         and     r2,r2,r8
2030         eor     r12,r0,r8,ror#19
2031         vshr.u32        q10,q8,#7
2032         eor     r0,r4,r4,ror#11
2033         eor     r2,r2,r10
2034         vadd.i32        q2,q2,q9
2035         add     r11,r11,r12,ror#6
2036         eor     r12,r4,r5
2037         vshr.u32        q9,q8,#3
2038         eor     r0,r0,r4,ror#20
2039         add     r11,r11,r2
2040         vsli.32 q10,q8,#25
2041         ldr     r2,[sp,#36]
2042         and     r3,r3,r12
2043         vshr.u32        q11,q8,#18
2044         add     r7,r7,r11
2045         add     r11,r11,r0,ror#2
2046         eor     r3,r3,r5
2047         veor    q9,q9,q10
2048         add     r10,r10,r2
2049         vsli.32 q11,q8,#14
2050         eor     r2,r8,r9
2051         eor     r0,r7,r7,ror#5
2052         vshr.u32        d24,d3,#17
2053         add     r11,r11,r3
2054         and     r2,r2,r7
2055         veor    q9,q9,q11
2056         eor     r3,r0,r7,ror#19
2057         eor     r0,r11,r11,ror#11
2058         vsli.32 d24,d3,#15
2059         eor     r2,r2,r9
2060         add     r10,r10,r3,ror#6
2061         vshr.u32        d25,d3,#10
2062         eor     r3,r11,r4
2063         eor     r0,r0,r11,ror#20
2064         vadd.i32        q2,q2,q9
2065         add     r10,r10,r2
2066         ldr     r2,[sp,#40]
2067         veor    d25,d25,d24
2068         and     r12,r12,r3
2069         add     r6,r6,r10
2070         vshr.u32        d24,d3,#19
2071         add     r10,r10,r0,ror#2
2072         eor     r12,r12,r4
2073         vsli.32 d24,d3,#13
2074         add     r9,r9,r2
2075         eor     r2,r7,r8
2076         veor    d25,d25,d24
2077         eor     r0,r6,r6,ror#5
2078         add     r10,r10,r12
2079         vadd.i32        d4,d4,d25
2080         and     r2,r2,r6
2081         eor     r12,r0,r6,ror#19
2082         vshr.u32        d24,d4,#17
2083         eor     r0,r10,r10,ror#11
2084         eor     r2,r2,r8
2085         vsli.32 d24,d4,#15
2086         add     r9,r9,r12,ror#6
2087         eor     r12,r10,r11
2088         vshr.u32        d25,d4,#10
2089         eor     r0,r0,r10,ror#20
2090         add     r9,r9,r2
2091         veor    d25,d25,d24
2092         ldr     r2,[sp,#44]
2093         and     r3,r3,r12
2094         vshr.u32        d24,d4,#19
2095         add     r5,r5,r9
2096         add     r9,r9,r0,ror#2
2097         eor     r3,r3,r11
2098         vld1.32 {q8},[r14,:128]!
2099         add     r8,r8,r2
2100         vsli.32 d24,d4,#13
2101         eor     r2,r6,r7
2102         eor     r0,r5,r5,ror#5
2103         veor    d25,d25,d24
2104         add     r9,r9,r3
2105         and     r2,r2,r5
2106         vadd.i32        d5,d5,d25
2107         eor     r3,r0,r5,ror#19
2108         eor     r0,r9,r9,ror#11
2109         vadd.i32        q8,q8,q2
2110         eor     r2,r2,r7
2111         add     r8,r8,r3,ror#6
2112         eor     r3,r9,r10
2113         eor     r0,r0,r9,ror#20
2114         add     r8,r8,r2
2115         ldr     r2,[sp,#48]
2116         and     r12,r12,r3
2117         add     r4,r4,r8
2118         vst1.32 {q8},[r1,:128]!
2119         add     r8,r8,r0,ror#2
2120         eor     r12,r12,r10
2121         vext.8  q8,q3,q0,#4
2122         add     r7,r7,r2
2123         eor     r2,r5,r6
2124         eor     r0,r4,r4,ror#5
2125         vext.8  q9,q1,q2,#4
2126         add     r8,r8,r12
2127         and     r2,r2,r4
2128         eor     r12,r0,r4,ror#19
2129         vshr.u32        q10,q8,#7
2130         eor     r0,r8,r8,ror#11
2131         eor     r2,r2,r6
2132         vadd.i32        q3,q3,q9
2133         add     r7,r7,r12,ror#6
2134         eor     r12,r8,r9
2135         vshr.u32        q9,q8,#3
2136         eor     r0,r0,r8,ror#20
2137         add     r7,r7,r2
2138         vsli.32 q10,q8,#25
2139         ldr     r2,[sp,#52]
2140         and     r3,r3,r12
2141         vshr.u32        q11,q8,#18
2142         add     r11,r11,r7
2143         add     r7,r7,r0,ror#2
2144         eor     r3,r3,r9
2145         veor    q9,q9,q10
2146         add     r6,r6,r2
2147         vsli.32 q11,q8,#14
2148         eor     r2,r4,r5
2149         eor     r0,r11,r11,ror#5
2150         vshr.u32        d24,d5,#17
2151         add     r7,r7,r3
2152         and     r2,r2,r11
2153         veor    q9,q9,q11
2154         eor     r3,r0,r11,ror#19
2155         eor     r0,r7,r7,ror#11
2156         vsli.32 d24,d5,#15
2157         eor     r2,r2,r5
2158         add     r6,r6,r3,ror#6
2159         vshr.u32        d25,d5,#10
2160         eor     r3,r7,r8
2161         eor     r0,r0,r7,ror#20
2162         vadd.i32        q3,q3,q9
2163         add     r6,r6,r2
2164         ldr     r2,[sp,#56]
2165         veor    d25,d25,d24
2166         and     r12,r12,r3
2167         add     r10,r10,r6
2168         vshr.u32        d24,d5,#19
2169         add     r6,r6,r0,ror#2
2170         eor     r12,r12,r8
2171         vsli.32 d24,d5,#13
2172         add     r5,r5,r2
2173         eor     r2,r11,r4
2174         veor    d25,d25,d24
2175         eor     r0,r10,r10,ror#5
2176         add     r6,r6,r12
2177         vadd.i32        d6,d6,d25
2178         and     r2,r2,r10
2179         eor     r12,r0,r10,ror#19
2180         vshr.u32        d24,d6,#17
2181         eor     r0,r6,r6,ror#11
2182         eor     r2,r2,r4
2183         vsli.32 d24,d6,#15
2184         add     r5,r5,r12,ror#6
2185         eor     r12,r6,r7
2186         vshr.u32        d25,d6,#10
2187         eor     r0,r0,r6,ror#20
2188         add     r5,r5,r2
2189         veor    d25,d25,d24
2190         ldr     r2,[sp,#60]
2191         and     r3,r3,r12
2192         vshr.u32        d24,d6,#19
2193         add     r9,r9,r5
2194         add     r5,r5,r0,ror#2
2195         eor     r3,r3,r7
2196         vld1.32 {q8},[r14,:128]!
2197         add     r4,r4,r2
2198         vsli.32 d24,d6,#13
2199         eor     r2,r10,r11
2200         eor     r0,r9,r9,ror#5
2201         veor    d25,d25,d24
2202         add     r5,r5,r3
2203         and     r2,r2,r9
2204         vadd.i32        d7,d7,d25
2205         eor     r3,r0,r9,ror#19
2206         eor     r0,r5,r5,ror#11
2207         vadd.i32        q8,q8,q3
2208         eor     r2,r2,r11
2209         add     r4,r4,r3,ror#6
2210         eor     r3,r5,r6
2211         eor     r0,r0,r5,ror#20
2212         add     r4,r4,r2
2213         ldr     r2,[r14]
2214         and     r12,r12,r3
2215         add     r8,r8,r4
2216         vst1.32 {q8},[r1,:128]!
2217         add     r4,r4,r0,ror#2
2218         eor     r12,r12,r6
2219         teq     r2,#0                           @ check for K256 terminator
2220         ldr     r2,[sp,#0]
2221         sub     r1,r1,#64
2222         bne     .L_00_48
2224         ldr             r1,[sp,#68]
2225         ldr             r0,[sp,#72]
2226         sub             r14,r14,#256    @ rewind r14
2227         teq             r1,r0
2228         subeq           r1,r1,#64               @ avoid SEGV
2229         vld1.8          {q0},[r1]!              @ load next input block
2230         vld1.8          {q1},[r1]!
2231         vld1.8          {q2},[r1]!
2232         vld1.8          {q3},[r1]!
2233         strne           r1,[sp,#68]
2234         mov             r1,sp
2235         add     r11,r11,r2
2236         eor     r2,r9,r10
2237         eor     r0,r8,r8,ror#5
2238         add     r4,r4,r12
2239         vld1.32 {q8},[r14,:128]!
2240         and     r2,r2,r8
2241         eor     r12,r0,r8,ror#19
2242         eor     r0,r4,r4,ror#11
2243         eor     r2,r2,r10
2244         vrev32.8        q0,q0
2245         add     r11,r11,r12,ror#6
2246         eor     r12,r4,r5
2247         eor     r0,r0,r4,ror#20
2248         add     r11,r11,r2
2249         vadd.i32        q8,q8,q0
2250         ldr     r2,[sp,#4]
2251         and     r3,r3,r12
2252         add     r7,r7,r11
2253         add     r11,r11,r0,ror#2
2254         eor     r3,r3,r5
2255         add     r10,r10,r2
2256         eor     r2,r8,r9
2257         eor     r0,r7,r7,ror#5
2258         add     r11,r11,r3
2259         and     r2,r2,r7
2260         eor     r3,r0,r7,ror#19
2261         eor     r0,r11,r11,ror#11
2262         eor     r2,r2,r9
2263         add     r10,r10,r3,ror#6
2264         eor     r3,r11,r4
2265         eor     r0,r0,r11,ror#20
2266         add     r10,r10,r2
2267         ldr     r2,[sp,#8]
2268         and     r12,r12,r3
2269         add     r6,r6,r10
2270         add     r10,r10,r0,ror#2
2271         eor     r12,r12,r4
2272         add     r9,r9,r2
2273         eor     r2,r7,r8
2274         eor     r0,r6,r6,ror#5
2275         add     r10,r10,r12
2276         and     r2,r2,r6
2277         eor     r12,r0,r6,ror#19
2278         eor     r0,r10,r10,ror#11
2279         eor     r2,r2,r8
2280         add     r9,r9,r12,ror#6
2281         eor     r12,r10,r11
2282         eor     r0,r0,r10,ror#20
2283         add     r9,r9,r2
2284         ldr     r2,[sp,#12]
2285         and     r3,r3,r12
2286         add     r5,r5,r9
2287         add     r9,r9,r0,ror#2
2288         eor     r3,r3,r11
2289         add     r8,r8,r2
2290         eor     r2,r6,r7
2291         eor     r0,r5,r5,ror#5
2292         add     r9,r9,r3
2293         and     r2,r2,r5
2294         eor     r3,r0,r5,ror#19
2295         eor     r0,r9,r9,ror#11
2296         eor     r2,r2,r7
2297         add     r8,r8,r3,ror#6
2298         eor     r3,r9,r10
2299         eor     r0,r0,r9,ror#20
2300         add     r8,r8,r2
2301         ldr     r2,[sp,#16]
2302         and     r12,r12,r3
2303         add     r4,r4,r8
2304         add     r8,r8,r0,ror#2
2305         eor     r12,r12,r10
2306         vst1.32 {q8},[r1,:128]!
2307         add     r7,r7,r2
2308         eor     r2,r5,r6
2309         eor     r0,r4,r4,ror#5
2310         add     r8,r8,r12
2311         vld1.32 {q8},[r14,:128]!
2312         and     r2,r2,r4
2313         eor     r12,r0,r4,ror#19
2314         eor     r0,r8,r8,ror#11
2315         eor     r2,r2,r6
2316         vrev32.8        q1,q1
2317         add     r7,r7,r12,ror#6
2318         eor     r12,r8,r9
2319         eor     r0,r0,r8,ror#20
2320         add     r7,r7,r2
2321         vadd.i32        q8,q8,q1
2322         ldr     r2,[sp,#20]
2323         and     r3,r3,r12
2324         add     r11,r11,r7
2325         add     r7,r7,r0,ror#2
2326         eor     r3,r3,r9
2327         add     r6,r6,r2
2328         eor     r2,r4,r5
2329         eor     r0,r11,r11,ror#5
2330         add     r7,r7,r3
2331         and     r2,r2,r11
2332         eor     r3,r0,r11,ror#19
2333         eor     r0,r7,r7,ror#11
2334         eor     r2,r2,r5
2335         add     r6,r6,r3,ror#6
2336         eor     r3,r7,r8
2337         eor     r0,r0,r7,ror#20
2338         add     r6,r6,r2
2339         ldr     r2,[sp,#24]
2340         and     r12,r12,r3
2341         add     r10,r10,r6
2342         add     r6,r6,r0,ror#2
2343         eor     r12,r12,r8
2344         add     r5,r5,r2
2345         eor     r2,r11,r4
2346         eor     r0,r10,r10,ror#5
2347         add     r6,r6,r12
2348         and     r2,r2,r10
2349         eor     r12,r0,r10,ror#19
2350         eor     r0,r6,r6,ror#11
2351         eor     r2,r2,r4
2352         add     r5,r5,r12,ror#6
2353         eor     r12,r6,r7
2354         eor     r0,r0,r6,ror#20
2355         add     r5,r5,r2
2356         ldr     r2,[sp,#28]
2357         and     r3,r3,r12
2358         add     r9,r9,r5
2359         add     r5,r5,r0,ror#2
2360         eor     r3,r3,r7
2361         add     r4,r4,r2
2362         eor     r2,r10,r11
2363         eor     r0,r9,r9,ror#5
2364         add     r5,r5,r3
2365         and     r2,r2,r9
2366         eor     r3,r0,r9,ror#19
2367         eor     r0,r5,r5,ror#11
2368         eor     r2,r2,r11
2369         add     r4,r4,r3,ror#6
2370         eor     r3,r5,r6
2371         eor     r0,r0,r5,ror#20
2372         add     r4,r4,r2
2373         ldr     r2,[sp,#32]
2374         and     r12,r12,r3
2375         add     r8,r8,r4
2376         add     r4,r4,r0,ror#2
2377         eor     r12,r12,r6
2378         vst1.32 {q8},[r1,:128]!
2379         add     r11,r11,r2
2380         eor     r2,r9,r10
2381         eor     r0,r8,r8,ror#5
2382         add     r4,r4,r12
2383         vld1.32 {q8},[r14,:128]!
2384         and     r2,r2,r8
2385         eor     r12,r0,r8,ror#19
2386         eor     r0,r4,r4,ror#11
2387         eor     r2,r2,r10
2388         vrev32.8        q2,q2
2389         add     r11,r11,r12,ror#6
2390         eor     r12,r4,r5
2391         eor     r0,r0,r4,ror#20
2392         add     r11,r11,r2
2393         vadd.i32        q8,q8,q2
2394         ldr     r2,[sp,#36]
2395         and     r3,r3,r12
2396         add     r7,r7,r11
2397         add     r11,r11,r0,ror#2
2398         eor     r3,r3,r5
2399         add     r10,r10,r2
2400         eor     r2,r8,r9
2401         eor     r0,r7,r7,ror#5
2402         add     r11,r11,r3
2403         and     r2,r2,r7
2404         eor     r3,r0,r7,ror#19
2405         eor     r0,r11,r11,ror#11
2406         eor     r2,r2,r9
2407         add     r10,r10,r3,ror#6
2408         eor     r3,r11,r4
2409         eor     r0,r0,r11,ror#20
2410         add     r10,r10,r2
2411         ldr     r2,[sp,#40]
2412         and     r12,r12,r3
2413         add     r6,r6,r10
2414         add     r10,r10,r0,ror#2
2415         eor     r12,r12,r4
2416         add     r9,r9,r2
2417         eor     r2,r7,r8
2418         eor     r0,r6,r6,ror#5
2419         add     r10,r10,r12
2420         and     r2,r2,r6
2421         eor     r12,r0,r6,ror#19
2422         eor     r0,r10,r10,ror#11
2423         eor     r2,r2,r8
2424         add     r9,r9,r12,ror#6
2425         eor     r12,r10,r11
2426         eor     r0,r0,r10,ror#20
2427         add     r9,r9,r2
2428         ldr     r2,[sp,#44]
2429         and     r3,r3,r12
2430         add     r5,r5,r9
2431         add     r9,r9,r0,ror#2
2432         eor     r3,r3,r11
2433         add     r8,r8,r2
2434         eor     r2,r6,r7
2435         eor     r0,r5,r5,ror#5
2436         add     r9,r9,r3
2437         and     r2,r2,r5
2438         eor     r3,r0,r5,ror#19
2439         eor     r0,r9,r9,ror#11
2440         eor     r2,r2,r7
2441         add     r8,r8,r3,ror#6
2442         eor     r3,r9,r10
2443         eor     r0,r0,r9,ror#20
2444         add     r8,r8,r2
2445         ldr     r2,[sp,#48]
2446         and     r12,r12,r3
2447         add     r4,r4,r8
2448         add     r8,r8,r0,ror#2
2449         eor     r12,r12,r10
2450         vst1.32 {q8},[r1,:128]!
2451         add     r7,r7,r2
2452         eor     r2,r5,r6
2453         eor     r0,r4,r4,ror#5
2454         add     r8,r8,r12
2455         vld1.32 {q8},[r14,:128]!
2456         and     r2,r2,r4
2457         eor     r12,r0,r4,ror#19
2458         eor     r0,r8,r8,ror#11
2459         eor     r2,r2,r6
2460         vrev32.8        q3,q3
2461         add     r7,r7,r12,ror#6
2462         eor     r12,r8,r9
2463         eor     r0,r0,r8,ror#20
2464         add     r7,r7,r2
2465         vadd.i32        q8,q8,q3
2466         ldr     r2,[sp,#52]
2467         and     r3,r3,r12
2468         add     r11,r11,r7
2469         add     r7,r7,r0,ror#2
2470         eor     r3,r3,r9
2471         add     r6,r6,r2
2472         eor     r2,r4,r5
2473         eor     r0,r11,r11,ror#5
2474         add     r7,r7,r3
2475         and     r2,r2,r11
2476         eor     r3,r0,r11,ror#19
2477         eor     r0,r7,r7,ror#11
2478         eor     r2,r2,r5
2479         add     r6,r6,r3,ror#6
2480         eor     r3,r7,r8
2481         eor     r0,r0,r7,ror#20
2482         add     r6,r6,r2
2483         ldr     r2,[sp,#56]
2484         and     r12,r12,r3
2485         add     r10,r10,r6
2486         add     r6,r6,r0,ror#2
2487         eor     r12,r12,r8
2488         add     r5,r5,r2
2489         eor     r2,r11,r4
2490         eor     r0,r10,r10,ror#5
2491         add     r6,r6,r12
2492         and     r2,r2,r10
2493         eor     r12,r0,r10,ror#19
2494         eor     r0,r6,r6,ror#11
2495         eor     r2,r2,r4
2496         add     r5,r5,r12,ror#6
2497         eor     r12,r6,r7
2498         eor     r0,r0,r6,ror#20
2499         add     r5,r5,r2
2500         ldr     r2,[sp,#60]
2501         and     r3,r3,r12
2502         add     r9,r9,r5
2503         add     r5,r5,r0,ror#2
2504         eor     r3,r3,r7
2505         add     r4,r4,r2
2506         eor     r2,r10,r11
2507         eor     r0,r9,r9,ror#5
2508         add     r5,r5,r3
2509         and     r2,r2,r9
2510         eor     r3,r0,r9,ror#19
2511         eor     r0,r5,r5,ror#11
2512         eor     r2,r2,r11
2513         add     r4,r4,r3,ror#6
2514         eor     r3,r5,r6
2515         eor     r0,r0,r5,ror#20
2516         add     r4,r4,r2
2517         ldr     r2,[sp,#64]
2518         and     r12,r12,r3
2519         add     r8,r8,r4
2520         add     r4,r4,r0,ror#2
2521         eor     r12,r12,r6
2522         vst1.32 {q8},[r1,:128]!
2523         ldr     r0,[r2,#0]
2524         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
2525         ldr     r12,[r2,#4]
2526         ldr     r3,[r2,#8]
2527         ldr     r1,[r2,#12]
2528         add     r4,r4,r0                        @ accumulate
2529         ldr     r0,[r2,#16]
2530         add     r5,r5,r12
2531         ldr     r12,[r2,#20]
2532         add     r6,r6,r3
2533         ldr     r3,[r2,#24]
2534         add     r7,r7,r1
2535         ldr     r1,[r2,#28]
2536         add     r8,r8,r0
2537         str     r4,[r2],#4
2538         add     r9,r9,r12
2539         str     r5,[r2],#4
2540         add     r10,r10,r3
2541         str     r6,[r2],#4
2542         add     r11,r11,r1
2543         str     r7,[r2],#4
2544         stmia   r2,{r8-r11}
2546         movne   r1,sp
2547         ldrne   r2,[sp,#0]
2548         eorne   r12,r12,r12
2549         ldreq   sp,[sp,#76]                     @ restore original sp
2550         eorne   r3,r5,r6
2551         bne     .L_00_48
2553         ldmia   sp!,{r4-r12,pc}
2554 .size   sha256_block_data_order_neon,.-sha256_block_data_order_neon
2555 #endif
2556 #if __ARM_MAX_ARCH__>=7
@ sha256_block_data_order_armv8: SHA-256 multi-block transform using the
@ ARMv8 Cryptography Extensions.  Entered at .LARMv8 from the
@ sha256_block_data_order dispatcher when OPENSSL_armcap_P has the
@ ARMV8_SHA256 bit set.
@ In:  r0 = hash state, eight 32-bit words (loaded into q0,q1)
@      r1 = input data
@      r2 = end-of-input pointer (dispatcher computed r1 + 64*blocks)
@      r3 = address of sha256_block_data_order (set by the dispatcher's
@           "sub r3,pc,#8"); rebased below to the K256 constant table
@ Clobbers q0-q2,q8-q15 and flags; no stack is used.
@ The SHA-256 crypto instructions are emitted as raw .inst words (with the
@ intended mnemonic in the trailing comment) so this file still assembles
@ with toolchains that lack the sha256* mnemonics.
2557 .type   sha256_block_data_order_armv8,%function
2558 .align  5
2559 sha256_block_data_order_armv8:
2560 .LARMv8:
2561         vld1.32 {q0,q1},[r0]            @ load hash state a-h into q0,q1
2562         sub     r3,r3,#sha256_block_data_order-K256     @ r3 = K256 table
2564 .Loop_v8:
2565         vld1.8          {q8-q9},[r1]!   @ load 64-byte input block
2566         vld1.8          {q10-q11},[r1]!
2567         vld1.32         {q12},[r3]!     @ first four round constants
2568         vrev32.8        q8,q8           @ byte-swap: SHA-256 words are big-endian
2569         vrev32.8        q9,q9
2570         vrev32.8        q10,q10
2571         vrev32.8        q11,q11
2572         vmov            q14,q0  @ offload state for final accumulation
2573         vmov            q15,q1
2574         teq             r1,r2           @ Z set on last block (tested by bne below)
@ Rounds 0-47: twelve groups of four rounds.  Each group loads the next
@ constant quad, adds it to a schedule word, runs both compression halves
@ (sha256h/sha256h2) and expands the message schedule (sha256su0/su1).
2575         vld1.32         {q13},[r3]!
2576         vadd.i32        q12,q12,q8
2577         .inst   0xf3fa03e2      @ sha256su0 q8,q9
2578         vmov            q2,q0
2579         .inst   0xf3020c68      @ sha256h q0,q1,q12
2580         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2581         .inst   0xf3640ce6      @ sha256su1 q8,q10,q11
2582         vld1.32         {q12},[r3]!
2583         vadd.i32        q13,q13,q9
2584         .inst   0xf3fa23e4      @ sha256su0 q9,q10
2585         vmov            q2,q0
2586         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2587         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2588         .inst   0xf3662ce0      @ sha256su1 q9,q11,q8
2589         vld1.32         {q13},[r3]!
2590         vadd.i32        q12,q12,q10
2591         .inst   0xf3fa43e6      @ sha256su0 q10,q11
2592         vmov            q2,q0
2593         .inst   0xf3020c68      @ sha256h q0,q1,q12
2594         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2595         .inst   0xf3604ce2      @ sha256su1 q10,q8,q9
2596         vld1.32         {q12},[r3]!
2597         vadd.i32        q13,q13,q11
2598         .inst   0xf3fa63e0      @ sha256su0 q11,q8
2599         vmov            q2,q0
2600         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2601         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2602         .inst   0xf3626ce4      @ sha256su1 q11,q9,q10
2603         vld1.32         {q13},[r3]!
2604         vadd.i32        q12,q12,q8
2605         .inst   0xf3fa03e2      @ sha256su0 q8,q9
2606         vmov            q2,q0
2607         .inst   0xf3020c68      @ sha256h q0,q1,q12
2608         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2609         .inst   0xf3640ce6      @ sha256su1 q8,q10,q11
2610         vld1.32         {q12},[r3]!
2611         vadd.i32        q13,q13,q9
2612         .inst   0xf3fa23e4      @ sha256su0 q9,q10
2613         vmov            q2,q0
2614         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2615         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2616         .inst   0xf3662ce0      @ sha256su1 q9,q11,q8
2617         vld1.32         {q13},[r3]!
2618         vadd.i32        q12,q12,q10
2619         .inst   0xf3fa43e6      @ sha256su0 q10,q11
2620         vmov            q2,q0
2621         .inst   0xf3020c68      @ sha256h q0,q1,q12
2622         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2623         .inst   0xf3604ce2      @ sha256su1 q10,q8,q9
2624         vld1.32         {q12},[r3]!
2625         vadd.i32        q13,q13,q11
2626         .inst   0xf3fa63e0      @ sha256su0 q11,q8
2627         vmov            q2,q0
2628         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2629         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2630         .inst   0xf3626ce4      @ sha256su1 q11,q9,q10
2631         vld1.32         {q13},[r3]!
2632         vadd.i32        q12,q12,q8
2633         .inst   0xf3fa03e2      @ sha256su0 q8,q9
2634         vmov            q2,q0
2635         .inst   0xf3020c68      @ sha256h q0,q1,q12
2636         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2637         .inst   0xf3640ce6      @ sha256su1 q8,q10,q11
2638         vld1.32         {q12},[r3]!
2639         vadd.i32        q13,q13,q9
2640         .inst   0xf3fa23e4      @ sha256su0 q9,q10
2641         vmov            q2,q0
2642         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2643         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2644         .inst   0xf3662ce0      @ sha256su1 q9,q11,q8
2645         vld1.32         {q13},[r3]!
2646         vadd.i32        q12,q12,q10
2647         .inst   0xf3fa43e6      @ sha256su0 q10,q11
2648         vmov            q2,q0
2649         .inst   0xf3020c68      @ sha256h q0,q1,q12
2650         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2651         .inst   0xf3604ce2      @ sha256su1 q10,q8,q9
2652         vld1.32         {q12},[r3]!
2653         vadd.i32        q13,q13,q11
2654         .inst   0xf3fa63e0      @ sha256su0 q11,q8
2655         vmov            q2,q0
2656         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2657         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2658         .inst   0xf3626ce4      @ sha256su1 q11,q9,q10
@ Rounds 48-63: the last four groups need no further schedule expansion.
2659         vld1.32         {q13},[r3]!
2660         vadd.i32        q12,q12,q8
2661         vmov            q2,q0
2662         .inst   0xf3020c68      @ sha256h q0,q1,q12
2663         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2665         vld1.32         {q12},[r3]!
2666         vadd.i32        q13,q13,q9
2667         vmov            q2,q0
2668         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2669         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2671         vld1.32         {q13},[r3]
2672         vadd.i32        q12,q12,q10
2673         sub             r3,r3,#256-16   @ rewind r3 to K256 (15*16 bytes consumed)
2674         vmov            q2,q0
2675         .inst   0xf3020c68      @ sha256h q0,q1,q12
2676         .inst   0xf3142c68      @ sha256h2 q1,q2,q12
2678         vadd.i32        q13,q13,q11
2679         vmov            q2,q0
2680         .inst   0xf3020c6a      @ sha256h q0,q1,q13
2681         .inst   0xf3142c6a      @ sha256h2 q1,q2,q13
2683         vadd.i32        q0,q0,q14       @ accumulate: state += offloaded state
2684         vadd.i32        q1,q1,q15
2685         bne             .Loop_v8        @ more input blocks remain
2687         vst1.32         {q0,q1},[r0]    @ store updated hash state
2689         RET             @ bx lr
2690 .size   sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2691 #endif
@ Identification string embedded in the object (runtime data — unchanged).
2692 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2693 .align  2
2694 #if __ARM_MAX_ARCH__>=7
@ Common symbol: 4-byte, 4-aligned CPU-capability word.  The dispatcher in
@ sha256_block_data_order reads it (via .LOPENSSL_armcap) to select the
@ NEON or ARMv8-crypto code path.
2695 .comm   OPENSSL_armcap_P,4,4
2696 #endif