Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / boringssl / linux-arm / crypto / sha / sha256-armv4.S
blob 9fc3e0b8b684213ef1485d07bd5df5d8ed61cb79
1 #if defined(__arm__)
3 @ ====================================================================
4 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 @ project. The module is, however, dual licensed under OpenSSL and
6 @ CRYPTOGAMS licenses depending on where you obtain it. For further
7 @ details see http://www.openssl.org/~appro/cryptogams/.
9 @ Permission to use under GPL terms is granted.
10 @ ====================================================================
12 @ SHA256 block procedure for ARMv4. May 2007.
14 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
15 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
16 @ byte [on single-issue Xscale PXA250 core].
18 @ July 2010.
20 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
21 @ Cortex A8 core and ~20 cycles per processed byte.
23 @ February 2011.
25 @ Profiler-assisted and platform-specific optimization resulted in 16%
26 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
28 @ September 2013.
30 @ Add NEON implementation. On Cortex A8 it was measured to process one
31 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
32 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
33 @ code (meaning that latter performs sub-optimally, nothing was done
34 @ about it).
36 @ May 2014.
38 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
@ Userspace builds pull __ARM_ARCH__/__ARM_MAX_ARCH__ and the
@ OPENSSL_armcap_P capability flags from BoringSSL's arm_arch.h;
@ Linux-kernel builds supply their own arch macro and cap the
@ optional code paths at ARMv7 (no runtime armcap lookup).
40 #ifndef __KERNEL__
41 # include <openssl/arm_arch.h>
42 #else
43 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
44 # define __ARM_MAX_ARCH__ 7
45 #endif
47 .text
@ Pre-ARMv7 targets assemble as classic ARM (32-bit) code only.
@ ARMv7+ uses unified syntax and, for non-Apple Thumb-2 builds,
@ assembles the whole file as Thumb-2; otherwise it stays in ARM
@ mode.
48 #if __ARM_ARCH__<7
49 .code   32
50 #else
51 .syntax unified
52 # if defined(__thumb2__) && !defined(__APPLE__)
@ Thumb-2 assemblers lack the "adrl" pseudo-op; alias it to plain
@ "adr" — presumably every adrl in this file is within adr's
@ Thumb-2 range.  NOTE(review): confirm against the generating
@ sha256-armv4.pl script.
53 #  define adrl adr
54 .thumb
55 # else
56 .code   32
57 # endif
58 #endif
@ SHA-256 round constants K[0..63] (FIPS 180-4 §4.2.2: the first
@ 32 bits of the fractional parts of the cube roots of the first
@ 64 primes).  The integer code path points r14 just past this
@ table's start ("sub r14,r3,#256+32") and walks it one word per
@ round with "ldr rX,[r14],#4".  Aligned to 32 bytes (.align 5).
60 .type   K256,%object
61 .align  5
62 K256:
63 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
64 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
65 .word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
66 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
67 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
68 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
69 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
70 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
71 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
72 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
73 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
74 .word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
75 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
76 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
77 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
@ Last constant's low byte (0xf2) is matched by the per-31st-round
@ "cmp rX,#0xf2 @ done?" loop-termination test in the round code.
78 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
79 .size   K256,.-K256
80 .word   0                               @ terminator
@ Position-independent link to the runtime CPU-capability word:
@ stores the offset of OPENSSL_armcap_P from the function's own
@ .Lsha256_block_data_order label.  The entry code loads this word
@ and adds it to its own address (r3) — "ldr r12,[r3,r12]" — to
@ read the flags without an absolute relocation.  Only emitted for
@ userspace ARMv7+ builds, matching the NEON/ARMv8 dispatch below.
81 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
82 .LOPENSSL_armcap:
83 .word   OPENSSL_armcap_P-.Lsha256_block_data_order
84 #endif
85 .align  5
87 .globl  sha256_block_data_order
88 .type   sha256_block_data_order,%function
89 sha256_block_data_order:
90 .Lsha256_block_data_order:
91 #if __ARM_ARCH__<7
92         sub     r3,pc,#8                @ sha256_block_data_order
93 #else
94         adr     r3,sha256_block_data_order
95 #endif
96 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
97         ldr     r12,.LOPENSSL_armcap
98         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
99 #ifdef  __APPLE__
100         ldr     r12,[r12]
101 #endif
102         tst     r12,#ARMV8_SHA256
103         bne     .LARMv8
104         tst     r12,#ARMV7_NEON
105         bne     .LNEON
106 #endif
107         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
108         stmdb   sp!,{r0,r1,r2,r4-r11,lr}
109         ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
110         sub     r14,r3,#256+32  @ K256
111         sub     sp,sp,#16*4             @ alloca(X[16])
112 .Loop:
113 # if __ARM_ARCH__>=7
114         ldr     r2,[r1],#4
115 # else
116         ldrb    r2,[r1,#3]
117 # endif
118         eor     r3,r5,r6                @ magic
119         eor     r12,r12,r12
120 #if __ARM_ARCH__>=7
121         @ ldr   r2,[r1],#4                      @ 0
122 # if 0==15
123         str     r1,[sp,#17*4]                   @ make room for r1
124 # endif
125         eor     r0,r8,r8,ror#5
126         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
127         eor     r0,r0,r8,ror#19 @ Sigma1(e)
128 # ifndef __ARMEB__
129         rev     r2,r2
130 # endif
131 #else
132         @ ldrb  r2,[r1,#3]                      @ 0
133         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
134         ldrb    r12,[r1,#2]
135         ldrb    r0,[r1,#1]
136         orr     r2,r2,r12,lsl#8
137         ldrb    r12,[r1],#4
138         orr     r2,r2,r0,lsl#16
139 # if 0==15
140         str     r1,[sp,#17*4]                   @ make room for r1
141 # endif
142         eor     r0,r8,r8,ror#5
143         orr     r2,r2,r12,lsl#24
144         eor     r0,r0,r8,ror#19 @ Sigma1(e)
145 #endif
146         ldr     r12,[r14],#4                    @ *K256++
147         add     r11,r11,r2                      @ h+=X[i]
148         str     r2,[sp,#0*4]
149         eor     r2,r9,r10
150         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
151         and     r2,r2,r8
152         add     r11,r11,r12                     @ h+=K256[i]
153         eor     r2,r2,r10                       @ Ch(e,f,g)
154         eor     r0,r4,r4,ror#11
155         add     r11,r11,r2                      @ h+=Ch(e,f,g)
156 #if 0==31
157         and     r12,r12,#0xff
158         cmp     r12,#0xf2                       @ done?
159 #endif
160 #if 0<15
161 # if __ARM_ARCH__>=7
162         ldr     r2,[r1],#4                      @ prefetch
163 # else
164         ldrb    r2,[r1,#3]
165 # endif
166         eor     r12,r4,r5                       @ a^b, b^c in next round
167 #else
168         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
169         eor     r12,r4,r5                       @ a^b, b^c in next round
170         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
171 #endif
172         eor     r0,r0,r4,ror#20 @ Sigma0(a)
173         and     r3,r3,r12                       @ (b^c)&=(a^b)
174         add     r7,r7,r11                       @ d+=h
175         eor     r3,r3,r5                        @ Maj(a,b,c)
176         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
177         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
178 #if __ARM_ARCH__>=7
179         @ ldr   r2,[r1],#4                      @ 1
180 # if 1==15
181         str     r1,[sp,#17*4]                   @ make room for r1
182 # endif
183         eor     r0,r7,r7,ror#5
184         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
185         eor     r0,r0,r7,ror#19 @ Sigma1(e)
186 # ifndef __ARMEB__
187         rev     r2,r2
188 # endif
189 #else
190         @ ldrb  r2,[r1,#3]                      @ 1
191         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
192         ldrb    r3,[r1,#2]
193         ldrb    r0,[r1,#1]
194         orr     r2,r2,r3,lsl#8
195         ldrb    r3,[r1],#4
196         orr     r2,r2,r0,lsl#16
197 # if 1==15
198         str     r1,[sp,#17*4]                   @ make room for r1
199 # endif
200         eor     r0,r7,r7,ror#5
201         orr     r2,r2,r3,lsl#24
202         eor     r0,r0,r7,ror#19 @ Sigma1(e)
203 #endif
204         ldr     r3,[r14],#4                     @ *K256++
205         add     r10,r10,r2                      @ h+=X[i]
206         str     r2,[sp,#1*4]
207         eor     r2,r8,r9
208         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
209         and     r2,r2,r7
210         add     r10,r10,r3                      @ h+=K256[i]
211         eor     r2,r2,r9                        @ Ch(e,f,g)
212         eor     r0,r11,r11,ror#11
213         add     r10,r10,r2                      @ h+=Ch(e,f,g)
214 #if 1==31
215         and     r3,r3,#0xff
216         cmp     r3,#0xf2                        @ done?
217 #endif
218 #if 1<15
219 # if __ARM_ARCH__>=7
220         ldr     r2,[r1],#4                      @ prefetch
221 # else
222         ldrb    r2,[r1,#3]
223 # endif
224         eor     r3,r11,r4                       @ a^b, b^c in next round
225 #else
226         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
227         eor     r3,r11,r4                       @ a^b, b^c in next round
228         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
229 #endif
230         eor     r0,r0,r11,ror#20        @ Sigma0(a)
231         and     r12,r12,r3                      @ (b^c)&=(a^b)
232         add     r6,r6,r10                       @ d+=h
233         eor     r12,r12,r4                      @ Maj(a,b,c)
234         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
235         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
236 #if __ARM_ARCH__>=7
237         @ ldr   r2,[r1],#4                      @ 2
238 # if 2==15
239         str     r1,[sp,#17*4]                   @ make room for r1
240 # endif
241         eor     r0,r6,r6,ror#5
242         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
243         eor     r0,r0,r6,ror#19 @ Sigma1(e)
244 # ifndef __ARMEB__
245         rev     r2,r2
246 # endif
247 #else
248         @ ldrb  r2,[r1,#3]                      @ 2
249         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
250         ldrb    r12,[r1,#2]
251         ldrb    r0,[r1,#1]
252         orr     r2,r2,r12,lsl#8
253         ldrb    r12,[r1],#4
254         orr     r2,r2,r0,lsl#16
255 # if 2==15
256         str     r1,[sp,#17*4]                   @ make room for r1
257 # endif
258         eor     r0,r6,r6,ror#5
259         orr     r2,r2,r12,lsl#24
260         eor     r0,r0,r6,ror#19 @ Sigma1(e)
261 #endif
262         ldr     r12,[r14],#4                    @ *K256++
263         add     r9,r9,r2                        @ h+=X[i]
264         str     r2,[sp,#2*4]
265         eor     r2,r7,r8
266         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
267         and     r2,r2,r6
268         add     r9,r9,r12                       @ h+=K256[i]
269         eor     r2,r2,r8                        @ Ch(e,f,g)
270         eor     r0,r10,r10,ror#11
271         add     r9,r9,r2                        @ h+=Ch(e,f,g)
272 #if 2==31
273         and     r12,r12,#0xff
274         cmp     r12,#0xf2                       @ done?
275 #endif
276 #if 2<15
277 # if __ARM_ARCH__>=7
278         ldr     r2,[r1],#4                      @ prefetch
279 # else
280         ldrb    r2,[r1,#3]
281 # endif
282         eor     r12,r10,r11                     @ a^b, b^c in next round
283 #else
284         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
285         eor     r12,r10,r11                     @ a^b, b^c in next round
286         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
287 #endif
288         eor     r0,r0,r10,ror#20        @ Sigma0(a)
289         and     r3,r3,r12                       @ (b^c)&=(a^b)
290         add     r5,r5,r9                        @ d+=h
291         eor     r3,r3,r11                       @ Maj(a,b,c)
292         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
293         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
294 #if __ARM_ARCH__>=7
295         @ ldr   r2,[r1],#4                      @ 3
296 # if 3==15
297         str     r1,[sp,#17*4]                   @ make room for r1
298 # endif
299         eor     r0,r5,r5,ror#5
300         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
301         eor     r0,r0,r5,ror#19 @ Sigma1(e)
302 # ifndef __ARMEB__
303         rev     r2,r2
304 # endif
305 #else
306         @ ldrb  r2,[r1,#3]                      @ 3
307         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
308         ldrb    r3,[r1,#2]
309         ldrb    r0,[r1,#1]
310         orr     r2,r2,r3,lsl#8
311         ldrb    r3,[r1],#4
312         orr     r2,r2,r0,lsl#16
313 # if 3==15
314         str     r1,[sp,#17*4]                   @ make room for r1
315 # endif
316         eor     r0,r5,r5,ror#5
317         orr     r2,r2,r3,lsl#24
318         eor     r0,r0,r5,ror#19 @ Sigma1(e)
319 #endif
320         ldr     r3,[r14],#4                     @ *K256++
321         add     r8,r8,r2                        @ h+=X[i]
322         str     r2,[sp,#3*4]
323         eor     r2,r6,r7
324         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
325         and     r2,r2,r5
326         add     r8,r8,r3                        @ h+=K256[i]
327         eor     r2,r2,r7                        @ Ch(e,f,g)
328         eor     r0,r9,r9,ror#11
329         add     r8,r8,r2                        @ h+=Ch(e,f,g)
330 #if 3==31
331         and     r3,r3,#0xff
332         cmp     r3,#0xf2                        @ done?
333 #endif
334 #if 3<15
335 # if __ARM_ARCH__>=7
336         ldr     r2,[r1],#4                      @ prefetch
337 # else
338         ldrb    r2,[r1,#3]
339 # endif
340         eor     r3,r9,r10                       @ a^b, b^c in next round
341 #else
342         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
343         eor     r3,r9,r10                       @ a^b, b^c in next round
344         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
345 #endif
346         eor     r0,r0,r9,ror#20 @ Sigma0(a)
347         and     r12,r12,r3                      @ (b^c)&=(a^b)
348         add     r4,r4,r8                        @ d+=h
349         eor     r12,r12,r10                     @ Maj(a,b,c)
350         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
351         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
352 #if __ARM_ARCH__>=7
353         @ ldr   r2,[r1],#4                      @ 4
354 # if 4==15
355         str     r1,[sp,#17*4]                   @ make room for r1
356 # endif
357         eor     r0,r4,r4,ror#5
358         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
359         eor     r0,r0,r4,ror#19 @ Sigma1(e)
360 # ifndef __ARMEB__
361         rev     r2,r2
362 # endif
363 #else
364         @ ldrb  r2,[r1,#3]                      @ 4
365         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
366         ldrb    r12,[r1,#2]
367         ldrb    r0,[r1,#1]
368         orr     r2,r2,r12,lsl#8
369         ldrb    r12,[r1],#4
370         orr     r2,r2,r0,lsl#16
371 # if 4==15
372         str     r1,[sp,#17*4]                   @ make room for r1
373 # endif
374         eor     r0,r4,r4,ror#5
375         orr     r2,r2,r12,lsl#24
376         eor     r0,r0,r4,ror#19 @ Sigma1(e)
377 #endif
378         ldr     r12,[r14],#4                    @ *K256++
379         add     r7,r7,r2                        @ h+=X[i]
380         str     r2,[sp,#4*4]
381         eor     r2,r5,r6
382         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
383         and     r2,r2,r4
384         add     r7,r7,r12                       @ h+=K256[i]
385         eor     r2,r2,r6                        @ Ch(e,f,g)
386         eor     r0,r8,r8,ror#11
387         add     r7,r7,r2                        @ h+=Ch(e,f,g)
388 #if 4==31
389         and     r12,r12,#0xff
390         cmp     r12,#0xf2                       @ done?
391 #endif
392 #if 4<15
393 # if __ARM_ARCH__>=7
394         ldr     r2,[r1],#4                      @ prefetch
395 # else
396         ldrb    r2,[r1,#3]
397 # endif
398         eor     r12,r8,r9                       @ a^b, b^c in next round
399 #else
400         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
401         eor     r12,r8,r9                       @ a^b, b^c in next round
402         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
403 #endif
404         eor     r0,r0,r8,ror#20 @ Sigma0(a)
405         and     r3,r3,r12                       @ (b^c)&=(a^b)
406         add     r11,r11,r7                      @ d+=h
407         eor     r3,r3,r9                        @ Maj(a,b,c)
408         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
409         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
410 #if __ARM_ARCH__>=7
411         @ ldr   r2,[r1],#4                      @ 5
412 # if 5==15
413         str     r1,[sp,#17*4]                   @ make room for r1
414 # endif
415         eor     r0,r11,r11,ror#5
416         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
417         eor     r0,r0,r11,ror#19        @ Sigma1(e)
418 # ifndef __ARMEB__
419         rev     r2,r2
420 # endif
421 #else
422         @ ldrb  r2,[r1,#3]                      @ 5
423         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
424         ldrb    r3,[r1,#2]
425         ldrb    r0,[r1,#1]
426         orr     r2,r2,r3,lsl#8
427         ldrb    r3,[r1],#4
428         orr     r2,r2,r0,lsl#16
429 # if 5==15
430         str     r1,[sp,#17*4]                   @ make room for r1
431 # endif
432         eor     r0,r11,r11,ror#5
433         orr     r2,r2,r3,lsl#24
434         eor     r0,r0,r11,ror#19        @ Sigma1(e)
435 #endif
436         ldr     r3,[r14],#4                     @ *K256++
437         add     r6,r6,r2                        @ h+=X[i]
438         str     r2,[sp,#5*4]
439         eor     r2,r4,r5
440         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
441         and     r2,r2,r11
442         add     r6,r6,r3                        @ h+=K256[i]
443         eor     r2,r2,r5                        @ Ch(e,f,g)
444         eor     r0,r7,r7,ror#11
445         add     r6,r6,r2                        @ h+=Ch(e,f,g)
446 #if 5==31
447         and     r3,r3,#0xff
448         cmp     r3,#0xf2                        @ done?
449 #endif
450 #if 5<15
451 # if __ARM_ARCH__>=7
452         ldr     r2,[r1],#4                      @ prefetch
453 # else
454         ldrb    r2,[r1,#3]
455 # endif
456         eor     r3,r7,r8                        @ a^b, b^c in next round
457 #else
458         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
459         eor     r3,r7,r8                        @ a^b, b^c in next round
460         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
461 #endif
462         eor     r0,r0,r7,ror#20 @ Sigma0(a)
463         and     r12,r12,r3                      @ (b^c)&=(a^b)
464         add     r10,r10,r6                      @ d+=h
465         eor     r12,r12,r8                      @ Maj(a,b,c)
466         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
467         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
468 #if __ARM_ARCH__>=7
469         @ ldr   r2,[r1],#4                      @ 6
470 # if 6==15
471         str     r1,[sp,#17*4]                   @ make room for r1
472 # endif
473         eor     r0,r10,r10,ror#5
474         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
475         eor     r0,r0,r10,ror#19        @ Sigma1(e)
476 # ifndef __ARMEB__
477         rev     r2,r2
478 # endif
479 #else
480         @ ldrb  r2,[r1,#3]                      @ 6
481         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
482         ldrb    r12,[r1,#2]
483         ldrb    r0,[r1,#1]
484         orr     r2,r2,r12,lsl#8
485         ldrb    r12,[r1],#4
486         orr     r2,r2,r0,lsl#16
487 # if 6==15
488         str     r1,[sp,#17*4]                   @ make room for r1
489 # endif
490         eor     r0,r10,r10,ror#5
491         orr     r2,r2,r12,lsl#24
492         eor     r0,r0,r10,ror#19        @ Sigma1(e)
493 #endif
494         ldr     r12,[r14],#4                    @ *K256++
495         add     r5,r5,r2                        @ h+=X[i]
496         str     r2,[sp,#6*4]
497         eor     r2,r11,r4
498         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
499         and     r2,r2,r10
500         add     r5,r5,r12                       @ h+=K256[i]
501         eor     r2,r2,r4                        @ Ch(e,f,g)
502         eor     r0,r6,r6,ror#11
503         add     r5,r5,r2                        @ h+=Ch(e,f,g)
504 #if 6==31
505         and     r12,r12,#0xff
506         cmp     r12,#0xf2                       @ done?
507 #endif
508 #if 6<15
509 # if __ARM_ARCH__>=7
510         ldr     r2,[r1],#4                      @ prefetch
511 # else
512         ldrb    r2,[r1,#3]
513 # endif
514         eor     r12,r6,r7                       @ a^b, b^c in next round
515 #else
516         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
517         eor     r12,r6,r7                       @ a^b, b^c in next round
518         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
519 #endif
520         eor     r0,r0,r6,ror#20 @ Sigma0(a)
521         and     r3,r3,r12                       @ (b^c)&=(a^b)
522         add     r9,r9,r5                        @ d+=h
523         eor     r3,r3,r7                        @ Maj(a,b,c)
524         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
525         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
526 #if __ARM_ARCH__>=7
527         @ ldr   r2,[r1],#4                      @ 7
528 # if 7==15
529         str     r1,[sp,#17*4]                   @ make room for r1
530 # endif
531         eor     r0,r9,r9,ror#5
532         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
533         eor     r0,r0,r9,ror#19 @ Sigma1(e)
534 # ifndef __ARMEB__
535         rev     r2,r2
536 # endif
537 #else
538         @ ldrb  r2,[r1,#3]                      @ 7
539         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
540         ldrb    r3,[r1,#2]
541         ldrb    r0,[r1,#1]
542         orr     r2,r2,r3,lsl#8
543         ldrb    r3,[r1],#4
544         orr     r2,r2,r0,lsl#16
545 # if 7==15
546         str     r1,[sp,#17*4]                   @ make room for r1
547 # endif
548         eor     r0,r9,r9,ror#5
549         orr     r2,r2,r3,lsl#24
550         eor     r0,r0,r9,ror#19 @ Sigma1(e)
551 #endif
552         ldr     r3,[r14],#4                     @ *K256++
553         add     r4,r4,r2                        @ h+=X[i]
554         str     r2,[sp,#7*4]
555         eor     r2,r10,r11
556         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
557         and     r2,r2,r9
558         add     r4,r4,r3                        @ h+=K256[i]
559         eor     r2,r2,r11                       @ Ch(e,f,g)
560         eor     r0,r5,r5,ror#11
561         add     r4,r4,r2                        @ h+=Ch(e,f,g)
562 #if 7==31
563         and     r3,r3,#0xff
564         cmp     r3,#0xf2                        @ done?
565 #endif
566 #if 7<15
567 # if __ARM_ARCH__>=7
568         ldr     r2,[r1],#4                      @ prefetch
569 # else
570         ldrb    r2,[r1,#3]
571 # endif
572         eor     r3,r5,r6                        @ a^b, b^c in next round
573 #else
574         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
575         eor     r3,r5,r6                        @ a^b, b^c in next round
576         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
577 #endif
578         eor     r0,r0,r5,ror#20 @ Sigma0(a)
579         and     r12,r12,r3                      @ (b^c)&=(a^b)
580         add     r8,r8,r4                        @ d+=h
581         eor     r12,r12,r6                      @ Maj(a,b,c)
582         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
583         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
584 #if __ARM_ARCH__>=7
585         @ ldr   r2,[r1],#4                      @ 8
586 # if 8==15
587         str     r1,[sp,#17*4]                   @ make room for r1
588 # endif
589         eor     r0,r8,r8,ror#5
590         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
591         eor     r0,r0,r8,ror#19 @ Sigma1(e)
592 # ifndef __ARMEB__
593         rev     r2,r2
594 # endif
595 #else
596         @ ldrb  r2,[r1,#3]                      @ 8
597         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
598         ldrb    r12,[r1,#2]
599         ldrb    r0,[r1,#1]
600         orr     r2,r2,r12,lsl#8
601         ldrb    r12,[r1],#4
602         orr     r2,r2,r0,lsl#16
603 # if 8==15
604         str     r1,[sp,#17*4]                   @ make room for r1
605 # endif
606         eor     r0,r8,r8,ror#5
607         orr     r2,r2,r12,lsl#24
608         eor     r0,r0,r8,ror#19 @ Sigma1(e)
609 #endif
610         ldr     r12,[r14],#4                    @ *K256++
611         add     r11,r11,r2                      @ h+=X[i]
612         str     r2,[sp,#8*4]
613         eor     r2,r9,r10
614         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
615         and     r2,r2,r8
616         add     r11,r11,r12                     @ h+=K256[i]
617         eor     r2,r2,r10                       @ Ch(e,f,g)
618         eor     r0,r4,r4,ror#11
619         add     r11,r11,r2                      @ h+=Ch(e,f,g)
620 #if 8==31
621         and     r12,r12,#0xff
622         cmp     r12,#0xf2                       @ done?
623 #endif
624 #if 8<15
625 # if __ARM_ARCH__>=7
626         ldr     r2,[r1],#4                      @ prefetch
627 # else
628         ldrb    r2,[r1,#3]
629 # endif
630         eor     r12,r4,r5                       @ a^b, b^c in next round
631 #else
632         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
633         eor     r12,r4,r5                       @ a^b, b^c in next round
634         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
635 #endif
636         eor     r0,r0,r4,ror#20 @ Sigma0(a)
637         and     r3,r3,r12                       @ (b^c)&=(a^b)
638         add     r7,r7,r11                       @ d+=h
639         eor     r3,r3,r5                        @ Maj(a,b,c)
640         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
641         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
642 #if __ARM_ARCH__>=7
643         @ ldr   r2,[r1],#4                      @ 9
644 # if 9==15
645         str     r1,[sp,#17*4]                   @ make room for r1
646 # endif
647         eor     r0,r7,r7,ror#5
648         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
649         eor     r0,r0,r7,ror#19 @ Sigma1(e)
650 # ifndef __ARMEB__
651         rev     r2,r2
652 # endif
653 #else
654         @ ldrb  r2,[r1,#3]                      @ 9
655         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
656         ldrb    r3,[r1,#2]
657         ldrb    r0,[r1,#1]
658         orr     r2,r2,r3,lsl#8
659         ldrb    r3,[r1],#4
660         orr     r2,r2,r0,lsl#16
661 # if 9==15
662         str     r1,[sp,#17*4]                   @ make room for r1
663 # endif
664         eor     r0,r7,r7,ror#5
665         orr     r2,r2,r3,lsl#24
666         eor     r0,r0,r7,ror#19 @ Sigma1(e)
667 #endif
668         ldr     r3,[r14],#4                     @ *K256++
669         add     r10,r10,r2                      @ h+=X[i]
670         str     r2,[sp,#9*4]
671         eor     r2,r8,r9
672         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
673         and     r2,r2,r7
674         add     r10,r10,r3                      @ h+=K256[i]
675         eor     r2,r2,r9                        @ Ch(e,f,g)
676         eor     r0,r11,r11,ror#11
677         add     r10,r10,r2                      @ h+=Ch(e,f,g)
678 #if 9==31
679         and     r3,r3,#0xff
680         cmp     r3,#0xf2                        @ done?
681 #endif
682 #if 9<15
683 # if __ARM_ARCH__>=7
684         ldr     r2,[r1],#4                      @ prefetch
685 # else
686         ldrb    r2,[r1,#3]
687 # endif
688         eor     r3,r11,r4                       @ a^b, b^c in next round
689 #else
690         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
691         eor     r3,r11,r4                       @ a^b, b^c in next round
692         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
693 #endif
694         eor     r0,r0,r11,ror#20        @ Sigma0(a)
695         and     r12,r12,r3                      @ (b^c)&=(a^b)
696         add     r6,r6,r10                       @ d+=h
697         eor     r12,r12,r4                      @ Maj(a,b,c)
698         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
699         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
700 #if __ARM_ARCH__>=7
701         @ ldr   r2,[r1],#4                      @ 10
702 # if 10==15
703         str     r1,[sp,#17*4]                   @ make room for r1
704 # endif
705         eor     r0,r6,r6,ror#5
706         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
707         eor     r0,r0,r6,ror#19 @ Sigma1(e)
708 # ifndef __ARMEB__
709         rev     r2,r2
710 # endif
711 #else
712         @ ldrb  r2,[r1,#3]                      @ 10
713         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
714         ldrb    r12,[r1,#2]
715         ldrb    r0,[r1,#1]
716         orr     r2,r2,r12,lsl#8
717         ldrb    r12,[r1],#4
718         orr     r2,r2,r0,lsl#16
719 # if 10==15
720         str     r1,[sp,#17*4]                   @ make room for r1
721 # endif
722         eor     r0,r6,r6,ror#5
723         orr     r2,r2,r12,lsl#24
724         eor     r0,r0,r6,ror#19 @ Sigma1(e)
725 #endif
726         ldr     r12,[r14],#4                    @ *K256++
727         add     r9,r9,r2                        @ h+=X[i]
728         str     r2,[sp,#10*4]
729         eor     r2,r7,r8
730         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
731         and     r2,r2,r6
732         add     r9,r9,r12                       @ h+=K256[i]
733         eor     r2,r2,r8                        @ Ch(e,f,g)
734         eor     r0,r10,r10,ror#11
735         add     r9,r9,r2                        @ h+=Ch(e,f,g)
736 #if 10==31
737         and     r12,r12,#0xff
738         cmp     r12,#0xf2                       @ done?
739 #endif
740 #if 10<15
741 # if __ARM_ARCH__>=7
742         ldr     r2,[r1],#4                      @ prefetch
743 # else
744         ldrb    r2,[r1,#3]
745 # endif
746         eor     r12,r10,r11                     @ a^b, b^c in next round
747 #else
748         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
749         eor     r12,r10,r11                     @ a^b, b^c in next round
750         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
751 #endif
752         eor     r0,r0,r10,ror#20        @ Sigma0(a)
753         and     r3,r3,r12                       @ (b^c)&=(a^b)
754         add     r5,r5,r9                        @ d+=h
755         eor     r3,r3,r11                       @ Maj(a,b,c)
756         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
757         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
758 #if __ARM_ARCH__>=7
759         @ ldr   r2,[r1],#4                      @ 11
760 # if 11==15
761         str     r1,[sp,#17*4]                   @ make room for r1
762 # endif
763         eor     r0,r5,r5,ror#5
764         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
765         eor     r0,r0,r5,ror#19 @ Sigma1(e)
766 # ifndef __ARMEB__
767         rev     r2,r2
768 # endif
769 #else
770         @ ldrb  r2,[r1,#3]                      @ 11
771         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
772         ldrb    r3,[r1,#2]
773         ldrb    r0,[r1,#1]
774         orr     r2,r2,r3,lsl#8
775         ldrb    r3,[r1],#4
776         orr     r2,r2,r0,lsl#16
777 # if 11==15
778         str     r1,[sp,#17*4]                   @ make room for r1
779 # endif
780         eor     r0,r5,r5,ror#5
781         orr     r2,r2,r3,lsl#24
782         eor     r0,r0,r5,ror#19 @ Sigma1(e)
783 #endif
784         ldr     r3,[r14],#4                     @ *K256++
785         add     r8,r8,r2                        @ h+=X[i]
786         str     r2,[sp,#11*4]
787         eor     r2,r6,r7
788         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
789         and     r2,r2,r5
790         add     r8,r8,r3                        @ h+=K256[i]
791         eor     r2,r2,r7                        @ Ch(e,f,g)
792         eor     r0,r9,r9,ror#11
793         add     r8,r8,r2                        @ h+=Ch(e,f,g)
794 #if 11==31
795         and     r3,r3,#0xff
796         cmp     r3,#0xf2                        @ done?
797 #endif
798 #if 11<15
799 # if __ARM_ARCH__>=7
800         ldr     r2,[r1],#4                      @ prefetch
801 # else
802         ldrb    r2,[r1,#3]
803 # endif
804         eor     r3,r9,r10                       @ a^b, b^c in next round
805 #else
806         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
807         eor     r3,r9,r10                       @ a^b, b^c in next round
808         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
809 #endif
810         eor     r0,r0,r9,ror#20 @ Sigma0(a)
811         and     r12,r12,r3                      @ (b^c)&=(a^b)
812         add     r4,r4,r8                        @ d+=h
813         eor     r12,r12,r10                     @ Maj(a,b,c)
814         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
815         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
816 #if __ARM_ARCH__>=7
817         @ ldr   r2,[r1],#4                      @ 12
818 # if 12==15
819         str     r1,[sp,#17*4]                   @ make room for r1
820 # endif
821         eor     r0,r4,r4,ror#5
822         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
823         eor     r0,r0,r4,ror#19 @ Sigma1(e)
824 # ifndef __ARMEB__
825         rev     r2,r2
826 # endif
827 #else
828         @ ldrb  r2,[r1,#3]                      @ 12
829         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
830         ldrb    r12,[r1,#2]
831         ldrb    r0,[r1,#1]
832         orr     r2,r2,r12,lsl#8
833         ldrb    r12,[r1],#4
834         orr     r2,r2,r0,lsl#16
835 # if 12==15
836         str     r1,[sp,#17*4]                   @ make room for r1
837 # endif
838         eor     r0,r4,r4,ror#5
839         orr     r2,r2,r12,lsl#24
840         eor     r0,r0,r4,ror#19 @ Sigma1(e)
841 #endif
842         ldr     r12,[r14],#4                    @ *K256++
843         add     r7,r7,r2                        @ h+=X[i]
844         str     r2,[sp,#12*4]
845         eor     r2,r5,r6
846         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
847         and     r2,r2,r4
848         add     r7,r7,r12                       @ h+=K256[i]
849         eor     r2,r2,r6                        @ Ch(e,f,g)
850         eor     r0,r8,r8,ror#11
851         add     r7,r7,r2                        @ h+=Ch(e,f,g)
852 #if 12==31
853         and     r12,r12,#0xff
854         cmp     r12,#0xf2                       @ done?
855 #endif
856 #if 12<15
857 # if __ARM_ARCH__>=7
858         ldr     r2,[r1],#4                      @ prefetch
859 # else
860         ldrb    r2,[r1,#3]
861 # endif
862         eor     r12,r8,r9                       @ a^b, b^c in next round
863 #else
864         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
865         eor     r12,r8,r9                       @ a^b, b^c in next round
866         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
867 #endif
868         eor     r0,r0,r8,ror#20 @ Sigma0(a)
869         and     r3,r3,r12                       @ (b^c)&=(a^b)
870         add     r11,r11,r7                      @ d+=h
871         eor     r3,r3,r9                        @ Maj(a,b,c)
872         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
873         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
874 #if __ARM_ARCH__>=7
875         @ ldr   r2,[r1],#4                      @ 13
876 # if 13==15
877         str     r1,[sp,#17*4]                   @ make room for r1
878 # endif
879         eor     r0,r11,r11,ror#5
880         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
881         eor     r0,r0,r11,ror#19        @ Sigma1(e)
882 # ifndef __ARMEB__
883         rev     r2,r2
884 # endif
885 #else
886         @ ldrb  r2,[r1,#3]                      @ 13
887         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
888         ldrb    r3,[r1,#2]
889         ldrb    r0,[r1,#1]
890         orr     r2,r2,r3,lsl#8
891         ldrb    r3,[r1],#4
892         orr     r2,r2,r0,lsl#16
893 # if 13==15
894         str     r1,[sp,#17*4]                   @ make room for r1
895 # endif
896         eor     r0,r11,r11,ror#5
897         orr     r2,r2,r3,lsl#24
898         eor     r0,r0,r11,ror#19        @ Sigma1(e)
899 #endif
900         ldr     r3,[r14],#4                     @ *K256++
901         add     r6,r6,r2                        @ h+=X[i]
902         str     r2,[sp,#13*4]
903         eor     r2,r4,r5
904         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
905         and     r2,r2,r11
906         add     r6,r6,r3                        @ h+=K256[i]
907         eor     r2,r2,r5                        @ Ch(e,f,g)
908         eor     r0,r7,r7,ror#11
909         add     r6,r6,r2                        @ h+=Ch(e,f,g)
910 #if 13==31
911         and     r3,r3,#0xff
912         cmp     r3,#0xf2                        @ done?
913 #endif
914 #if 13<15
915 # if __ARM_ARCH__>=7
916         ldr     r2,[r1],#4                      @ prefetch
917 # else
918         ldrb    r2,[r1,#3]
919 # endif
920         eor     r3,r7,r8                        @ a^b, b^c in next round
921 #else
922         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
923         eor     r3,r7,r8                        @ a^b, b^c in next round
924         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
925 #endif
926         eor     r0,r0,r7,ror#20 @ Sigma0(a)
927         and     r12,r12,r3                      @ (b^c)&=(a^b)
928         add     r10,r10,r6                      @ d+=h
929         eor     r12,r12,r8                      @ Maj(a,b,c)
930         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
931         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
932 #if __ARM_ARCH__>=7
933         @ ldr   r2,[r1],#4                      @ 14
934 # if 14==15
935         str     r1,[sp,#17*4]                   @ make room for r1
936 # endif
937         eor     r0,r10,r10,ror#5
938         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
939         eor     r0,r0,r10,ror#19        @ Sigma1(e)
940 # ifndef __ARMEB__
941         rev     r2,r2
942 # endif
943 #else
944         @ ldrb  r2,[r1,#3]                      @ 14
945         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
946         ldrb    r12,[r1,#2]
947         ldrb    r0,[r1,#1]
948         orr     r2,r2,r12,lsl#8
949         ldrb    r12,[r1],#4
950         orr     r2,r2,r0,lsl#16
951 # if 14==15
952         str     r1,[sp,#17*4]                   @ make room for r1
953 # endif
954         eor     r0,r10,r10,ror#5
955         orr     r2,r2,r12,lsl#24
956         eor     r0,r0,r10,ror#19        @ Sigma1(e)
957 #endif
958         ldr     r12,[r14],#4                    @ *K256++
959         add     r5,r5,r2                        @ h+=X[i]
960         str     r2,[sp,#14*4]
961         eor     r2,r11,r4
962         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
963         and     r2,r2,r10
964         add     r5,r5,r12                       @ h+=K256[i]
965         eor     r2,r2,r4                        @ Ch(e,f,g)
966         eor     r0,r6,r6,ror#11
967         add     r5,r5,r2                        @ h+=Ch(e,f,g)
968 #if 14==31
969         and     r12,r12,#0xff
970         cmp     r12,#0xf2                       @ done?
971 #endif
972 #if 14<15
973 # if __ARM_ARCH__>=7
974         ldr     r2,[r1],#4                      @ prefetch
975 # else
976         ldrb    r2,[r1,#3]
977 # endif
978         eor     r12,r6,r7                       @ a^b, b^c in next round
979 #else
980         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
981         eor     r12,r6,r7                       @ a^b, b^c in next round
982         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
983 #endif
984         eor     r0,r0,r6,ror#20 @ Sigma0(a)
985         and     r3,r3,r12                       @ (b^c)&=(a^b)
986         add     r9,r9,r5                        @ d+=h
987         eor     r3,r3,r7                        @ Maj(a,b,c)
988         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
989         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
990 #if __ARM_ARCH__>=7
991         @ ldr   r2,[r1],#4                      @ 15
992 # if 15==15
993         str     r1,[sp,#17*4]                   @ make room for r1
994 # endif
995         eor     r0,r9,r9,ror#5
996         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
997         eor     r0,r0,r9,ror#19 @ Sigma1(e)
998 # ifndef __ARMEB__
999         rev     r2,r2
1000 # endif
1001 #else
1002         @ ldrb  r2,[r1,#3]                      @ 15
1003         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1004         ldrb    r3,[r1,#2]
1005         ldrb    r0,[r1,#1]
1006         orr     r2,r2,r3,lsl#8
1007         ldrb    r3,[r1],#4
1008         orr     r2,r2,r0,lsl#16
1009 # if 15==15
1010         str     r1,[sp,#17*4]                   @ make room for r1
1011 # endif
1012         eor     r0,r9,r9,ror#5
1013         orr     r2,r2,r3,lsl#24
1014         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1015 #endif
1016         ldr     r3,[r14],#4                     @ *K256++
1017         add     r4,r4,r2                        @ h+=X[i]
1018         str     r2,[sp,#15*4]
1019         eor     r2,r10,r11
1020         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1021         and     r2,r2,r9
1022         add     r4,r4,r3                        @ h+=K256[i]
1023         eor     r2,r2,r11                       @ Ch(e,f,g)
1024         eor     r0,r5,r5,ror#11
1025         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1026 #if 15==31
1027         and     r3,r3,#0xff
1028         cmp     r3,#0xf2                        @ done?
1029 #endif
1030 #if 15<15
1031 # if __ARM_ARCH__>=7
1032         ldr     r2,[r1],#4                      @ prefetch
1033 # else
1034         ldrb    r2,[r1,#3]
1035 # endif
1036         eor     r3,r5,r6                        @ a^b, b^c in next round
1037 #else
1038         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1039         eor     r3,r5,r6                        @ a^b, b^c in next round
1040         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1041 #endif
1042         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1043         and     r12,r12,r3                      @ (b^c)&=(a^b)
1044         add     r8,r8,r4                        @ d+=h
1045         eor     r12,r12,r6                      @ Maj(a,b,c)
1046         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1047         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1048 .Lrounds_16_xx:
1049         @ ldr   r2,[sp,#1*4]            @ 16
1050         @ ldr   r1,[sp,#14*4]
1051         mov     r0,r2,ror#7
1052         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1053         mov     r12,r1,ror#17
1054         eor     r0,r0,r2,ror#18
1055         eor     r12,r12,r1,ror#19
1056         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1057         ldr     r2,[sp,#0*4]
1058         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1059         ldr     r1,[sp,#9*4]
1061         add     r12,r12,r0
1062         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1063         add     r2,r2,r12
1064         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1065         add     r2,r2,r1                        @ X[i]
1066         ldr     r12,[r14],#4                    @ *K256++
1067         add     r11,r11,r2                      @ h+=X[i]
1068         str     r2,[sp,#0*4]
1069         eor     r2,r9,r10
1070         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1071         and     r2,r2,r8
1072         add     r11,r11,r12                     @ h+=K256[i]
1073         eor     r2,r2,r10                       @ Ch(e,f,g)
1074         eor     r0,r4,r4,ror#11
1075         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1076 #if 16==31
1077         and     r12,r12,#0xff
1078         cmp     r12,#0xf2                       @ done?
1079 #endif
1080 #if 16<15
1081 # if __ARM_ARCH__>=7
1082         ldr     r2,[r1],#4                      @ prefetch
1083 # else
1084         ldrb    r2,[r1,#3]
1085 # endif
1086         eor     r12,r4,r5                       @ a^b, b^c in next round
1087 #else
1088         ldr     r2,[sp,#2*4]            @ from future BODY_16_xx
1089         eor     r12,r4,r5                       @ a^b, b^c in next round
1090         ldr     r1,[sp,#15*4]   @ from future BODY_16_xx
1091 #endif
1092         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1093         and     r3,r3,r12                       @ (b^c)&=(a^b)
1094         add     r7,r7,r11                       @ d+=h
1095         eor     r3,r3,r5                        @ Maj(a,b,c)
1096         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1097         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1098         @ ldr   r2,[sp,#2*4]            @ 17
1099         @ ldr   r1,[sp,#15*4]
1100         mov     r0,r2,ror#7
1101         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1102         mov     r3,r1,ror#17
1103         eor     r0,r0,r2,ror#18
1104         eor     r3,r3,r1,ror#19
1105         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1106         ldr     r2,[sp,#1*4]
1107         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1108         ldr     r1,[sp,#10*4]
1110         add     r3,r3,r0
1111         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1112         add     r2,r2,r3
1113         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1114         add     r2,r2,r1                        @ X[i]
1115         ldr     r3,[r14],#4                     @ *K256++
1116         add     r10,r10,r2                      @ h+=X[i]
1117         str     r2,[sp,#1*4]
1118         eor     r2,r8,r9
1119         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1120         and     r2,r2,r7
1121         add     r10,r10,r3                      @ h+=K256[i]
1122         eor     r2,r2,r9                        @ Ch(e,f,g)
1123         eor     r0,r11,r11,ror#11
1124         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1125 #if 17==31
1126         and     r3,r3,#0xff
1127         cmp     r3,#0xf2                        @ done?
1128 #endif
1129 #if 17<15
1130 # if __ARM_ARCH__>=7
1131         ldr     r2,[r1],#4                      @ prefetch
1132 # else
1133         ldrb    r2,[r1,#3]
1134 # endif
1135         eor     r3,r11,r4                       @ a^b, b^c in next round
1136 #else
1137         ldr     r2,[sp,#3*4]            @ from future BODY_16_xx
1138         eor     r3,r11,r4                       @ a^b, b^c in next round
1139         ldr     r1,[sp,#0*4]    @ from future BODY_16_xx
1140 #endif
1141         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1142         and     r12,r12,r3                      @ (b^c)&=(a^b)
1143         add     r6,r6,r10                       @ d+=h
1144         eor     r12,r12,r4                      @ Maj(a,b,c)
1145         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1146         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1147         @ ldr   r2,[sp,#3*4]            @ 18
1148         @ ldr   r1,[sp,#0*4]
1149         mov     r0,r2,ror#7
1150         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1151         mov     r12,r1,ror#17
1152         eor     r0,r0,r2,ror#18
1153         eor     r12,r12,r1,ror#19
1154         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1155         ldr     r2,[sp,#2*4]
1156         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1157         ldr     r1,[sp,#11*4]
1159         add     r12,r12,r0
1160         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1161         add     r2,r2,r12
1162         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1163         add     r2,r2,r1                        @ X[i]
1164         ldr     r12,[r14],#4                    @ *K256++
1165         add     r9,r9,r2                        @ h+=X[i]
1166         str     r2,[sp,#2*4]
1167         eor     r2,r7,r8
1168         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1169         and     r2,r2,r6
1170         add     r9,r9,r12                       @ h+=K256[i]
1171         eor     r2,r2,r8                        @ Ch(e,f,g)
1172         eor     r0,r10,r10,ror#11
1173         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1174 #if 18==31
1175         and     r12,r12,#0xff
1176         cmp     r12,#0xf2                       @ done?
1177 #endif
1178 #if 18<15
1179 # if __ARM_ARCH__>=7
1180         ldr     r2,[r1],#4                      @ prefetch
1181 # else
1182         ldrb    r2,[r1,#3]
1183 # endif
1184         eor     r12,r10,r11                     @ a^b, b^c in next round
1185 #else
1186         ldr     r2,[sp,#4*4]            @ from future BODY_16_xx
1187         eor     r12,r10,r11                     @ a^b, b^c in next round
1188         ldr     r1,[sp,#1*4]    @ from future BODY_16_xx
1189 #endif
1190         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1191         and     r3,r3,r12                       @ (b^c)&=(a^b)
1192         add     r5,r5,r9                        @ d+=h
1193         eor     r3,r3,r11                       @ Maj(a,b,c)
1194         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1195         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1196         @ ldr   r2,[sp,#4*4]            @ 19
1197         @ ldr   r1,[sp,#1*4]
1198         mov     r0,r2,ror#7
1199         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1200         mov     r3,r1,ror#17
1201         eor     r0,r0,r2,ror#18
1202         eor     r3,r3,r1,ror#19
1203         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1204         ldr     r2,[sp,#3*4]
1205         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1206         ldr     r1,[sp,#12*4]
1208         add     r3,r3,r0
1209         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1210         add     r2,r2,r3
1211         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1212         add     r2,r2,r1                        @ X[i]
1213         ldr     r3,[r14],#4                     @ *K256++
1214         add     r8,r8,r2                        @ h+=X[i]
1215         str     r2,[sp,#3*4]
1216         eor     r2,r6,r7
1217         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1218         and     r2,r2,r5
1219         add     r8,r8,r3                        @ h+=K256[i]
1220         eor     r2,r2,r7                        @ Ch(e,f,g)
1221         eor     r0,r9,r9,ror#11
1222         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1223 #if 19==31
1224         and     r3,r3,#0xff
1225         cmp     r3,#0xf2                        @ done?
1226 #endif
1227 #if 19<15
1228 # if __ARM_ARCH__>=7
1229         ldr     r2,[r1],#4                      @ prefetch
1230 # else
1231         ldrb    r2,[r1,#3]
1232 # endif
1233         eor     r3,r9,r10                       @ a^b, b^c in next round
1234 #else
1235         ldr     r2,[sp,#5*4]            @ from future BODY_16_xx
1236         eor     r3,r9,r10                       @ a^b, b^c in next round
1237         ldr     r1,[sp,#2*4]    @ from future BODY_16_xx
1238 #endif
1239         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1240         and     r12,r12,r3                      @ (b^c)&=(a^b)
1241         add     r4,r4,r8                        @ d+=h
1242         eor     r12,r12,r10                     @ Maj(a,b,c)
1243         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1244         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1245         @ ldr   r2,[sp,#5*4]            @ 20
1246         @ ldr   r1,[sp,#2*4]
1247         mov     r0,r2,ror#7
1248         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1249         mov     r12,r1,ror#17
1250         eor     r0,r0,r2,ror#18
1251         eor     r12,r12,r1,ror#19
1252         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1253         ldr     r2,[sp,#4*4]
1254         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1255         ldr     r1,[sp,#13*4]
1257         add     r12,r12,r0
1258         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1259         add     r2,r2,r12
1260         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1261         add     r2,r2,r1                        @ X[i]
1262         ldr     r12,[r14],#4                    @ *K256++
1263         add     r7,r7,r2                        @ h+=X[i]
1264         str     r2,[sp,#4*4]
1265         eor     r2,r5,r6
1266         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1267         and     r2,r2,r4
1268         add     r7,r7,r12                       @ h+=K256[i]
1269         eor     r2,r2,r6                        @ Ch(e,f,g)
1270         eor     r0,r8,r8,ror#11
1271         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1272 #if 20==31
1273         and     r12,r12,#0xff
1274         cmp     r12,#0xf2                       @ done?
1275 #endif
1276 #if 20<15
1277 # if __ARM_ARCH__>=7
1278         ldr     r2,[r1],#4                      @ prefetch
1279 # else
1280         ldrb    r2,[r1,#3]
1281 # endif
1282         eor     r12,r8,r9                       @ a^b, b^c in next round
1283 #else
1284         ldr     r2,[sp,#6*4]            @ from future BODY_16_xx
1285         eor     r12,r8,r9                       @ a^b, b^c in next round
1286         ldr     r1,[sp,#3*4]    @ from future BODY_16_xx
1287 #endif
1288         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1289         and     r3,r3,r12                       @ (b^c)&=(a^b)
1290         add     r11,r11,r7                      @ d+=h
1291         eor     r3,r3,r9                        @ Maj(a,b,c)
1292         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1293         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1294         @ ldr   r2,[sp,#6*4]            @ 21
1295         @ ldr   r1,[sp,#3*4]
1296         mov     r0,r2,ror#7
1297         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1298         mov     r3,r1,ror#17
1299         eor     r0,r0,r2,ror#18
1300         eor     r3,r3,r1,ror#19
1301         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1302         ldr     r2,[sp,#5*4]
1303         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1304         ldr     r1,[sp,#14*4]
1306         add     r3,r3,r0
1307         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1308         add     r2,r2,r3
1309         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1310         add     r2,r2,r1                        @ X[i]
1311         ldr     r3,[r14],#4                     @ *K256++
1312         add     r6,r6,r2                        @ h+=X[i]
1313         str     r2,[sp,#5*4]
1314         eor     r2,r4,r5
1315         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1316         and     r2,r2,r11
1317         add     r6,r6,r3                        @ h+=K256[i]
1318         eor     r2,r2,r5                        @ Ch(e,f,g)
1319         eor     r0,r7,r7,ror#11
1320         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1321 #if 21==31
1322         and     r3,r3,#0xff
1323         cmp     r3,#0xf2                        @ done?
1324 #endif
1325 #if 21<15
1326 # if __ARM_ARCH__>=7
1327         ldr     r2,[r1],#4                      @ prefetch
1328 # else
1329         ldrb    r2,[r1,#3]
1330 # endif
1331         eor     r3,r7,r8                        @ a^b, b^c in next round
1332 #else
1333         ldr     r2,[sp,#7*4]            @ from future BODY_16_xx
1334         eor     r3,r7,r8                        @ a^b, b^c in next round
1335         ldr     r1,[sp,#4*4]    @ from future BODY_16_xx
1336 #endif
1337         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1338         and     r12,r12,r3                      @ (b^c)&=(a^b)
1339         add     r10,r10,r6                      @ d+=h
1340         eor     r12,r12,r8                      @ Maj(a,b,c)
1341         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1342         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1343         @ ldr   r2,[sp,#7*4]            @ 22
1344         @ ldr   r1,[sp,#4*4]
1345         mov     r0,r2,ror#7
1346         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1347         mov     r12,r1,ror#17
1348         eor     r0,r0,r2,ror#18
1349         eor     r12,r12,r1,ror#19
1350         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1351         ldr     r2,[sp,#6*4]
1352         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1353         ldr     r1,[sp,#15*4]
1355         add     r12,r12,r0
1356         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1357         add     r2,r2,r12
1358         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1359         add     r2,r2,r1                        @ X[i]
1360         ldr     r12,[r14],#4                    @ *K256++
1361         add     r5,r5,r2                        @ h+=X[i]
1362         str     r2,[sp,#6*4]
1363         eor     r2,r11,r4
1364         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1365         and     r2,r2,r10
1366         add     r5,r5,r12                       @ h+=K256[i]
1367         eor     r2,r2,r4                        @ Ch(e,f,g)
1368         eor     r0,r6,r6,ror#11
1369         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1370 #if 22==31
1371         and     r12,r12,#0xff
1372         cmp     r12,#0xf2                       @ done?
1373 #endif
1374 #if 22<15
1375 # if __ARM_ARCH__>=7
1376         ldr     r2,[r1],#4                      @ prefetch
1377 # else
1378         ldrb    r2,[r1,#3]
1379 # endif
1380         eor     r12,r6,r7                       @ a^b, b^c in next round
1381 #else
1382         ldr     r2,[sp,#8*4]            @ from future BODY_16_xx
1383         eor     r12,r6,r7                       @ a^b, b^c in next round
1384         ldr     r1,[sp,#5*4]    @ from future BODY_16_xx
1385 #endif
1386         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1387         and     r3,r3,r12                       @ (b^c)&=(a^b)
1388         add     r9,r9,r5                        @ d+=h
1389         eor     r3,r3,r7                        @ Maj(a,b,c)
1390         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1391         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1392         @ ldr   r2,[sp,#8*4]            @ 23
1393         @ ldr   r1,[sp,#5*4]
1394         mov     r0,r2,ror#7
1395         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1396         mov     r3,r1,ror#17
1397         eor     r0,r0,r2,ror#18
1398         eor     r3,r3,r1,ror#19
1399         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1400         ldr     r2,[sp,#7*4]
1401         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1402         ldr     r1,[sp,#0*4]
1404         add     r3,r3,r0
1405         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1406         add     r2,r2,r3
1407         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1408         add     r2,r2,r1                        @ X[i]
1409         ldr     r3,[r14],#4                     @ *K256++
1410         add     r4,r4,r2                        @ h+=X[i]
1411         str     r2,[sp,#7*4]
1412         eor     r2,r10,r11
1413         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1414         and     r2,r2,r9
1415         add     r4,r4,r3                        @ h+=K256[i]
1416         eor     r2,r2,r11                       @ Ch(e,f,g)
1417         eor     r0,r5,r5,ror#11
1418         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1419 #if 23==31
1420         and     r3,r3,#0xff
1421         cmp     r3,#0xf2                        @ done?
1422 #endif
1423 #if 23<15
1424 # if __ARM_ARCH__>=7
1425         ldr     r2,[r1],#4                      @ prefetch
1426 # else
1427         ldrb    r2,[r1,#3]
1428 # endif
1429         eor     r3,r5,r6                        @ a^b, b^c in next round
1430 #else
1431         ldr     r2,[sp,#9*4]            @ from future BODY_16_xx
1432         eor     r3,r5,r6                        @ a^b, b^c in next round
1433         ldr     r1,[sp,#6*4]    @ from future BODY_16_xx
1434 #endif
1435         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1436         and     r12,r12,r3                      @ (b^c)&=(a^b)
1437         add     r8,r8,r4                        @ d+=h
1438         eor     r12,r12,r6                      @ Maj(a,b,c)
1439         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1440         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1441         @ ldr   r2,[sp,#9*4]            @ 24
1442         @ ldr   r1,[sp,#6*4]
1443         mov     r0,r2,ror#7
1444         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
1445         mov     r12,r1,ror#17
1446         eor     r0,r0,r2,ror#18
1447         eor     r12,r12,r1,ror#19
1448         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1449         ldr     r2,[sp,#8*4]
1450         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1451         ldr     r1,[sp,#1*4]
1453         add     r12,r12,r0
1454         eor     r0,r8,r8,ror#5  @ from BODY_00_15
1455         add     r2,r2,r12
1456         eor     r0,r0,r8,ror#19 @ Sigma1(e)
1457         add     r2,r2,r1                        @ X[i]
1458         ldr     r12,[r14],#4                    @ *K256++
1459         add     r11,r11,r2                      @ h+=X[i]
1460         str     r2,[sp,#8*4]
1461         eor     r2,r9,r10
1462         add     r11,r11,r0,ror#6        @ h+=Sigma1(e)
1463         and     r2,r2,r8
1464         add     r11,r11,r12                     @ h+=K256[i]
1465         eor     r2,r2,r10                       @ Ch(e,f,g)
1466         eor     r0,r4,r4,ror#11
1467         add     r11,r11,r2                      @ h+=Ch(e,f,g)
1468 #if 24==31
1469         and     r12,r12,#0xff
1470         cmp     r12,#0xf2                       @ done?
1471 #endif
1472 #if 24<15
1473 # if __ARM_ARCH__>=7
1474         ldr     r2,[r1],#4                      @ prefetch
1475 # else
1476         ldrb    r2,[r1,#3]
1477 # endif
1478         eor     r12,r4,r5                       @ a^b, b^c in next round
1479 #else
1480         ldr     r2,[sp,#10*4]           @ from future BODY_16_xx
1481         eor     r12,r4,r5                       @ a^b, b^c in next round
1482         ldr     r1,[sp,#7*4]    @ from future BODY_16_xx
1483 #endif
1484         eor     r0,r0,r4,ror#20 @ Sigma0(a)
1485         and     r3,r3,r12                       @ (b^c)&=(a^b)
1486         add     r7,r7,r11                       @ d+=h
1487         eor     r3,r3,r5                        @ Maj(a,b,c)
1488         add     r11,r11,r0,ror#2        @ h+=Sigma0(a)
1489         @ add   r11,r11,r3                      @ h+=Maj(a,b,c)
1490         @ ldr   r2,[sp,#10*4]           @ 25
1491         @ ldr   r1,[sp,#7*4]
1492         mov     r0,r2,ror#7
1493         add     r11,r11,r3                      @ h+=Maj(a,b,c) from the past
1494         mov     r3,r1,ror#17
1495         eor     r0,r0,r2,ror#18
1496         eor     r3,r3,r1,ror#19
1497         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1498         ldr     r2,[sp,#9*4]
1499         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1500         ldr     r1,[sp,#2*4]
1502         add     r3,r3,r0
1503         eor     r0,r7,r7,ror#5  @ from BODY_00_15
1504         add     r2,r2,r3
1505         eor     r0,r0,r7,ror#19 @ Sigma1(e)
1506         add     r2,r2,r1                        @ X[i]
1507         ldr     r3,[r14],#4                     @ *K256++
1508         add     r10,r10,r2                      @ h+=X[i]
1509         str     r2,[sp,#9*4]
1510         eor     r2,r8,r9
1511         add     r10,r10,r0,ror#6        @ h+=Sigma1(e)
1512         and     r2,r2,r7
1513         add     r10,r10,r3                      @ h+=K256[i]
1514         eor     r2,r2,r9                        @ Ch(e,f,g)
1515         eor     r0,r11,r11,ror#11
1516         add     r10,r10,r2                      @ h+=Ch(e,f,g)
1517 #if 25==31
1518         and     r3,r3,#0xff
1519         cmp     r3,#0xf2                        @ done?
1520 #endif
1521 #if 25<15
1522 # if __ARM_ARCH__>=7
1523         ldr     r2,[r1],#4                      @ prefetch
1524 # else
1525         ldrb    r2,[r1,#3]
1526 # endif
1527         eor     r3,r11,r4                       @ a^b, b^c in next round
1528 #else
1529         ldr     r2,[sp,#11*4]           @ from future BODY_16_xx
1530         eor     r3,r11,r4                       @ a^b, b^c in next round
1531         ldr     r1,[sp,#8*4]    @ from future BODY_16_xx
1532 #endif
1533         eor     r0,r0,r11,ror#20        @ Sigma0(a)
1534         and     r12,r12,r3                      @ (b^c)&=(a^b)
1535         add     r6,r6,r10                       @ d+=h
1536         eor     r12,r12,r4                      @ Maj(a,b,c)
1537         add     r10,r10,r0,ror#2        @ h+=Sigma0(a)
1538         @ add   r10,r10,r12                     @ h+=Maj(a,b,c)
1539         @ ldr   r2,[sp,#11*4]           @ 26
1540         @ ldr   r1,[sp,#8*4]
1541         mov     r0,r2,ror#7
1542         add     r10,r10,r12                     @ h+=Maj(a,b,c) from the past
1543         mov     r12,r1,ror#17
1544         eor     r0,r0,r2,ror#18
1545         eor     r12,r12,r1,ror#19
1546         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1547         ldr     r2,[sp,#10*4]
1548         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1549         ldr     r1,[sp,#3*4]
1551         add     r12,r12,r0
1552         eor     r0,r6,r6,ror#5  @ from BODY_00_15
1553         add     r2,r2,r12
1554         eor     r0,r0,r6,ror#19 @ Sigma1(e)
1555         add     r2,r2,r1                        @ X[i]
1556         ldr     r12,[r14],#4                    @ *K256++
1557         add     r9,r9,r2                        @ h+=X[i]
1558         str     r2,[sp,#10*4]
1559         eor     r2,r7,r8
1560         add     r9,r9,r0,ror#6  @ h+=Sigma1(e)
1561         and     r2,r2,r6
1562         add     r9,r9,r12                       @ h+=K256[i]
1563         eor     r2,r2,r8                        @ Ch(e,f,g)
1564         eor     r0,r10,r10,ror#11
1565         add     r9,r9,r2                        @ h+=Ch(e,f,g)
1566 #if 26==31
1567         and     r12,r12,#0xff
1568         cmp     r12,#0xf2                       @ done?
1569 #endif
1570 #if 26<15
1571 # if __ARM_ARCH__>=7
1572         ldr     r2,[r1],#4                      @ prefetch
1573 # else
1574         ldrb    r2,[r1,#3]
1575 # endif
1576         eor     r12,r10,r11                     @ a^b, b^c in next round
1577 #else
1578         ldr     r2,[sp,#12*4]           @ from future BODY_16_xx
1579         eor     r12,r10,r11                     @ a^b, b^c in next round
1580         ldr     r1,[sp,#9*4]    @ from future BODY_16_xx
1581 #endif
1582         eor     r0,r0,r10,ror#20        @ Sigma0(a)
1583         and     r3,r3,r12                       @ (b^c)&=(a^b)
1584         add     r5,r5,r9                        @ d+=h
1585         eor     r3,r3,r11                       @ Maj(a,b,c)
1586         add     r9,r9,r0,ror#2  @ h+=Sigma0(a)
1587         @ add   r9,r9,r3                        @ h+=Maj(a,b,c)
1588         @ ldr   r2,[sp,#12*4]           @ 27
1589         @ ldr   r1,[sp,#9*4]
1590         mov     r0,r2,ror#7
1591         add     r9,r9,r3                        @ h+=Maj(a,b,c) from the past
1592         mov     r3,r1,ror#17
1593         eor     r0,r0,r2,ror#18
1594         eor     r3,r3,r1,ror#19
1595         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1596         ldr     r2,[sp,#11*4]
1597         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1598         ldr     r1,[sp,#4*4]
1600         add     r3,r3,r0
1601         eor     r0,r5,r5,ror#5  @ from BODY_00_15
1602         add     r2,r2,r3
1603         eor     r0,r0,r5,ror#19 @ Sigma1(e)
1604         add     r2,r2,r1                        @ X[i]
1605         ldr     r3,[r14],#4                     @ *K256++
1606         add     r8,r8,r2                        @ h+=X[i]
1607         str     r2,[sp,#11*4]
1608         eor     r2,r6,r7
1609         add     r8,r8,r0,ror#6  @ h+=Sigma1(e)
1610         and     r2,r2,r5
1611         add     r8,r8,r3                        @ h+=K256[i]
1612         eor     r2,r2,r7                        @ Ch(e,f,g)
1613         eor     r0,r9,r9,ror#11
1614         add     r8,r8,r2                        @ h+=Ch(e,f,g)
1615 #if 27==31
1616         and     r3,r3,#0xff
1617         cmp     r3,#0xf2                        @ done?
1618 #endif
1619 #if 27<15
1620 # if __ARM_ARCH__>=7
1621         ldr     r2,[r1],#4                      @ prefetch
1622 # else
1623         ldrb    r2,[r1,#3]
1624 # endif
1625         eor     r3,r9,r10                       @ a^b, b^c in next round
1626 #else
1627         ldr     r2,[sp,#13*4]           @ from future BODY_16_xx
1628         eor     r3,r9,r10                       @ a^b, b^c in next round
1629         ldr     r1,[sp,#10*4]   @ from future BODY_16_xx
1630 #endif
1631         eor     r0,r0,r9,ror#20 @ Sigma0(a)
1632         and     r12,r12,r3                      @ (b^c)&=(a^b)
1633         add     r4,r4,r8                        @ d+=h
1634         eor     r12,r12,r10                     @ Maj(a,b,c)
1635         add     r8,r8,r0,ror#2  @ h+=Sigma0(a)
1636         @ add   r8,r8,r12                       @ h+=Maj(a,b,c)
1637         @ ldr   r2,[sp,#13*4]           @ 28
1638         @ ldr   r1,[sp,#10*4]
1639         mov     r0,r2,ror#7
1640         add     r8,r8,r12                       @ h+=Maj(a,b,c) from the past
1641         mov     r12,r1,ror#17
1642         eor     r0,r0,r2,ror#18
1643         eor     r12,r12,r1,ror#19
1644         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1645         ldr     r2,[sp,#12*4]
1646         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1647         ldr     r1,[sp,#5*4]
1649         add     r12,r12,r0
1650         eor     r0,r4,r4,ror#5  @ from BODY_00_15
1651         add     r2,r2,r12
1652         eor     r0,r0,r4,ror#19 @ Sigma1(e)
1653         add     r2,r2,r1                        @ X[i]
1654         ldr     r12,[r14],#4                    @ *K256++
1655         add     r7,r7,r2                        @ h+=X[i]
1656         str     r2,[sp,#12*4]
1657         eor     r2,r5,r6
1658         add     r7,r7,r0,ror#6  @ h+=Sigma1(e)
1659         and     r2,r2,r4
1660         add     r7,r7,r12                       @ h+=K256[i]
1661         eor     r2,r2,r6                        @ Ch(e,f,g)
1662         eor     r0,r8,r8,ror#11
1663         add     r7,r7,r2                        @ h+=Ch(e,f,g)
1664 #if 28==31
1665         and     r12,r12,#0xff
1666         cmp     r12,#0xf2                       @ done?
1667 #endif
1668 #if 28<15
1669 # if __ARM_ARCH__>=7
1670         ldr     r2,[r1],#4                      @ prefetch
1671 # else
1672         ldrb    r2,[r1,#3]
1673 # endif
1674         eor     r12,r8,r9                       @ a^b, b^c in next round
1675 #else
1676         ldr     r2,[sp,#14*4]           @ from future BODY_16_xx
1677         eor     r12,r8,r9                       @ a^b, b^c in next round
1678         ldr     r1,[sp,#11*4]   @ from future BODY_16_xx
1679 #endif
1680         eor     r0,r0,r8,ror#20 @ Sigma0(a)
1681         and     r3,r3,r12                       @ (b^c)&=(a^b)
1682         add     r11,r11,r7                      @ d+=h
1683         eor     r3,r3,r9                        @ Maj(a,b,c)
1684         add     r7,r7,r0,ror#2  @ h+=Sigma0(a)
1685         @ add   r7,r7,r3                        @ h+=Maj(a,b,c)
1686         @ ldr   r2,[sp,#14*4]           @ 29
1687         @ ldr   r1,[sp,#11*4]
1688         mov     r0,r2,ror#7
1689         add     r7,r7,r3                        @ h+=Maj(a,b,c) from the past
1690         mov     r3,r1,ror#17
1691         eor     r0,r0,r2,ror#18
1692         eor     r3,r3,r1,ror#19
1693         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1694         ldr     r2,[sp,#13*4]
1695         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1696         ldr     r1,[sp,#6*4]
1698         add     r3,r3,r0
1699         eor     r0,r11,r11,ror#5        @ from BODY_00_15
1700         add     r2,r2,r3
1701         eor     r0,r0,r11,ror#19        @ Sigma1(e)
1702         add     r2,r2,r1                        @ X[i]
1703         ldr     r3,[r14],#4                     @ *K256++
1704         add     r6,r6,r2                        @ h+=X[i]
1705         str     r2,[sp,#13*4]
1706         eor     r2,r4,r5
1707         add     r6,r6,r0,ror#6  @ h+=Sigma1(e)
1708         and     r2,r2,r11
1709         add     r6,r6,r3                        @ h+=K256[i]
1710         eor     r2,r2,r5                        @ Ch(e,f,g)
1711         eor     r0,r7,r7,ror#11
1712         add     r6,r6,r2                        @ h+=Ch(e,f,g)
1713 #if 29==31
1714         and     r3,r3,#0xff
1715         cmp     r3,#0xf2                        @ done?
1716 #endif
1717 #if 29<15
1718 # if __ARM_ARCH__>=7
1719         ldr     r2,[r1],#4                      @ prefetch
1720 # else
1721         ldrb    r2,[r1,#3]
1722 # endif
1723         eor     r3,r7,r8                        @ a^b, b^c in next round
1724 #else
1725         ldr     r2,[sp,#15*4]           @ from future BODY_16_xx
1726         eor     r3,r7,r8                        @ a^b, b^c in next round
1727         ldr     r1,[sp,#12*4]   @ from future BODY_16_xx
1728 #endif
1729         eor     r0,r0,r7,ror#20 @ Sigma0(a)
1730         and     r12,r12,r3                      @ (b^c)&=(a^b)
1731         add     r10,r10,r6                      @ d+=h
1732         eor     r12,r12,r8                      @ Maj(a,b,c)
1733         add     r6,r6,r0,ror#2  @ h+=Sigma0(a)
1734         @ add   r6,r6,r12                       @ h+=Maj(a,b,c)
1735         @ ldr   r2,[sp,#15*4]           @ 30
1736         @ ldr   r1,[sp,#12*4]
1737         mov     r0,r2,ror#7
1738         add     r6,r6,r12                       @ h+=Maj(a,b,c) from the past
1739         mov     r12,r1,ror#17
1740         eor     r0,r0,r2,ror#18
1741         eor     r12,r12,r1,ror#19
1742         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1743         ldr     r2,[sp,#14*4]
1744         eor     r12,r12,r1,lsr#10       @ sigma1(X[i+14])
1745         ldr     r1,[sp,#7*4]
1747         add     r12,r12,r0
1748         eor     r0,r10,r10,ror#5        @ from BODY_00_15
1749         add     r2,r2,r12
1750         eor     r0,r0,r10,ror#19        @ Sigma1(e)
1751         add     r2,r2,r1                        @ X[i]
1752         ldr     r12,[r14],#4                    @ *K256++
1753         add     r5,r5,r2                        @ h+=X[i]
1754         str     r2,[sp,#14*4]
1755         eor     r2,r11,r4
1756         add     r5,r5,r0,ror#6  @ h+=Sigma1(e)
1757         and     r2,r2,r10
1758         add     r5,r5,r12                       @ h+=K256[i]
1759         eor     r2,r2,r4                        @ Ch(e,f,g)
1760         eor     r0,r6,r6,ror#11
1761         add     r5,r5,r2                        @ h+=Ch(e,f,g)
1762 #if 30==31
1763         and     r12,r12,#0xff
1764         cmp     r12,#0xf2                       @ done?
1765 #endif
1766 #if 30<15
1767 # if __ARM_ARCH__>=7
1768         ldr     r2,[r1],#4                      @ prefetch
1769 # else
1770         ldrb    r2,[r1,#3]
1771 # endif
1772         eor     r12,r6,r7                       @ a^b, b^c in next round
1773 #else
1774         ldr     r2,[sp,#0*4]            @ from future BODY_16_xx
1775         eor     r12,r6,r7                       @ a^b, b^c in next round
1776         ldr     r1,[sp,#13*4]   @ from future BODY_16_xx
1777 #endif
1778         eor     r0,r0,r6,ror#20 @ Sigma0(a)
1779         and     r3,r3,r12                       @ (b^c)&=(a^b)
1780         add     r9,r9,r5                        @ d+=h
1781         eor     r3,r3,r7                        @ Maj(a,b,c)
1782         add     r5,r5,r0,ror#2  @ h+=Sigma0(a)
1783         @ add   r5,r5,r3                        @ h+=Maj(a,b,c)
1784         @ ldr   r2,[sp,#0*4]            @ 31
1785         @ ldr   r1,[sp,#13*4]
1786         mov     r0,r2,ror#7
1787         add     r5,r5,r3                        @ h+=Maj(a,b,c) from the past
1788         mov     r3,r1,ror#17
1789         eor     r0,r0,r2,ror#18
1790         eor     r3,r3,r1,ror#19
1791         eor     r0,r0,r2,lsr#3  @ sigma0(X[i+1])
1792         ldr     r2,[sp,#15*4]
1793         eor     r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1794         ldr     r1,[sp,#8*4]
1796         add     r3,r3,r0
1797         eor     r0,r9,r9,ror#5  @ from BODY_00_15
1798         add     r2,r2,r3
1799         eor     r0,r0,r9,ror#19 @ Sigma1(e)
1800         add     r2,r2,r1                        @ X[i]
1801         ldr     r3,[r14],#4                     @ *K256++
1802         add     r4,r4,r2                        @ h+=X[i]
1803         str     r2,[sp,#15*4]
1804         eor     r2,r10,r11
1805         add     r4,r4,r0,ror#6  @ h+=Sigma1(e)
1806         and     r2,r2,r9
1807         add     r4,r4,r3                        @ h+=K256[i]
1808         eor     r2,r2,r11                       @ Ch(e,f,g)
1809         eor     r0,r5,r5,ror#11
1810         add     r4,r4,r2                        @ h+=Ch(e,f,g)
1811 #if 31==31
1812         and     r3,r3,#0xff
1813         cmp     r3,#0xf2                        @ done?
1814 #endif
1815 #if 31<15
1816 # if __ARM_ARCH__>=7
1817         ldr     r2,[r1],#4                      @ prefetch
1818 # else
1819         ldrb    r2,[r1,#3]
1820 # endif
1821         eor     r3,r5,r6                        @ a^b, b^c in next round
1822 #else
1823         ldr     r2,[sp,#1*4]            @ from future BODY_16_xx
1824         eor     r3,r5,r6                        @ a^b, b^c in next round
1825         ldr     r1,[sp,#14*4]   @ from future BODY_16_xx
1826 #endif
1827         eor     r0,r0,r5,ror#20 @ Sigma0(a)
1828         and     r12,r12,r3                      @ (b^c)&=(a^b)
1829         add     r8,r8,r4                        @ d+=h
1830         eor     r12,r12,r6                      @ Maj(a,b,c)
1831         add     r4,r4,r0,ror#2  @ h+=Sigma0(a)
1832         @ add   r4,r4,r12                       @ h+=Maj(a,b,c)
1833 #if __ARM_ARCH__>=7
1834         ite     eq                      @ Thumb2 thing, sanity check in ARM
1835 #endif
1836         ldreq   r3,[sp,#16*4]           @ pull ctx
1837         bne     .Lrounds_16_xx
1839         add     r4,r4,r12               @ h+=Maj(a,b,c) from the past
1840         ldr     r0,[r3,#0]
1841         ldr     r2,[r3,#4]
1842         ldr     r12,[r3,#8]
1843         add     r4,r4,r0
1844         ldr     r0,[r3,#12]
1845         add     r5,r5,r2
1846         ldr     r2,[r3,#16]
1847         add     r6,r6,r12
1848         ldr     r12,[r3,#20]
1849         add     r7,r7,r0
1850         ldr     r0,[r3,#24]
1851         add     r8,r8,r2
1852         ldr     r2,[r3,#28]
1853         add     r9,r9,r12
1854         ldr     r1,[sp,#17*4]           @ pull inp
1855         ldr     r12,[sp,#18*4]          @ pull inp+len
1856         add     r10,r10,r0
1857         add     r11,r11,r2
1858         stmia   r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1859         cmp     r1,r12
1860         sub     r14,r14,#256    @ rewind Ktbl
1861         bne     .Loop
1863         add     sp,sp,#19*4     @ destroy frame
1864 #if __ARM_ARCH__>=5
1865         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1866 #else
1867         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1868         tst     lr,#1
1869         moveq   pc,lr                   @ be binary compatible with V4, yet
1870 .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
1871 #endif
1872 .size   sha256_block_data_order,.-sha256_block_data_order
1873 #if __ARM_MAX_ARCH__>=7
1874 .arch   armv7-a
1875 .fpu    neon
1877 .globl  sha256_block_data_order_neon
1878 .type   sha256_block_data_order_neon,%function
1879 .align  4
1880 sha256_block_data_order_neon:
1881 .LNEON:
1882         stmdb   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1884         sub     r11,sp,#16*4+16
1885         adrl    r14,K256
1886         bic     r11,r11,#15             @ align for 128-bit stores
1887         mov     r12,sp
1888         mov     sp,r11                  @ alloca
1889         add     r2,r1,r2,lsl#6  @ len to point at the end of inp
1891         vld1.8  {q0},[r1]!
1892         vld1.8  {q1},[r1]!
1893         vld1.8  {q2},[r1]!
1894         vld1.8  {q3},[r1]!
1895         vld1.32 {q8},[r14,:128]!
1896         vld1.32 {q9},[r14,:128]!
1897         vld1.32 {q10},[r14,:128]!
1898         vld1.32 {q11},[r14,:128]!
1899         vrev32.8        q0,q0           @ yes, even on
1900         str     r0,[sp,#64]
1901         vrev32.8        q1,q1           @ big-endian
1902         str     r1,[sp,#68]
1903         mov     r1,sp
1904         vrev32.8        q2,q2
1905         str     r2,[sp,#72]
1906         vrev32.8        q3,q3
1907         str     r12,[sp,#76]            @ save original sp
1908         vadd.i32        q8,q8,q0
1909         vadd.i32        q9,q9,q1
1910         vst1.32 {q8},[r1,:128]!
1911         vadd.i32        q10,q10,q2
1912         vst1.32 {q9},[r1,:128]!
1913         vadd.i32        q11,q11,q3
1914         vst1.32 {q10},[r1,:128]!
1915         vst1.32 {q11},[r1,:128]!
1917         ldmia   r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1918         sub     r1,r1,#64
1919         ldr     r2,[sp,#0]
1920         eor     r12,r12,r12
1921         eor     r3,r5,r6
1922         b       .L_00_48
1924 .align  4
1925 .L_00_48:
1926         vext.8  q8,q0,q1,#4
1927         add     r11,r11,r2
1928         eor     r2,r9,r10
1929         eor     r0,r8,r8,ror#5
1930         vext.8  q9,q2,q3,#4
1931         add     r4,r4,r12
1932         and     r2,r2,r8
1933         eor     r12,r0,r8,ror#19
1934         vshr.u32        q10,q8,#7
1935         eor     r0,r4,r4,ror#11
1936         eor     r2,r2,r10
1937         vadd.i32        q0,q0,q9
1938         add     r11,r11,r12,ror#6
1939         eor     r12,r4,r5
1940         vshr.u32        q9,q8,#3
1941         eor     r0,r0,r4,ror#20
1942         add     r11,r11,r2
1943         vsli.32 q10,q8,#25
1944         ldr     r2,[sp,#4]
1945         and     r3,r3,r12
1946         vshr.u32        q11,q8,#18
1947         add     r7,r7,r11
1948         add     r11,r11,r0,ror#2
1949         eor     r3,r3,r5
1950         veor    q9,q9,q10
1951         add     r10,r10,r2
1952         vsli.32 q11,q8,#14
1953         eor     r2,r8,r9
1954         eor     r0,r7,r7,ror#5
1955         vshr.u32        d24,d7,#17
1956         add     r11,r11,r3
1957         and     r2,r2,r7
1958         veor    q9,q9,q11
1959         eor     r3,r0,r7,ror#19
1960         eor     r0,r11,r11,ror#11
1961         vsli.32 d24,d7,#15
1962         eor     r2,r2,r9
1963         add     r10,r10,r3,ror#6
1964         vshr.u32        d25,d7,#10
1965         eor     r3,r11,r4
1966         eor     r0,r0,r11,ror#20
1967         vadd.i32        q0,q0,q9
1968         add     r10,r10,r2
1969         ldr     r2,[sp,#8]
1970         veor    d25,d25,d24
1971         and     r12,r12,r3
1972         add     r6,r6,r10
1973         vshr.u32        d24,d7,#19
1974         add     r10,r10,r0,ror#2
1975         eor     r12,r12,r4
1976         vsli.32 d24,d7,#13
1977         add     r9,r9,r2
1978         eor     r2,r7,r8
1979         veor    d25,d25,d24
1980         eor     r0,r6,r6,ror#5
1981         add     r10,r10,r12
1982         vadd.i32        d0,d0,d25
1983         and     r2,r2,r6
1984         eor     r12,r0,r6,ror#19
1985         vshr.u32        d24,d0,#17
1986         eor     r0,r10,r10,ror#11
1987         eor     r2,r2,r8
1988         vsli.32 d24,d0,#15
1989         add     r9,r9,r12,ror#6
1990         eor     r12,r10,r11
1991         vshr.u32        d25,d0,#10
1992         eor     r0,r0,r10,ror#20
1993         add     r9,r9,r2
1994         veor    d25,d25,d24
1995         ldr     r2,[sp,#12]
1996         and     r3,r3,r12
1997         vshr.u32        d24,d0,#19
1998         add     r5,r5,r9
1999         add     r9,r9,r0,ror#2
2000         eor     r3,r3,r11
2001         vld1.32 {q8},[r14,:128]!
2002         add     r8,r8,r2
2003         vsli.32 d24,d0,#13
2004         eor     r2,r6,r7
2005         eor     r0,r5,r5,ror#5
2006         veor    d25,d25,d24
2007         add     r9,r9,r3
2008         and     r2,r2,r5
2009         vadd.i32        d1,d1,d25
2010         eor     r3,r0,r5,ror#19
2011         eor     r0,r9,r9,ror#11
2012         vadd.i32        q8,q8,q0
2013         eor     r2,r2,r7
2014         add     r8,r8,r3,ror#6
2015         eor     r3,r9,r10
2016         eor     r0,r0,r9,ror#20
2017         add     r8,r8,r2
2018         ldr     r2,[sp,#16]
2019         and     r12,r12,r3
2020         add     r4,r4,r8
2021         vst1.32 {q8},[r1,:128]!
2022         add     r8,r8,r0,ror#2
2023         eor     r12,r12,r10
2024         vext.8  q8,q1,q2,#4
2025         add     r7,r7,r2
2026         eor     r2,r5,r6
2027         eor     r0,r4,r4,ror#5
2028         vext.8  q9,q3,q0,#4
2029         add     r8,r8,r12
2030         and     r2,r2,r4
2031         eor     r12,r0,r4,ror#19
2032         vshr.u32        q10,q8,#7
2033         eor     r0,r8,r8,ror#11
2034         eor     r2,r2,r6
2035         vadd.i32        q1,q1,q9
2036         add     r7,r7,r12,ror#6
2037         eor     r12,r8,r9
2038         vshr.u32        q9,q8,#3
2039         eor     r0,r0,r8,ror#20
2040         add     r7,r7,r2
2041         vsli.32 q10,q8,#25
2042         ldr     r2,[sp,#20]
2043         and     r3,r3,r12
2044         vshr.u32        q11,q8,#18
2045         add     r11,r11,r7
2046         add     r7,r7,r0,ror#2
2047         eor     r3,r3,r9
2048         veor    q9,q9,q10
2049         add     r6,r6,r2
2050         vsli.32 q11,q8,#14
2051         eor     r2,r4,r5
2052         eor     r0,r11,r11,ror#5
2053         vshr.u32        d24,d1,#17
2054         add     r7,r7,r3
2055         and     r2,r2,r11
2056         veor    q9,q9,q11
2057         eor     r3,r0,r11,ror#19
2058         eor     r0,r7,r7,ror#11
2059         vsli.32 d24,d1,#15
2060         eor     r2,r2,r5
2061         add     r6,r6,r3,ror#6
2062         vshr.u32        d25,d1,#10
2063         eor     r3,r7,r8
2064         eor     r0,r0,r7,ror#20
2065         vadd.i32        q1,q1,q9
2066         add     r6,r6,r2
2067         ldr     r2,[sp,#24]
2068         veor    d25,d25,d24
2069         and     r12,r12,r3
2070         add     r10,r10,r6
2071         vshr.u32        d24,d1,#19
2072         add     r6,r6,r0,ror#2
2073         eor     r12,r12,r8
2074         vsli.32 d24,d1,#13
2075         add     r5,r5,r2
2076         eor     r2,r11,r4
2077         veor    d25,d25,d24
2078         eor     r0,r10,r10,ror#5
2079         add     r6,r6,r12
2080         vadd.i32        d2,d2,d25
2081         and     r2,r2,r10
2082         eor     r12,r0,r10,ror#19
2083         vshr.u32        d24,d2,#17
2084         eor     r0,r6,r6,ror#11
2085         eor     r2,r2,r4
2086         vsli.32 d24,d2,#15
2087         add     r5,r5,r12,ror#6
2088         eor     r12,r6,r7
2089         vshr.u32        d25,d2,#10
2090         eor     r0,r0,r6,ror#20
2091         add     r5,r5,r2
2092         veor    d25,d25,d24
2093         ldr     r2,[sp,#28]
2094         and     r3,r3,r12
2095         vshr.u32        d24,d2,#19
2096         add     r9,r9,r5
2097         add     r5,r5,r0,ror#2
2098         eor     r3,r3,r7
2099         vld1.32 {q8},[r14,:128]!
2100         add     r4,r4,r2
2101         vsli.32 d24,d2,#13
2102         eor     r2,r10,r11
2103         eor     r0,r9,r9,ror#5
2104         veor    d25,d25,d24
2105         add     r5,r5,r3
2106         and     r2,r2,r9
2107         vadd.i32        d3,d3,d25
2108         eor     r3,r0,r9,ror#19
2109         eor     r0,r5,r5,ror#11
2110         vadd.i32        q8,q8,q1
2111         eor     r2,r2,r11
2112         add     r4,r4,r3,ror#6
2113         eor     r3,r5,r6
2114         eor     r0,r0,r5,ror#20
2115         add     r4,r4,r2
2116         ldr     r2,[sp,#32]
2117         and     r12,r12,r3
2118         add     r8,r8,r4
2119         vst1.32 {q8},[r1,:128]!
2120         add     r4,r4,r0,ror#2
2121         eor     r12,r12,r6
2122         vext.8  q8,q2,q3,#4
2123         add     r11,r11,r2
2124         eor     r2,r9,r10
2125         eor     r0,r8,r8,ror#5
2126         vext.8  q9,q0,q1,#4
2127         add     r4,r4,r12
2128         and     r2,r2,r8
2129         eor     r12,r0,r8,ror#19
2130         vshr.u32        q10,q8,#7
2131         eor     r0,r4,r4,ror#11
2132         eor     r2,r2,r10
2133         vadd.i32        q2,q2,q9
2134         add     r11,r11,r12,ror#6
2135         eor     r12,r4,r5
2136         vshr.u32        q9,q8,#3
2137         eor     r0,r0,r4,ror#20
2138         add     r11,r11,r2
2139         vsli.32 q10,q8,#25
2140         ldr     r2,[sp,#36]
2141         and     r3,r3,r12
2142         vshr.u32        q11,q8,#18
2143         add     r7,r7,r11
2144         add     r11,r11,r0,ror#2
2145         eor     r3,r3,r5
2146         veor    q9,q9,q10
2147         add     r10,r10,r2
2148         vsli.32 q11,q8,#14
2149         eor     r2,r8,r9
2150         eor     r0,r7,r7,ror#5
2151         vshr.u32        d24,d3,#17
2152         add     r11,r11,r3
2153         and     r2,r2,r7
2154         veor    q9,q9,q11
2155         eor     r3,r0,r7,ror#19
2156         eor     r0,r11,r11,ror#11
2157         vsli.32 d24,d3,#15
2158         eor     r2,r2,r9
2159         add     r10,r10,r3,ror#6
2160         vshr.u32        d25,d3,#10
2161         eor     r3,r11,r4
2162         eor     r0,r0,r11,ror#20
2163         vadd.i32        q2,q2,q9
2164         add     r10,r10,r2
2165         ldr     r2,[sp,#40]
2166         veor    d25,d25,d24
2167         and     r12,r12,r3
2168         add     r6,r6,r10
2169         vshr.u32        d24,d3,#19
2170         add     r10,r10,r0,ror#2
2171         eor     r12,r12,r4
2172         vsli.32 d24,d3,#13
2173         add     r9,r9,r2
2174         eor     r2,r7,r8
2175         veor    d25,d25,d24
2176         eor     r0,r6,r6,ror#5
2177         add     r10,r10,r12
2178         vadd.i32        d4,d4,d25
2179         and     r2,r2,r6
2180         eor     r12,r0,r6,ror#19
2181         vshr.u32        d24,d4,#17
2182         eor     r0,r10,r10,ror#11
2183         eor     r2,r2,r8
2184         vsli.32 d24,d4,#15
2185         add     r9,r9,r12,ror#6
2186         eor     r12,r10,r11
2187         vshr.u32        d25,d4,#10
2188         eor     r0,r0,r10,ror#20
2189         add     r9,r9,r2
2190         veor    d25,d25,d24
2191         ldr     r2,[sp,#44]
2192         and     r3,r3,r12
2193         vshr.u32        d24,d4,#19
2194         add     r5,r5,r9
2195         add     r9,r9,r0,ror#2
2196         eor     r3,r3,r11
2197         vld1.32 {q8},[r14,:128]!
2198         add     r8,r8,r2
2199         vsli.32 d24,d4,#13
2200         eor     r2,r6,r7
2201         eor     r0,r5,r5,ror#5
2202         veor    d25,d25,d24
2203         add     r9,r9,r3
2204         and     r2,r2,r5
2205         vadd.i32        d5,d5,d25
2206         eor     r3,r0,r5,ror#19
2207         eor     r0,r9,r9,ror#11
2208         vadd.i32        q8,q8,q2
2209         eor     r2,r2,r7
2210         add     r8,r8,r3,ror#6
2211         eor     r3,r9,r10
2212         eor     r0,r0,r9,ror#20
2213         add     r8,r8,r2
2214         ldr     r2,[sp,#48]
2215         and     r12,r12,r3
2216         add     r4,r4,r8
2217         vst1.32 {q8},[r1,:128]!
2218         add     r8,r8,r0,ror#2
2219         eor     r12,r12,r10
2220         vext.8  q8,q3,q0,#4
2221         add     r7,r7,r2
2222         eor     r2,r5,r6
2223         eor     r0,r4,r4,ror#5
2224         vext.8  q9,q1,q2,#4
2225         add     r8,r8,r12
2226         and     r2,r2,r4
2227         eor     r12,r0,r4,ror#19
2228         vshr.u32        q10,q8,#7
2229         eor     r0,r8,r8,ror#11
2230         eor     r2,r2,r6
2231         vadd.i32        q3,q3,q9
2232         add     r7,r7,r12,ror#6
2233         eor     r12,r8,r9
2234         vshr.u32        q9,q8,#3
2235         eor     r0,r0,r8,ror#20
2236         add     r7,r7,r2
2237         vsli.32 q10,q8,#25
2238         ldr     r2,[sp,#52]
2239         and     r3,r3,r12
2240         vshr.u32        q11,q8,#18
2241         add     r11,r11,r7
2242         add     r7,r7,r0,ror#2
2243         eor     r3,r3,r9
2244         veor    q9,q9,q10
2245         add     r6,r6,r2
2246         vsli.32 q11,q8,#14
2247         eor     r2,r4,r5
2248         eor     r0,r11,r11,ror#5
2249         vshr.u32        d24,d5,#17
2250         add     r7,r7,r3
2251         and     r2,r2,r11
2252         veor    q9,q9,q11
2253         eor     r3,r0,r11,ror#19
2254         eor     r0,r7,r7,ror#11
2255         vsli.32 d24,d5,#15
2256         eor     r2,r2,r5
2257         add     r6,r6,r3,ror#6
2258         vshr.u32        d25,d5,#10
2259         eor     r3,r7,r8
2260         eor     r0,r0,r7,ror#20
2261         vadd.i32        q3,q3,q9
2262         add     r6,r6,r2
2263         ldr     r2,[sp,#56]
2264         veor    d25,d25,d24
2265         and     r12,r12,r3
2266         add     r10,r10,r6
2267         vshr.u32        d24,d5,#19
2268         add     r6,r6,r0,ror#2
2269         eor     r12,r12,r8
2270         vsli.32 d24,d5,#13
2271         add     r5,r5,r2
2272         eor     r2,r11,r4
2273         veor    d25,d25,d24
2274         eor     r0,r10,r10,ror#5
2275         add     r6,r6,r12
2276         vadd.i32        d6,d6,d25
2277         and     r2,r2,r10
2278         eor     r12,r0,r10,ror#19
2279         vshr.u32        d24,d6,#17
2280         eor     r0,r6,r6,ror#11
2281         eor     r2,r2,r4
2282         vsli.32 d24,d6,#15
2283         add     r5,r5,r12,ror#6
2284         eor     r12,r6,r7
2285         vshr.u32        d25,d6,#10
2286         eor     r0,r0,r6,ror#20
2287         add     r5,r5,r2
2288         veor    d25,d25,d24
2289         ldr     r2,[sp,#60]
2290         and     r3,r3,r12
2291         vshr.u32        d24,d6,#19
2292         add     r9,r9,r5
2293         add     r5,r5,r0,ror#2
2294         eor     r3,r3,r7
2295         vld1.32 {q8},[r14,:128]!
2296         add     r4,r4,r2
2297         vsli.32 d24,d6,#13
2298         eor     r2,r10,r11
2299         eor     r0,r9,r9,ror#5
2300         veor    d25,d25,d24
2301         add     r5,r5,r3
2302         and     r2,r2,r9
2303         vadd.i32        d7,d7,d25
2304         eor     r3,r0,r9,ror#19
2305         eor     r0,r5,r5,ror#11
2306         vadd.i32        q8,q8,q3
2307         eor     r2,r2,r11
2308         add     r4,r4,r3,ror#6
2309         eor     r3,r5,r6
2310         eor     r0,r0,r5,ror#20
2311         add     r4,r4,r2
2312         ldr     r2,[r14]
2313         and     r12,r12,r3
2314         add     r8,r8,r4
2315         vst1.32 {q8},[r1,:128]!
2316         add     r4,r4,r0,ror#2
2317         eor     r12,r12,r6
2318         teq     r2,#0                           @ check for K256 terminator
2319         ldr     r2,[sp,#0]
2320         sub     r1,r1,#64
2321         bne     .L_00_48
2323         ldr     r1,[sp,#68]
2324         ldr     r0,[sp,#72]
2325         sub     r14,r14,#256    @ rewind r14
2326         teq     r1,r0
2327         it      eq
2328         subeq   r1,r1,#64               @ avoid SEGV
2329         vld1.8  {q0},[r1]!              @ load next input block
2330         vld1.8  {q1},[r1]!
2331         vld1.8  {q2},[r1]!
2332         vld1.8  {q3},[r1]!
2333         it      ne
2334         strne   r1,[sp,#68]
2335         mov     r1,sp
2336         add     r11,r11,r2
2337         eor     r2,r9,r10
2338         eor     r0,r8,r8,ror#5
2339         add     r4,r4,r12
2340         vld1.32 {q8},[r14,:128]!
2341         and     r2,r2,r8
2342         eor     r12,r0,r8,ror#19
2343         eor     r0,r4,r4,ror#11
2344         eor     r2,r2,r10
2345         vrev32.8        q0,q0
2346         add     r11,r11,r12,ror#6
2347         eor     r12,r4,r5
2348         eor     r0,r0,r4,ror#20
2349         add     r11,r11,r2
2350         vadd.i32        q8,q8,q0
2351         ldr     r2,[sp,#4]
2352         and     r3,r3,r12
2353         add     r7,r7,r11
2354         add     r11,r11,r0,ror#2
2355         eor     r3,r3,r5
2356         add     r10,r10,r2
2357         eor     r2,r8,r9
2358         eor     r0,r7,r7,ror#5
2359         add     r11,r11,r3
2360         and     r2,r2,r7
2361         eor     r3,r0,r7,ror#19
2362         eor     r0,r11,r11,ror#11
2363         eor     r2,r2,r9
2364         add     r10,r10,r3,ror#6
2365         eor     r3,r11,r4
2366         eor     r0,r0,r11,ror#20
2367         add     r10,r10,r2
2368         ldr     r2,[sp,#8]
2369         and     r12,r12,r3
2370         add     r6,r6,r10
2371         add     r10,r10,r0,ror#2
2372         eor     r12,r12,r4
2373         add     r9,r9,r2
2374         eor     r2,r7,r8
2375         eor     r0,r6,r6,ror#5
2376         add     r10,r10,r12
2377         and     r2,r2,r6
2378         eor     r12,r0,r6,ror#19
2379         eor     r0,r10,r10,ror#11
2380         eor     r2,r2,r8
2381         add     r9,r9,r12,ror#6
2382         eor     r12,r10,r11
2383         eor     r0,r0,r10,ror#20
2384         add     r9,r9,r2
2385         ldr     r2,[sp,#12]
2386         and     r3,r3,r12
2387         add     r5,r5,r9
2388         add     r9,r9,r0,ror#2
2389         eor     r3,r3,r11
2390         add     r8,r8,r2
2391         eor     r2,r6,r7
2392         eor     r0,r5,r5,ror#5
2393         add     r9,r9,r3
2394         and     r2,r2,r5
2395         eor     r3,r0,r5,ror#19
2396         eor     r0,r9,r9,ror#11
2397         eor     r2,r2,r7
2398         add     r8,r8,r3,ror#6
2399         eor     r3,r9,r10
2400         eor     r0,r0,r9,ror#20
2401         add     r8,r8,r2
2402         ldr     r2,[sp,#16]
2403         and     r12,r12,r3
2404         add     r4,r4,r8
2405         add     r8,r8,r0,ror#2
2406         eor     r12,r12,r10
2407         vst1.32 {q8},[r1,:128]!
2408         add     r7,r7,r2
2409         eor     r2,r5,r6
2410         eor     r0,r4,r4,ror#5
2411         add     r8,r8,r12
2412         vld1.32 {q8},[r14,:128]!
2413         and     r2,r2,r4
2414         eor     r12,r0,r4,ror#19
2415         eor     r0,r8,r8,ror#11
2416         eor     r2,r2,r6
2417         vrev32.8        q1,q1
2418         add     r7,r7,r12,ror#6
2419         eor     r12,r8,r9
2420         eor     r0,r0,r8,ror#20
2421         add     r7,r7,r2
2422         vadd.i32        q8,q8,q1
2423         ldr     r2,[sp,#20]
2424         and     r3,r3,r12
2425         add     r11,r11,r7
2426         add     r7,r7,r0,ror#2
2427         eor     r3,r3,r9
2428         add     r6,r6,r2
2429         eor     r2,r4,r5
2430         eor     r0,r11,r11,ror#5
2431         add     r7,r7,r3
2432         and     r2,r2,r11
2433         eor     r3,r0,r11,ror#19
2434         eor     r0,r7,r7,ror#11
2435         eor     r2,r2,r5
2436         add     r6,r6,r3,ror#6
2437         eor     r3,r7,r8
2438         eor     r0,r0,r7,ror#20
2439         add     r6,r6,r2
2440         ldr     r2,[sp,#24]
2441         and     r12,r12,r3
2442         add     r10,r10,r6
2443         add     r6,r6,r0,ror#2
2444         eor     r12,r12,r8
2445         add     r5,r5,r2
2446         eor     r2,r11,r4
2447         eor     r0,r10,r10,ror#5
2448         add     r6,r6,r12
2449         and     r2,r2,r10
2450         eor     r12,r0,r10,ror#19
2451         eor     r0,r6,r6,ror#11
2452         eor     r2,r2,r4
2453         add     r5,r5,r12,ror#6
2454         eor     r12,r6,r7
2455         eor     r0,r0,r6,ror#20
2456         add     r5,r5,r2
2457         ldr     r2,[sp,#28]
2458         and     r3,r3,r12
2459         add     r9,r9,r5
2460         add     r5,r5,r0,ror#2
2461         eor     r3,r3,r7
2462         add     r4,r4,r2
2463         eor     r2,r10,r11
2464         eor     r0,r9,r9,ror#5
2465         add     r5,r5,r3
2466         and     r2,r2,r9
2467         eor     r3,r0,r9,ror#19
2468         eor     r0,r5,r5,ror#11
2469         eor     r2,r2,r11
2470         add     r4,r4,r3,ror#6
2471         eor     r3,r5,r6
2472         eor     r0,r0,r5,ror#20
2473         add     r4,r4,r2
2474         ldr     r2,[sp,#32]
2475         and     r12,r12,r3
2476         add     r8,r8,r4
2477         add     r4,r4,r0,ror#2
2478         eor     r12,r12,r6
2479         vst1.32 {q8},[r1,:128]!
2480         add     r11,r11,r2
2481         eor     r2,r9,r10
2482         eor     r0,r8,r8,ror#5
2483         add     r4,r4,r12
2484         vld1.32 {q8},[r14,:128]!
2485         and     r2,r2,r8
2486         eor     r12,r0,r8,ror#19
2487         eor     r0,r4,r4,ror#11
2488         eor     r2,r2,r10
2489         vrev32.8        q2,q2
2490         add     r11,r11,r12,ror#6
2491         eor     r12,r4,r5
2492         eor     r0,r0,r4,ror#20
2493         add     r11,r11,r2
2494         vadd.i32        q8,q8,q2
2495         ldr     r2,[sp,#36]
2496         and     r3,r3,r12
2497         add     r7,r7,r11
2498         add     r11,r11,r0,ror#2
2499         eor     r3,r3,r5
2500         add     r10,r10,r2
2501         eor     r2,r8,r9
2502         eor     r0,r7,r7,ror#5
2503         add     r11,r11,r3
2504         and     r2,r2,r7
2505         eor     r3,r0,r7,ror#19
2506         eor     r0,r11,r11,ror#11
2507         eor     r2,r2,r9
2508         add     r10,r10,r3,ror#6
2509         eor     r3,r11,r4
2510         eor     r0,r0,r11,ror#20
2511         add     r10,r10,r2
2512         ldr     r2,[sp,#40]
2513         and     r12,r12,r3
2514         add     r6,r6,r10
2515         add     r10,r10,r0,ror#2
2516         eor     r12,r12,r4
2517         add     r9,r9,r2
2518         eor     r2,r7,r8
2519         eor     r0,r6,r6,ror#5
2520         add     r10,r10,r12
2521         and     r2,r2,r6
2522         eor     r12,r0,r6,ror#19
2523         eor     r0,r10,r10,ror#11
2524         eor     r2,r2,r8
2525         add     r9,r9,r12,ror#6
2526         eor     r12,r10,r11
2527         eor     r0,r0,r10,ror#20
2528         add     r9,r9,r2
2529         ldr     r2,[sp,#44]
2530         and     r3,r3,r12
2531         add     r5,r5,r9
2532         add     r9,r9,r0,ror#2
2533         eor     r3,r3,r11
2534         add     r8,r8,r2
2535         eor     r2,r6,r7
2536         eor     r0,r5,r5,ror#5
2537         add     r9,r9,r3
2538         and     r2,r2,r5
2539         eor     r3,r0,r5,ror#19
2540         eor     r0,r9,r9,ror#11
2541         eor     r2,r2,r7
2542         add     r8,r8,r3,ror#6
2543         eor     r3,r9,r10
2544         eor     r0,r0,r9,ror#20
2545         add     r8,r8,r2
2546         ldr     r2,[sp,#48]
2547         and     r12,r12,r3
2548         add     r4,r4,r8
2549         add     r8,r8,r0,ror#2
2550         eor     r12,r12,r10
2551         vst1.32 {q8},[r1,:128]!
2552         add     r7,r7,r2
2553         eor     r2,r5,r6
2554         eor     r0,r4,r4,ror#5
2555         add     r8,r8,r12
2556         vld1.32 {q8},[r14,:128]!
2557         and     r2,r2,r4
2558         eor     r12,r0,r4,ror#19
2559         eor     r0,r8,r8,ror#11
2560         eor     r2,r2,r6
2561         vrev32.8        q3,q3
2562         add     r7,r7,r12,ror#6
2563         eor     r12,r8,r9
2564         eor     r0,r0,r8,ror#20
2565         add     r7,r7,r2
2566         vadd.i32        q8,q8,q3
2567         ldr     r2,[sp,#52]
2568         and     r3,r3,r12
2569         add     r11,r11,r7
2570         add     r7,r7,r0,ror#2
2571         eor     r3,r3,r9
2572         add     r6,r6,r2
2573         eor     r2,r4,r5
2574         eor     r0,r11,r11,ror#5
2575         add     r7,r7,r3
2576         and     r2,r2,r11
2577         eor     r3,r0,r11,ror#19
2578         eor     r0,r7,r7,ror#11
2579         eor     r2,r2,r5
2580         add     r6,r6,r3,ror#6
2581         eor     r3,r7,r8
2582         eor     r0,r0,r7,ror#20
2583         add     r6,r6,r2
2584         ldr     r2,[sp,#56]
2585         and     r12,r12,r3
2586         add     r10,r10,r6
2587         add     r6,r6,r0,ror#2
2588         eor     r12,r12,r8
2589         add     r5,r5,r2
2590         eor     r2,r11,r4
2591         eor     r0,r10,r10,ror#5
2592         add     r6,r6,r12
2593         and     r2,r2,r10
2594         eor     r12,r0,r10,ror#19
2595         eor     r0,r6,r6,ror#11
2596         eor     r2,r2,r4
2597         add     r5,r5,r12,ror#6
2598         eor     r12,r6,r7
2599         eor     r0,r0,r6,ror#20
2600         add     r5,r5,r2
2601         ldr     r2,[sp,#60]
2602         and     r3,r3,r12
2603         add     r9,r9,r5
2604         add     r5,r5,r0,ror#2
2605         eor     r3,r3,r7
2606         add     r4,r4,r2
2607         eor     r2,r10,r11
2608         eor     r0,r9,r9,ror#5
2609         add     r5,r5,r3
2610         and     r2,r2,r9
2611         eor     r3,r0,r9,ror#19
2612         eor     r0,r5,r5,ror#11
2613         eor     r2,r2,r11
2614         add     r4,r4,r3,ror#6
2615         eor     r3,r5,r6
2616         eor     r0,r0,r5,ror#20
2617         add     r4,r4,r2
2618         ldr     r2,[sp,#64]
2619         and     r12,r12,r3
2620         add     r8,r8,r4
2621         add     r4,r4,r0,ror#2
2622         eor     r12,r12,r6
2623         vst1.32 {q8},[r1,:128]!
2624         ldr     r0,[r2,#0]
2625         add     r4,r4,r12                       @ h+=Maj(a,b,c) from the past
2626         ldr     r12,[r2,#4]
2627         ldr     r3,[r2,#8]
2628         ldr     r1,[r2,#12]
2629         add     r4,r4,r0                        @ accumulate
2630         ldr     r0,[r2,#16]
2631         add     r5,r5,r12
2632         ldr     r12,[r2,#20]
2633         add     r6,r6,r3
2634         ldr     r3,[r2,#24]
2635         add     r7,r7,r1
2636         ldr     r1,[r2,#28]
2637         add     r8,r8,r0
2638         str     r4,[r2],#4
2639         add     r9,r9,r12
2640         str     r5,[r2],#4
2641         add     r10,r10,r3
2642         str     r6,[r2],#4
2643         add     r11,r11,r1
2644         str     r7,[r2],#4
2645         stmia   r2,{r8,r9,r10,r11}
2647         ittte   ne
2648         movne   r1,sp
2649         ldrne   r2,[sp,#0]
2650         eorne   r12,r12,r12
2651         ldreq   sp,[sp,#76]                     @ restore original sp
2652         itt     ne
2653         eorne   r3,r5,r6
2654         bne     .L_00_48
2656         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2657 .size   sha256_block_data_order_neon,.-sha256_block_data_order_neon
2658 #endif
2659 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2661 # if defined(__thumb2__) && !defined(__APPLE__)
2662 #  define INST(a,b,c,d) .byte   c,d|0xc,a,b
2663 # else
2664 #  define INST(a,b,c,d) .byte   a,b,c,d
2665 # endif
@-----------------------------------------------------------------------
@ sha256_block_data_order_armv8 -- SHA-256 compression using the ARMv8
@ Cryptographic Extension (sha256h/sha256h2/sha256su0/sha256su1).
@
@ C-equivalent: void sha256_block_data_order(uint32_t state[8],
@                                            const void *in, size_t num)
@ In:   r0 = hash state, 8 x u32 {a..h} (loaded into q0/q1, stored back)
@       r1 = input data, consumed in 64-byte blocks
@       r2 = number of 64-byte blocks
@       r3 = on the __APPLE__ path, assumed to already point just past
@            K256 (presumably set by the dispatch code before branching
@            here -- not visible in this file; TODO confirm); on other
@            paths it is recomputed below.
@ Clobbers: r1-r3, q0-q2, q8-q15, flags.
@
@ The SHA-256 instructions are emitted as raw bytes via the INST macro
@ (defined above) with their mnemonics in trailing comments -- NOTE:
@ presumably because older assemblers lack these mnemonics; do not
@ "clean up" the byte sequences.
@-----------------------------------------------------------------------
.type   sha256_block_data_order_armv8,%function
.align  5
sha256_block_data_order_armv8:
.LARMv8:
        vld1.32 {q0,q1},[r0]            @ q0 = state[0..3] (ABCD), q1 = state[4..7] (EFGH)
# ifdef __APPLE__
        sub     r3,r3,#256+32           @ rewind r3 to K256 (256 B of K + 32 B past it)
# elif  defined(__thumb2__)
        adr     r3,.LARMv8
        sub     r3,r3,#.LARMv8-K256     @ r3 = K256 round-constant table
# else
        adrl    r3,K256
# endif
        add     r2,r1,r2,lsl#6  @ len to point at the end of inp

.Loop_v8:
        @ Load one 64-byte block into q8-q11 and byte-swap to big-endian words.
        vld1.8  {q8,q9},[r1]!
        vld1.8  {q10,q11},[r1]!
        vld1.32 {q12},[r3]!             @ first 4 round constants
        vrev32.8        q8,q8
        vrev32.8        q9,q9
        vrev32.8        q10,q10
        vrev32.8        q11,q11
        vmov    q14,q0  @ offload
        vmov    q15,q1                  @ q14/q15 keep the input state for final add
        teq     r1,r2                   @ set Z if this is the last block (tested at bne below)
        @ Rounds 0-47: 12 groups of 4 rounds.  Each group: load next 4 K words,
        @ q12/q13 = K + W, sha256h/sha256h2 advance the state by 4 rounds, and
        @ sha256su0/sha256su1 expand the message schedule for a later group.
        vld1.32 {q13},[r3]!
        vadd.i32        q12,q12,q8
        INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
        vmov    q2,q0                   @ q2 = old ABCD, needed by sha256h2
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
        INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
        vld1.32 {q12},[r3]!
        vadd.i32        q13,q13,q9
        INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
        INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
        vld1.32 {q13},[r3]!
        vadd.i32        q12,q12,q10
        INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
        vmov    q2,q0
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
        INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
        vld1.32 {q12},[r3]!
        vadd.i32        q13,q13,q11
        INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
        INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
        vld1.32 {q13},[r3]!
        vadd.i32        q12,q12,q8
        INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
        vmov    q2,q0
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
        INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
        vld1.32 {q12},[r3]!
        vadd.i32        q13,q13,q9
        INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
        INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
        vld1.32 {q13},[r3]!
        vadd.i32        q12,q12,q10
        INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
        vmov    q2,q0
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
        INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
        vld1.32 {q12},[r3]!
        vadd.i32        q13,q13,q11
        INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
        INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
        vld1.32 {q13},[r3]!
        vadd.i32        q12,q12,q8
        INST(0xe2,0x03,0xfa,0xf3)       @ sha256su0 q8,q9
        vmov    q2,q0
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
        INST(0xe6,0x0c,0x64,0xf3)       @ sha256su1 q8,q10,q11
        vld1.32 {q12},[r3]!
        vadd.i32        q13,q13,q9
        INST(0xe4,0x23,0xfa,0xf3)       @ sha256su0 q9,q10
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
        INST(0xe0,0x2c,0x66,0xf3)       @ sha256su1 q9,q11,q8
        vld1.32 {q13},[r3]!
        vadd.i32        q12,q12,q10
        INST(0xe6,0x43,0xfa,0xf3)       @ sha256su0 q10,q11
        vmov    q2,q0
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12
        INST(0xe2,0x4c,0x60,0xf3)       @ sha256su1 q10,q8,q9
        vld1.32 {q12},[r3]!
        vadd.i32        q13,q13,q11
        INST(0xe0,0x63,0xfa,0xf3)       @ sha256su0 q11,q8
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13
        INST(0xe4,0x6c,0x62,0xf3)       @ sha256su1 q11,q9,q10
        @ Rounds 48-63: last 4 groups, no more schedule expansion needed.
        vld1.32 {q13},[r3]!
        vadd.i32        q12,q12,q8
        vmov    q2,q0
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12

        vld1.32 {q12},[r3]!
        vadd.i32        q13,q13,q9
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13

        vld1.32 {q13},[r3]              @ last constants: no writeback, r3 rewound below
        vadd.i32        q12,q12,q10
        sub     r3,r3,#256-16   @ rewind
        vmov    q2,q0
        INST(0x68,0x0c,0x02,0xf3)       @ sha256h q0,q1,q12
        INST(0x68,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q12

        vadd.i32        q13,q13,q11
        vmov    q2,q0
        INST(0x6a,0x0c,0x02,0xf3)       @ sha256h q0,q1,q13
        INST(0x6a,0x2c,0x14,0xf3)       @ sha256h2 q1,q2,q13

        @ Feed-forward: add the saved input state, then loop if blocks remain
        @ (flags still hold the teq r1,r2 result from above).
        vadd.i32        q0,q0,q14
        vadd.i32        q1,q1,q15
        it      ne
        bne     .Loop_v8

        vst1.32 {q0,q1},[r0]            @ write updated state back to ctx

        bx      lr              @ bx lr
.size   sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2810 #endif
2811 .byte   83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2812 .align  2
2813 .align  2
2814 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2815 .comm   OPENSSL_armcap_P,4,4
2816 .hidden OPENSSL_armcap_P
2817 #endif
2818 #endif