3 @ ====================================================================
4 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 @ project. The module is, however, dual licensed under OpenSSL and
6 @ CRYPTOGAMS licenses depending on where you obtain it. For further
7 @ details see http://www.openssl.org/~appro/cryptogams/.
9 @ Permission to use under GPL terms is granted.
10 @ ====================================================================
12 @ SHA256 block procedure for ARMv4. May 2007.
14 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
15 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
16 @ byte [on single-issue Xscale PXA250 core].
20 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
21 @ Cortex A8 core and ~20 cycles per processed byte.
25 @ Profiler-assisted and platform-specific optimization resulted in 16%
26 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
30 @ Add NEON implementation. On Cortex A8 it was measured to process one
31 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
32 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
33 @ code (meaning that latter performs sub-optimally, nothing was done about it).
38 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
41 # include <openssl/arm_arch.h>
43 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
44 # define __ARM_MAX_ARCH__ 7
52 # if defined(__thumb2__) && !defined(__APPLE__)
63 .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
64 .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
65 .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
66 .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
67 .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
68 .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
69 .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
70 .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
71 .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
72 .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
73 .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
74 .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
75 .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
76 .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
77 .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
78 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
81 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
83 .word OPENSSL_armcap_P-.Lsha256_block_data_order
87 .globl sha256_block_data_order
88 .type sha256_block_data_order,%function
89 sha256_block_data_order:
90 .Lsha256_block_data_order:
92 sub r3,pc,#8 @ sha256_block_data_order
94 adr r3,sha256_block_data_order
96 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
97 ldr r12,.LOPENSSL_armcap
98 ldr r12,[r3,r12] @ OPENSSL_armcap_P
102 tst r12,#ARMV8_SHA256
107 add r2,r1,r2,lsl#6 @ len to point at the end of inp
108 stmdb sp!,{r0,r1,r2,r4-r11,lr}
109 ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
110 sub r14,r3,#256+32 @ K256
111 sub sp,sp,#16*4 @ alloca(X[16])
123 str r1,[sp,#17*4] @ make room for r1
126 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
127 eor r0,r0,r8,ror#19 @ Sigma1(e)
132 @ ldrb r2,[r1,#3] @ 0
133 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
140 str r1,[sp,#17*4] @ make room for r1
144 eor r0,r0,r8,ror#19 @ Sigma1(e)
146 ldr r12,[r14],#4 @ *K256++
147 add r11,r11,r2 @ h+=X[i]
150 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
152 add r11,r11,r12 @ h+=K256[i]
153 eor r2,r2,r10 @ Ch(e,f,g)
155 add r11,r11,r2 @ h+=Ch(e,f,g)
158 cmp r12,#0xf2 @ done?
162 ldr r2,[r1],#4 @ prefetch
166 eor r12,r4,r5 @ a^b, b^c in next round
168 ldr r2,[sp,#2*4] @ from future BODY_16_xx
169 eor r12,r4,r5 @ a^b, b^c in next round
170 ldr r1,[sp,#15*4] @ from future BODY_16_xx
172 eor r0,r0,r4,ror#20 @ Sigma0(a)
173 and r3,r3,r12 @ (b^c)&=(a^b)
175 eor r3,r3,r5 @ Maj(a,b,c)
176 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
177 @ add r11,r11,r3 @ h+=Maj(a,b,c)
181 str r1,[sp,#17*4] @ make room for r1
184 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
185 eor r0,r0,r7,ror#19 @ Sigma1(e)
190 @ ldrb r2,[r1,#3] @ 1
191 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
198 str r1,[sp,#17*4] @ make room for r1
202 eor r0,r0,r7,ror#19 @ Sigma1(e)
204 ldr r3,[r14],#4 @ *K256++
205 add r10,r10,r2 @ h+=X[i]
208 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
210 add r10,r10,r3 @ h+=K256[i]
211 eor r2,r2,r9 @ Ch(e,f,g)
212 eor r0,r11,r11,ror#11
213 add r10,r10,r2 @ h+=Ch(e,f,g)
220 ldr r2,[r1],#4 @ prefetch
224 eor r3,r11,r4 @ a^b, b^c in next round
226 ldr r2,[sp,#3*4] @ from future BODY_16_xx
227 eor r3,r11,r4 @ a^b, b^c in next round
228 ldr r1,[sp,#0*4] @ from future BODY_16_xx
230 eor r0,r0,r11,ror#20 @ Sigma0(a)
231 and r12,r12,r3 @ (b^c)&=(a^b)
233 eor r12,r12,r4 @ Maj(a,b,c)
234 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
235 @ add r10,r10,r12 @ h+=Maj(a,b,c)
239 str r1,[sp,#17*4] @ make room for r1
242 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
243 eor r0,r0,r6,ror#19 @ Sigma1(e)
248 @ ldrb r2,[r1,#3] @ 2
249 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
256 str r1,[sp,#17*4] @ make room for r1
260 eor r0,r0,r6,ror#19 @ Sigma1(e)
262 ldr r12,[r14],#4 @ *K256++
263 add r9,r9,r2 @ h+=X[i]
266 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
268 add r9,r9,r12 @ h+=K256[i]
269 eor r2,r2,r8 @ Ch(e,f,g)
270 eor r0,r10,r10,ror#11
271 add r9,r9,r2 @ h+=Ch(e,f,g)
274 cmp r12,#0xf2 @ done?
278 ldr r2,[r1],#4 @ prefetch
282 eor r12,r10,r11 @ a^b, b^c in next round
284 ldr r2,[sp,#4*4] @ from future BODY_16_xx
285 eor r12,r10,r11 @ a^b, b^c in next round
286 ldr r1,[sp,#1*4] @ from future BODY_16_xx
288 eor r0,r0,r10,ror#20 @ Sigma0(a)
289 and r3,r3,r12 @ (b^c)&=(a^b)
291 eor r3,r3,r11 @ Maj(a,b,c)
292 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
293 @ add r9,r9,r3 @ h+=Maj(a,b,c)
297 str r1,[sp,#17*4] @ make room for r1
300 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
301 eor r0,r0,r5,ror#19 @ Sigma1(e)
306 @ ldrb r2,[r1,#3] @ 3
307 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
314 str r1,[sp,#17*4] @ make room for r1
318 eor r0,r0,r5,ror#19 @ Sigma1(e)
320 ldr r3,[r14],#4 @ *K256++
321 add r8,r8,r2 @ h+=X[i]
324 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
326 add r8,r8,r3 @ h+=K256[i]
327 eor r2,r2,r7 @ Ch(e,f,g)
329 add r8,r8,r2 @ h+=Ch(e,f,g)
336 ldr r2,[r1],#4 @ prefetch
340 eor r3,r9,r10 @ a^b, b^c in next round
342 ldr r2,[sp,#5*4] @ from future BODY_16_xx
343 eor r3,r9,r10 @ a^b, b^c in next round
344 ldr r1,[sp,#2*4] @ from future BODY_16_xx
346 eor r0,r0,r9,ror#20 @ Sigma0(a)
347 and r12,r12,r3 @ (b^c)&=(a^b)
349 eor r12,r12,r10 @ Maj(a,b,c)
350 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
351 @ add r8,r8,r12 @ h+=Maj(a,b,c)
355 str r1,[sp,#17*4] @ make room for r1
358 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
359 eor r0,r0,r4,ror#19 @ Sigma1(e)
364 @ ldrb r2,[r1,#3] @ 4
365 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
372 str r1,[sp,#17*4] @ make room for r1
376 eor r0,r0,r4,ror#19 @ Sigma1(e)
378 ldr r12,[r14],#4 @ *K256++
379 add r7,r7,r2 @ h+=X[i]
382 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
384 add r7,r7,r12 @ h+=K256[i]
385 eor r2,r2,r6 @ Ch(e,f,g)
387 add r7,r7,r2 @ h+=Ch(e,f,g)
390 cmp r12,#0xf2 @ done?
394 ldr r2,[r1],#4 @ prefetch
398 eor r12,r8,r9 @ a^b, b^c in next round
400 ldr r2,[sp,#6*4] @ from future BODY_16_xx
401 eor r12,r8,r9 @ a^b, b^c in next round
402 ldr r1,[sp,#3*4] @ from future BODY_16_xx
404 eor r0,r0,r8,ror#20 @ Sigma0(a)
405 and r3,r3,r12 @ (b^c)&=(a^b)
406 add r11,r11,r7 @ d+=h
407 eor r3,r3,r9 @ Maj(a,b,c)
408 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
409 @ add r7,r7,r3 @ h+=Maj(a,b,c)
413 str r1,[sp,#17*4] @ make room for r1
416 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
417 eor r0,r0,r11,ror#19 @ Sigma1(e)
422 @ ldrb r2,[r1,#3] @ 5
423 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
430 str r1,[sp,#17*4] @ make room for r1
434 eor r0,r0,r11,ror#19 @ Sigma1(e)
436 ldr r3,[r14],#4 @ *K256++
437 add r6,r6,r2 @ h+=X[i]
440 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
442 add r6,r6,r3 @ h+=K256[i]
443 eor r2,r2,r5 @ Ch(e,f,g)
445 add r6,r6,r2 @ h+=Ch(e,f,g)
452 ldr r2,[r1],#4 @ prefetch
456 eor r3,r7,r8 @ a^b, b^c in next round
458 ldr r2,[sp,#7*4] @ from future BODY_16_xx
459 eor r3,r7,r8 @ a^b, b^c in next round
460 ldr r1,[sp,#4*4] @ from future BODY_16_xx
462 eor r0,r0,r7,ror#20 @ Sigma0(a)
463 and r12,r12,r3 @ (b^c)&=(a^b)
464 add r10,r10,r6 @ d+=h
465 eor r12,r12,r8 @ Maj(a,b,c)
466 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
467 @ add r6,r6,r12 @ h+=Maj(a,b,c)
471 str r1,[sp,#17*4] @ make room for r1
474 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
475 eor r0,r0,r10,ror#19 @ Sigma1(e)
480 @ ldrb r2,[r1,#3] @ 6
481 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
488 str r1,[sp,#17*4] @ make room for r1
492 eor r0,r0,r10,ror#19 @ Sigma1(e)
494 ldr r12,[r14],#4 @ *K256++
495 add r5,r5,r2 @ h+=X[i]
498 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
500 add r5,r5,r12 @ h+=K256[i]
501 eor r2,r2,r4 @ Ch(e,f,g)
503 add r5,r5,r2 @ h+=Ch(e,f,g)
506 cmp r12,#0xf2 @ done?
510 ldr r2,[r1],#4 @ prefetch
514 eor r12,r6,r7 @ a^b, b^c in next round
516 ldr r2,[sp,#8*4] @ from future BODY_16_xx
517 eor r12,r6,r7 @ a^b, b^c in next round
518 ldr r1,[sp,#5*4] @ from future BODY_16_xx
520 eor r0,r0,r6,ror#20 @ Sigma0(a)
521 and r3,r3,r12 @ (b^c)&=(a^b)
523 eor r3,r3,r7 @ Maj(a,b,c)
524 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
525 @ add r5,r5,r3 @ h+=Maj(a,b,c)
529 str r1,[sp,#17*4] @ make room for r1
532 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
533 eor r0,r0,r9,ror#19 @ Sigma1(e)
538 @ ldrb r2,[r1,#3] @ 7
539 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
546 str r1,[sp,#17*4] @ make room for r1
550 eor r0,r0,r9,ror#19 @ Sigma1(e)
552 ldr r3,[r14],#4 @ *K256++
553 add r4,r4,r2 @ h+=X[i]
556 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
558 add r4,r4,r3 @ h+=K256[i]
559 eor r2,r2,r11 @ Ch(e,f,g)
561 add r4,r4,r2 @ h+=Ch(e,f,g)
568 ldr r2,[r1],#4 @ prefetch
572 eor r3,r5,r6 @ a^b, b^c in next round
574 ldr r2,[sp,#9*4] @ from future BODY_16_xx
575 eor r3,r5,r6 @ a^b, b^c in next round
576 ldr r1,[sp,#6*4] @ from future BODY_16_xx
578 eor r0,r0,r5,ror#20 @ Sigma0(a)
579 and r12,r12,r3 @ (b^c)&=(a^b)
581 eor r12,r12,r6 @ Maj(a,b,c)
582 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
583 @ add r4,r4,r12 @ h+=Maj(a,b,c)
587 str r1,[sp,#17*4] @ make room for r1
590 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
591 eor r0,r0,r8,ror#19 @ Sigma1(e)
596 @ ldrb r2,[r1,#3] @ 8
597 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
604 str r1,[sp,#17*4] @ make room for r1
608 eor r0,r0,r8,ror#19 @ Sigma1(e)
610 ldr r12,[r14],#4 @ *K256++
611 add r11,r11,r2 @ h+=X[i]
614 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
616 add r11,r11,r12 @ h+=K256[i]
617 eor r2,r2,r10 @ Ch(e,f,g)
619 add r11,r11,r2 @ h+=Ch(e,f,g)
622 cmp r12,#0xf2 @ done?
626 ldr r2,[r1],#4 @ prefetch
630 eor r12,r4,r5 @ a^b, b^c in next round
632 ldr r2,[sp,#10*4] @ from future BODY_16_xx
633 eor r12,r4,r5 @ a^b, b^c in next round
634 ldr r1,[sp,#7*4] @ from future BODY_16_xx
636 eor r0,r0,r4,ror#20 @ Sigma0(a)
637 and r3,r3,r12 @ (b^c)&=(a^b)
639 eor r3,r3,r5 @ Maj(a,b,c)
640 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
641 @ add r11,r11,r3 @ h+=Maj(a,b,c)
645 str r1,[sp,#17*4] @ make room for r1
648 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
649 eor r0,r0,r7,ror#19 @ Sigma1(e)
654 @ ldrb r2,[r1,#3] @ 9
655 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
662 str r1,[sp,#17*4] @ make room for r1
666 eor r0,r0,r7,ror#19 @ Sigma1(e)
668 ldr r3,[r14],#4 @ *K256++
669 add r10,r10,r2 @ h+=X[i]
672 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
674 add r10,r10,r3 @ h+=K256[i]
675 eor r2,r2,r9 @ Ch(e,f,g)
676 eor r0,r11,r11,ror#11
677 add r10,r10,r2 @ h+=Ch(e,f,g)
684 ldr r2,[r1],#4 @ prefetch
688 eor r3,r11,r4 @ a^b, b^c in next round
690 ldr r2,[sp,#11*4] @ from future BODY_16_xx
691 eor r3,r11,r4 @ a^b, b^c in next round
692 ldr r1,[sp,#8*4] @ from future BODY_16_xx
694 eor r0,r0,r11,ror#20 @ Sigma0(a)
695 and r12,r12,r3 @ (b^c)&=(a^b)
697 eor r12,r12,r4 @ Maj(a,b,c)
698 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
699 @ add r10,r10,r12 @ h+=Maj(a,b,c)
701 @ ldr r2,[r1],#4 @ 10
703 str r1,[sp,#17*4] @ make room for r1
706 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
707 eor r0,r0,r6,ror#19 @ Sigma1(e)
712 @ ldrb r2,[r1,#3] @ 10
713 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
720 str r1,[sp,#17*4] @ make room for r1
724 eor r0,r0,r6,ror#19 @ Sigma1(e)
726 ldr r12,[r14],#4 @ *K256++
727 add r9,r9,r2 @ h+=X[i]
730 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
732 add r9,r9,r12 @ h+=K256[i]
733 eor r2,r2,r8 @ Ch(e,f,g)
734 eor r0,r10,r10,ror#11
735 add r9,r9,r2 @ h+=Ch(e,f,g)
738 cmp r12,#0xf2 @ done?
742 ldr r2,[r1],#4 @ prefetch
746 eor r12,r10,r11 @ a^b, b^c in next round
748 ldr r2,[sp,#12*4] @ from future BODY_16_xx
749 eor r12,r10,r11 @ a^b, b^c in next round
750 ldr r1,[sp,#9*4] @ from future BODY_16_xx
752 eor r0,r0,r10,ror#20 @ Sigma0(a)
753 and r3,r3,r12 @ (b^c)&=(a^b)
755 eor r3,r3,r11 @ Maj(a,b,c)
756 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
757 @ add r9,r9,r3 @ h+=Maj(a,b,c)
759 @ ldr r2,[r1],#4 @ 11
761 str r1,[sp,#17*4] @ make room for r1
764 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
765 eor r0,r0,r5,ror#19 @ Sigma1(e)
770 @ ldrb r2,[r1,#3] @ 11
771 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
778 str r1,[sp,#17*4] @ make room for r1
782 eor r0,r0,r5,ror#19 @ Sigma1(e)
784 ldr r3,[r14],#4 @ *K256++
785 add r8,r8,r2 @ h+=X[i]
788 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
790 add r8,r8,r3 @ h+=K256[i]
791 eor r2,r2,r7 @ Ch(e,f,g)
793 add r8,r8,r2 @ h+=Ch(e,f,g)
800 ldr r2,[r1],#4 @ prefetch
804 eor r3,r9,r10 @ a^b, b^c in next round
806 ldr r2,[sp,#13*4] @ from future BODY_16_xx
807 eor r3,r9,r10 @ a^b, b^c in next round
808 ldr r1,[sp,#10*4] @ from future BODY_16_xx
810 eor r0,r0,r9,ror#20 @ Sigma0(a)
811 and r12,r12,r3 @ (b^c)&=(a^b)
813 eor r12,r12,r10 @ Maj(a,b,c)
814 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
815 @ add r8,r8,r12 @ h+=Maj(a,b,c)
817 @ ldr r2,[r1],#4 @ 12
819 str r1,[sp,#17*4] @ make room for r1
822 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
823 eor r0,r0,r4,ror#19 @ Sigma1(e)
828 @ ldrb r2,[r1,#3] @ 12
829 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
836 str r1,[sp,#17*4] @ make room for r1
840 eor r0,r0,r4,ror#19 @ Sigma1(e)
842 ldr r12,[r14],#4 @ *K256++
843 add r7,r7,r2 @ h+=X[i]
846 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
848 add r7,r7,r12 @ h+=K256[i]
849 eor r2,r2,r6 @ Ch(e,f,g)
851 add r7,r7,r2 @ h+=Ch(e,f,g)
854 cmp r12,#0xf2 @ done?
858 ldr r2,[r1],#4 @ prefetch
862 eor r12,r8,r9 @ a^b, b^c in next round
864 ldr r2,[sp,#14*4] @ from future BODY_16_xx
865 eor r12,r8,r9 @ a^b, b^c in next round
866 ldr r1,[sp,#11*4] @ from future BODY_16_xx
868 eor r0,r0,r8,ror#20 @ Sigma0(a)
869 and r3,r3,r12 @ (b^c)&=(a^b)
870 add r11,r11,r7 @ d+=h
871 eor r3,r3,r9 @ Maj(a,b,c)
872 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
873 @ add r7,r7,r3 @ h+=Maj(a,b,c)
875 @ ldr r2,[r1],#4 @ 13
877 str r1,[sp,#17*4] @ make room for r1
880 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
881 eor r0,r0,r11,ror#19 @ Sigma1(e)
886 @ ldrb r2,[r1,#3] @ 13
887 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
894 str r1,[sp,#17*4] @ make room for r1
898 eor r0,r0,r11,ror#19 @ Sigma1(e)
900 ldr r3,[r14],#4 @ *K256++
901 add r6,r6,r2 @ h+=X[i]
904 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
906 add r6,r6,r3 @ h+=K256[i]
907 eor r2,r2,r5 @ Ch(e,f,g)
909 add r6,r6,r2 @ h+=Ch(e,f,g)
916 ldr r2,[r1],#4 @ prefetch
920 eor r3,r7,r8 @ a^b, b^c in next round
922 ldr r2,[sp,#15*4] @ from future BODY_16_xx
923 eor r3,r7,r8 @ a^b, b^c in next round
924 ldr r1,[sp,#12*4] @ from future BODY_16_xx
926 eor r0,r0,r7,ror#20 @ Sigma0(a)
927 and r12,r12,r3 @ (b^c)&=(a^b)
928 add r10,r10,r6 @ d+=h
929 eor r12,r12,r8 @ Maj(a,b,c)
930 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
931 @ add r6,r6,r12 @ h+=Maj(a,b,c)
933 @ ldr r2,[r1],#4 @ 14
935 str r1,[sp,#17*4] @ make room for r1
938 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
939 eor r0,r0,r10,ror#19 @ Sigma1(e)
944 @ ldrb r2,[r1,#3] @ 14
945 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
952 str r1,[sp,#17*4] @ make room for r1
956 eor r0,r0,r10,ror#19 @ Sigma1(e)
958 ldr r12,[r14],#4 @ *K256++
959 add r5,r5,r2 @ h+=X[i]
962 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
964 add r5,r5,r12 @ h+=K256[i]
965 eor r2,r2,r4 @ Ch(e,f,g)
967 add r5,r5,r2 @ h+=Ch(e,f,g)
970 cmp r12,#0xf2 @ done?
974 ldr r2,[r1],#4 @ prefetch
978 eor r12,r6,r7 @ a^b, b^c in next round
980 ldr r2,[sp,#0*4] @ from future BODY_16_xx
981 eor r12,r6,r7 @ a^b, b^c in next round
982 ldr r1,[sp,#13*4] @ from future BODY_16_xx
984 eor r0,r0,r6,ror#20 @ Sigma0(a)
985 and r3,r3,r12 @ (b^c)&=(a^b)
987 eor r3,r3,r7 @ Maj(a,b,c)
988 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
989 @ add r5,r5,r3 @ h+=Maj(a,b,c)
991 @ ldr r2,[r1],#4 @ 15
993 str r1,[sp,#17*4] @ make room for r1
996 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
997 eor r0,r0,r9,ror#19 @ Sigma1(e)
1002 @ ldrb r2,[r1,#3] @ 15
1003 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1010 str r1,[sp,#17*4] @ make room for r1
1014 eor r0,r0,r9,ror#19 @ Sigma1(e)
1016 ldr r3,[r14],#4 @ *K256++
1017 add r4,r4,r2 @ h+=X[i]
1020 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1022 add r4,r4,r3 @ h+=K256[i]
1023 eor r2,r2,r11 @ Ch(e,f,g)
1025 add r4,r4,r2 @ h+=Ch(e,f,g)
1028 cmp r3,#0xf2 @ done?
1031 # if __ARM_ARCH__>=7
1032 ldr r2,[r1],#4 @ prefetch
1036 eor r3,r5,r6 @ a^b, b^c in next round
1038 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1039 eor r3,r5,r6 @ a^b, b^c in next round
1040 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1042 eor r0,r0,r5,ror#20 @ Sigma0(a)
1043 and r12,r12,r3 @ (b^c)&=(a^b)
1045 eor r12,r12,r6 @ Maj(a,b,c)
1046 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1047 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1049 @ ldr r2,[sp,#1*4] @ 16
1052 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1055 eor r12,r12,r1,ror#19
1056 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1058 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1062 eor r0,r8,r8,ror#5 @ from BODY_00_15
1064 eor r0,r0,r8,ror#19 @ Sigma1(e)
1066 ldr r12,[r14],#4 @ *K256++
1067 add r11,r11,r2 @ h+=X[i]
1070 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1072 add r11,r11,r12 @ h+=K256[i]
1073 eor r2,r2,r10 @ Ch(e,f,g)
1075 add r11,r11,r2 @ h+=Ch(e,f,g)
1078 cmp r12,#0xf2 @ done?
1081 # if __ARM_ARCH__>=7
1082 ldr r2,[r1],#4 @ prefetch
1086 eor r12,r4,r5 @ a^b, b^c in next round
1088 ldr r2,[sp,#2*4] @ from future BODY_16_xx
1089 eor r12,r4,r5 @ a^b, b^c in next round
1090 ldr r1,[sp,#15*4] @ from future BODY_16_xx
1092 eor r0,r0,r4,ror#20 @ Sigma0(a)
1093 and r3,r3,r12 @ (b^c)&=(a^b)
1094 add r7,r7,r11 @ d+=h
1095 eor r3,r3,r5 @ Maj(a,b,c)
1096 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1097 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1098 @ ldr r2,[sp,#2*4] @ 17
1101 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1105 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1107 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1111 eor r0,r7,r7,ror#5 @ from BODY_00_15
1113 eor r0,r0,r7,ror#19 @ Sigma1(e)
1115 ldr r3,[r14],#4 @ *K256++
1116 add r10,r10,r2 @ h+=X[i]
1119 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1121 add r10,r10,r3 @ h+=K256[i]
1122 eor r2,r2,r9 @ Ch(e,f,g)
1123 eor r0,r11,r11,ror#11
1124 add r10,r10,r2 @ h+=Ch(e,f,g)
1127 cmp r3,#0xf2 @ done?
1130 # if __ARM_ARCH__>=7
1131 ldr r2,[r1],#4 @ prefetch
1135 eor r3,r11,r4 @ a^b, b^c in next round
1137 ldr r2,[sp,#3*4] @ from future BODY_16_xx
1138 eor r3,r11,r4 @ a^b, b^c in next round
1139 ldr r1,[sp,#0*4] @ from future BODY_16_xx
1141 eor r0,r0,r11,ror#20 @ Sigma0(a)
1142 and r12,r12,r3 @ (b^c)&=(a^b)
1143 add r6,r6,r10 @ d+=h
1144 eor r12,r12,r4 @ Maj(a,b,c)
1145 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1146 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1147 @ ldr r2,[sp,#3*4] @ 18
1150 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1153 eor r12,r12,r1,ror#19
1154 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1156 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1160 eor r0,r6,r6,ror#5 @ from BODY_00_15
1162 eor r0,r0,r6,ror#19 @ Sigma1(e)
1164 ldr r12,[r14],#4 @ *K256++
1165 add r9,r9,r2 @ h+=X[i]
1168 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1170 add r9,r9,r12 @ h+=K256[i]
1171 eor r2,r2,r8 @ Ch(e,f,g)
1172 eor r0,r10,r10,ror#11
1173 add r9,r9,r2 @ h+=Ch(e,f,g)
1176 cmp r12,#0xf2 @ done?
1179 # if __ARM_ARCH__>=7
1180 ldr r2,[r1],#4 @ prefetch
1184 eor r12,r10,r11 @ a^b, b^c in next round
1186 ldr r2,[sp,#4*4] @ from future BODY_16_xx
1187 eor r12,r10,r11 @ a^b, b^c in next round
1188 ldr r1,[sp,#1*4] @ from future BODY_16_xx
1190 eor r0,r0,r10,ror#20 @ Sigma0(a)
1191 and r3,r3,r12 @ (b^c)&=(a^b)
1193 eor r3,r3,r11 @ Maj(a,b,c)
1194 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1195 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1196 @ ldr r2,[sp,#4*4] @ 19
1199 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1203 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1205 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1209 eor r0,r5,r5,ror#5 @ from BODY_00_15
1211 eor r0,r0,r5,ror#19 @ Sigma1(e)
1213 ldr r3,[r14],#4 @ *K256++
1214 add r8,r8,r2 @ h+=X[i]
1217 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1219 add r8,r8,r3 @ h+=K256[i]
1220 eor r2,r2,r7 @ Ch(e,f,g)
1222 add r8,r8,r2 @ h+=Ch(e,f,g)
1225 cmp r3,#0xf2 @ done?
1228 # if __ARM_ARCH__>=7
1229 ldr r2,[r1],#4 @ prefetch
1233 eor r3,r9,r10 @ a^b, b^c in next round
1235 ldr r2,[sp,#5*4] @ from future BODY_16_xx
1236 eor r3,r9,r10 @ a^b, b^c in next round
1237 ldr r1,[sp,#2*4] @ from future BODY_16_xx
1239 eor r0,r0,r9,ror#20 @ Sigma0(a)
1240 and r12,r12,r3 @ (b^c)&=(a^b)
1242 eor r12,r12,r10 @ Maj(a,b,c)
1243 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1244 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1245 @ ldr r2,[sp,#5*4] @ 20
1248 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1251 eor r12,r12,r1,ror#19
1252 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1254 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1258 eor r0,r4,r4,ror#5 @ from BODY_00_15
1260 eor r0,r0,r4,ror#19 @ Sigma1(e)
1262 ldr r12,[r14],#4 @ *K256++
1263 add r7,r7,r2 @ h+=X[i]
1266 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1268 add r7,r7,r12 @ h+=K256[i]
1269 eor r2,r2,r6 @ Ch(e,f,g)
1271 add r7,r7,r2 @ h+=Ch(e,f,g)
1274 cmp r12,#0xf2 @ done?
1277 # if __ARM_ARCH__>=7
1278 ldr r2,[r1],#4 @ prefetch
1282 eor r12,r8,r9 @ a^b, b^c in next round
1284 ldr r2,[sp,#6*4] @ from future BODY_16_xx
1285 eor r12,r8,r9 @ a^b, b^c in next round
1286 ldr r1,[sp,#3*4] @ from future BODY_16_xx
1288 eor r0,r0,r8,ror#20 @ Sigma0(a)
1289 and r3,r3,r12 @ (b^c)&=(a^b)
1290 add r11,r11,r7 @ d+=h
1291 eor r3,r3,r9 @ Maj(a,b,c)
1292 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1293 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1294 @ ldr r2,[sp,#6*4] @ 21
1297 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1301 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1303 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1307 eor r0,r11,r11,ror#5 @ from BODY_00_15
1309 eor r0,r0,r11,ror#19 @ Sigma1(e)
1311 ldr r3,[r14],#4 @ *K256++
1312 add r6,r6,r2 @ h+=X[i]
1315 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1317 add r6,r6,r3 @ h+=K256[i]
1318 eor r2,r2,r5 @ Ch(e,f,g)
1320 add r6,r6,r2 @ h+=Ch(e,f,g)
1323 cmp r3,#0xf2 @ done?
1326 # if __ARM_ARCH__>=7
1327 ldr r2,[r1],#4 @ prefetch
1331 eor r3,r7,r8 @ a^b, b^c in next round
1333 ldr r2,[sp,#7*4] @ from future BODY_16_xx
1334 eor r3,r7,r8 @ a^b, b^c in next round
1335 ldr r1,[sp,#4*4] @ from future BODY_16_xx
1337 eor r0,r0,r7,ror#20 @ Sigma0(a)
1338 and r12,r12,r3 @ (b^c)&=(a^b)
1339 add r10,r10,r6 @ d+=h
1340 eor r12,r12,r8 @ Maj(a,b,c)
1341 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1342 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1343 @ ldr r2,[sp,#7*4] @ 22
1346 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1349 eor r12,r12,r1,ror#19
1350 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1352 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1356 eor r0,r10,r10,ror#5 @ from BODY_00_15
1358 eor r0,r0,r10,ror#19 @ Sigma1(e)
1360 ldr r12,[r14],#4 @ *K256++
1361 add r5,r5,r2 @ h+=X[i]
1364 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1366 add r5,r5,r12 @ h+=K256[i]
1367 eor r2,r2,r4 @ Ch(e,f,g)
1369 add r5,r5,r2 @ h+=Ch(e,f,g)
1372 cmp r12,#0xf2 @ done?
1375 # if __ARM_ARCH__>=7
1376 ldr r2,[r1],#4 @ prefetch
1380 eor r12,r6,r7 @ a^b, b^c in next round
1382 ldr r2,[sp,#8*4] @ from future BODY_16_xx
1383 eor r12,r6,r7 @ a^b, b^c in next round
1384 ldr r1,[sp,#5*4] @ from future BODY_16_xx
1386 eor r0,r0,r6,ror#20 @ Sigma0(a)
1387 and r3,r3,r12 @ (b^c)&=(a^b)
1389 eor r3,r3,r7 @ Maj(a,b,c)
1390 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1391 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1392 @ ldr r2,[sp,#8*4] @ 23
1395 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1399 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1401 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1405 eor r0,r9,r9,ror#5 @ from BODY_00_15
1407 eor r0,r0,r9,ror#19 @ Sigma1(e)
1409 ldr r3,[r14],#4 @ *K256++
1410 add r4,r4,r2 @ h+=X[i]
1413 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1415 add r4,r4,r3 @ h+=K256[i]
1416 eor r2,r2,r11 @ Ch(e,f,g)
1418 add r4,r4,r2 @ h+=Ch(e,f,g)
1421 cmp r3,#0xf2 @ done?
1424 # if __ARM_ARCH__>=7
1425 ldr r2,[r1],#4 @ prefetch
1429 eor r3,r5,r6 @ a^b, b^c in next round
1431 ldr r2,[sp,#9*4] @ from future BODY_16_xx
1432 eor r3,r5,r6 @ a^b, b^c in next round
1433 ldr r1,[sp,#6*4] @ from future BODY_16_xx
1435 eor r0,r0,r5,ror#20 @ Sigma0(a)
1436 and r12,r12,r3 @ (b^c)&=(a^b)
1438 eor r12,r12,r6 @ Maj(a,b,c)
1439 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1440 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1441 @ ldr r2,[sp,#9*4] @ 24
1444 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1447 eor r12,r12,r1,ror#19
1448 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1450 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1454 eor r0,r8,r8,ror#5 @ from BODY_00_15
1456 eor r0,r0,r8,ror#19 @ Sigma1(e)
1458 ldr r12,[r14],#4 @ *K256++
1459 add r11,r11,r2 @ h+=X[i]
1462 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1464 add r11,r11,r12 @ h+=K256[i]
1465 eor r2,r2,r10 @ Ch(e,f,g)
1467 add r11,r11,r2 @ h+=Ch(e,f,g)
1470 cmp r12,#0xf2 @ done?
1473 # if __ARM_ARCH__>=7
1474 ldr r2,[r1],#4 @ prefetch
1478 eor r12,r4,r5 @ a^b, b^c in next round
1480 ldr r2,[sp,#10*4] @ from future BODY_16_xx
1481 eor r12,r4,r5 @ a^b, b^c in next round
1482 ldr r1,[sp,#7*4] @ from future BODY_16_xx
1484 eor r0,r0,r4,ror#20 @ Sigma0(a)
1485 and r3,r3,r12 @ (b^c)&=(a^b)
1486 add r7,r7,r11 @ d+=h
1487 eor r3,r3,r5 @ Maj(a,b,c)
1488 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1489 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1490 @ ldr r2,[sp,#10*4] @ 25
1493 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1497 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1499 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1503 eor r0,r7,r7,ror#5 @ from BODY_00_15
1505 eor r0,r0,r7,ror#19 @ Sigma1(e)
1507 ldr r3,[r14],#4 @ *K256++
1508 add r10,r10,r2 @ h+=X[i]
1511 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1513 add r10,r10,r3 @ h+=K256[i]
1514 eor r2,r2,r9 @ Ch(e,f,g)
1515 eor r0,r11,r11,ror#11
1516 add r10,r10,r2 @ h+=Ch(e,f,g)
1519 cmp r3,#0xf2 @ done?
1522 # if __ARM_ARCH__>=7
1523 ldr r2,[r1],#4 @ prefetch
1527 eor r3,r11,r4 @ a^b, b^c in next round
1529 ldr r2,[sp,#11*4] @ from future BODY_16_xx
1530 eor r3,r11,r4 @ a^b, b^c in next round
1531 ldr r1,[sp,#8*4] @ from future BODY_16_xx
1533 eor r0,r0,r11,ror#20 @ Sigma0(a)
1534 and r12,r12,r3 @ (b^c)&=(a^b)
1535 add r6,r6,r10 @ d+=h
1536 eor r12,r12,r4 @ Maj(a,b,c)
1537 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1538 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1539 @ ldr r2,[sp,#11*4] @ 26
1542 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1545 eor r12,r12,r1,ror#19
1546 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1548 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1552 eor r0,r6,r6,ror#5 @ from BODY_00_15
1554 eor r0,r0,r6,ror#19 @ Sigma1(e)
1556 ldr r12,[r14],#4 @ *K256++
1557 add r9,r9,r2 @ h+=X[i]
1560 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1562 add r9,r9,r12 @ h+=K256[i]
1563 eor r2,r2,r8 @ Ch(e,f,g)
1564 eor r0,r10,r10,ror#11
1565 add r9,r9,r2 @ h+=Ch(e,f,g)
1568 cmp r12,#0xf2 @ done?
1571 # if __ARM_ARCH__>=7
1572 ldr r2,[r1],#4 @ prefetch
1576 eor r12,r10,r11 @ a^b, b^c in next round
1578 ldr r2,[sp,#12*4] @ from future BODY_16_xx
1579 eor r12,r10,r11 @ a^b, b^c in next round
1580 ldr r1,[sp,#9*4] @ from future BODY_16_xx
1582 eor r0,r0,r10,ror#20 @ Sigma0(a)
1583 and r3,r3,r12 @ (b^c)&=(a^b)
1585 eor r3,r3,r11 @ Maj(a,b,c)
1586 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1587 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1588 @ ldr r2,[sp,#12*4] @ 27
1591 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1595 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1597 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1601 eor r0,r5,r5,ror#5 @ from BODY_00_15
1603 eor r0,r0,r5,ror#19 @ Sigma1(e)
1605 ldr r3,[r14],#4 @ *K256++
1606 add r8,r8,r2 @ h+=X[i]
1609 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1611 add r8,r8,r3 @ h+=K256[i]
1612 eor r2,r2,r7 @ Ch(e,f,g)
1614 add r8,r8,r2 @ h+=Ch(e,f,g)
1617 cmp r3,#0xf2 @ done?
1620 # if __ARM_ARCH__>=7
1621 ldr r2,[r1],#4 @ prefetch
1625 eor r3,r9,r10 @ a^b, b^c in next round
1627 ldr r2,[sp,#13*4] @ from future BODY_16_xx
1628 eor r3,r9,r10 @ a^b, b^c in next round
1629 ldr r1,[sp,#10*4] @ from future BODY_16_xx
1631 eor r0,r0,r9,ror#20 @ Sigma0(a)
1632 and r12,r12,r3 @ (b^c)&=(a^b)
1634 eor r12,r12,r10 @ Maj(a,b,c)
1635 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1636 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1637 @ ldr r2,[sp,#13*4] @ 28
1640 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1643 eor r12,r12,r1,ror#19
1644 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1646 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1650 eor r0,r4,r4,ror#5 @ from BODY_00_15
1652 eor r0,r0,r4,ror#19 @ Sigma1(e)
1654 ldr r12,[r14],#4 @ *K256++
1655 add r7,r7,r2 @ h+=X[i]
1658 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1660 add r7,r7,r12 @ h+=K256[i]
1661 eor r2,r2,r6 @ Ch(e,f,g)
1663 add r7,r7,r2 @ h+=Ch(e,f,g)
1666 cmp r12,#0xf2 @ done?
1669 # if __ARM_ARCH__>=7
1670 ldr r2,[r1],#4 @ prefetch
1674 eor r12,r8,r9 @ a^b, b^c in next round
1676 ldr r2,[sp,#14*4] @ from future BODY_16_xx
1677 eor r12,r8,r9 @ a^b, b^c in next round
1678 ldr r1,[sp,#11*4] @ from future BODY_16_xx
1680 eor r0,r0,r8,ror#20 @ Sigma0(a)
1681 and r3,r3,r12 @ (b^c)&=(a^b)
1682 add r11,r11,r7 @ d+=h
1683 eor r3,r3,r9 @ Maj(a,b,c)
1684 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1685 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1686 @ ldr r2,[sp,#14*4] @ 29
1689 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1693 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1695 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1699 eor r0,r11,r11,ror#5 @ from BODY_00_15
1701 eor r0,r0,r11,ror#19 @ Sigma1(e)
1703 ldr r3,[r14],#4 @ *K256++
1704 add r6,r6,r2 @ h+=X[i]
1707 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1709 add r6,r6,r3 @ h+=K256[i]
1710 eor r2,r2,r5 @ Ch(e,f,g)
1712 add r6,r6,r2 @ h+=Ch(e,f,g)
1715 cmp r3,#0xf2 @ done?
1718 # if __ARM_ARCH__>=7
1719 ldr r2,[r1],#4 @ prefetch
1723 eor r3,r7,r8 @ a^b, b^c in next round
1725 ldr r2,[sp,#15*4] @ from future BODY_16_xx
1726 eor r3,r7,r8 @ a^b, b^c in next round
1727 ldr r1,[sp,#12*4] @ from future BODY_16_xx
1729 eor r0,r0,r7,ror#20 @ Sigma0(a)
1730 and r12,r12,r3 @ (b^c)&=(a^b)
1731 add r10,r10,r6 @ d+=h
1732 eor r12,r12,r8 @ Maj(a,b,c)
1733 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1734 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1735 @ ldr r2,[sp,#15*4] @ 30
1738 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1741 eor r12,r12,r1,ror#19
1742 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1744 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1748 eor r0,r10,r10,ror#5 @ from BODY_00_15
1750 eor r0,r0,r10,ror#19 @ Sigma1(e)
1752 ldr r12,[r14],#4 @ *K256++
1753 add r5,r5,r2 @ h+=X[i]
1756 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1758 add r5,r5,r12 @ h+=K256[i]
1759 eor r2,r2,r4 @ Ch(e,f,g)
1761 add r5,r5,r2 @ h+=Ch(e,f,g)
1764 cmp r12,#0xf2 @ done?
1767 # if __ARM_ARCH__>=7
1768 ldr r2,[r1],#4 @ prefetch
1772 eor r12,r6,r7 @ a^b, b^c in next round
1774 ldr r2,[sp,#0*4] @ from future BODY_16_xx
1775 eor r12,r6,r7 @ a^b, b^c in next round
1776 ldr r1,[sp,#13*4] @ from future BODY_16_xx
1778 eor r0,r0,r6,ror#20 @ Sigma0(a)
1779 and r3,r3,r12 @ (b^c)&=(a^b)
1781 eor r3,r3,r7 @ Maj(a,b,c)
1782 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1783 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1784 @ ldr r2,[sp,#0*4] @ 31
1787 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1791 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1793 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1797 eor r0,r9,r9,ror#5 @ from BODY_00_15
1799 eor r0,r0,r9,ror#19 @ Sigma1(e)
1801 ldr r3,[r14],#4 @ *K256++
1802 add r4,r4,r2 @ h+=X[i]
1805 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1807 add r4,r4,r3 @ h+=K256[i]
1808 eor r2,r2,r11 @ Ch(e,f,g)
1810 add r4,r4,r2 @ h+=Ch(e,f,g)
1813 cmp r3,#0xf2 @ done?
1816 # if __ARM_ARCH__>=7
1817 ldr r2,[r1],#4 @ prefetch
1821 eor r3,r5,r6 @ a^b, b^c in next round
1823 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1824 eor r3,r5,r6 @ a^b, b^c in next round
1825 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1827 eor r0,r0,r5,ror#20 @ Sigma0(a)
1828 and r12,r12,r3 @ (b^c)&=(a^b)
1830 eor r12,r12,r6 @ Maj(a,b,c)
1831 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1832 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1834 ite eq @ Thumb2 thing, sanity check in ARM
1836 ldreq r3,[sp,#16*4] @ pull ctx
1839 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1854 ldr r1,[sp,#17*4] @ pull inp
1855 ldr r12,[sp,#18*4] @ pull inp+len
1858 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1860 sub r14,r14,#256 @ rewind Ktbl
1863 add sp,sp,#19*4 @ destroy frame
1865 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1867 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1869 moveq pc,lr @ be binary compatible with V4, yet
1870 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
1872 .size sha256_block_data_order,.-sha256_block_data_order
1873 #if __ARM_MAX_ARCH__>=7
@ ------------------------------------------------------------------
@ sha256_block_data_order_neon(ctx=r0, inp=r1, num=r2)
@ NEON code path of the SHA-256 block transform.  From the visible
@ lines: r0 = 8-word hash state (loaded via ldmia below), r1 = input,
@ r2 = block count (converted to an end pointer), r14 = K256 pointer.
@ NOTE(review): this listing is an elided extract of the generated
@ perlasm output — many interior instructions are not shown, so the
@ comments below describe only what the visible lines establish.
@ ------------------------------------------------------------------
1877 .globl sha256_block_data_order_neon
1878 .type sha256_block_data_order_neon,%function
1880 sha256_block_data_order_neon:
@ Prologue: save callee-saved registers plus r12/lr.
1882 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1886 bic r11,r11,#15 @ align for 128-bit stores
1889 add r2,r1,r2,lsl#6 @ len to point at the end of inp
@ Load first 4x4 words of the K256 constant table (r14) into q8-q11.
1895 vld1.32 {q8},[r14,:128]!
1896 vld1.32 {q9},[r14,:128]!
1897 vld1.32 {q10},[r14,:128]!
1898 vld1.32 {q11},[r14,:128]!
@ Byte-swap the message words to the big-endian order SHA-256 expects.
1899 vrev32.8 q0,q0 @ yes, even on
1901 vrev32.8 q1,q1 @ big-endian
1907 str r12,[sp,#76] @ save original sp
@ Spill X[i]+K[i] values to the (aligned) stack frame at r1.
1910 vst1.32 {q8},[r1,:128]!
1912 vst1.32 {q9},[r1,:128]!
1914 vst1.32 {q10},[r1,:128]!
1915 vst1.32 {q11},[r1,:128]!
@ Load the eight 32-bit state words a..h into r4-r11.
1917 ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
@ Scalar round fragments: the eor/add ror#6/ror#2/ror#11/ror#20
@ sequences below are pieces of the Sigma0/Sigma1 computations of
@ interleaved SHA-256 rounds (schedule runs on NEON in parallel).
1933 eor r12,r0,r8,ror#19
1938 add r11,r11,r12,ror#6
1948 add r11,r11,r0,ror#2
1960 eor r0,r11,r11,ror#11
1963 add r10,r10,r3,ror#6
1966 eor r0,r0,r11,ror#20
1974 add r10,r10,r0,ror#2
1984 eor r12,r0,r6,ror#19
1986 eor r0,r10,r10,ror#11
1992 eor r0,r0,r10,ror#20
2001 vld1.32 {q8},[r14,:128]!
2021 vst1.32 {q8},[r1,:128]!
2031 eor r12,r0,r4,ror#19
2052 eor r0,r11,r11,ror#5
2057 eor r3,r0,r11,ror#19
2078 eor r0,r10,r10,ror#5
2082 eor r12,r0,r10,ror#19
2099 vld1.32 {q8},[r14,:128]!
2119 vst1.32 {q8},[r1,:128]!
2129 eor r12,r0,r8,ror#19
2134 add r11,r11,r12,ror#6
2144 add r11,r11,r0,ror#2
2156 eor r0,r11,r11,ror#11
2159 add r10,r10,r3,ror#6
2162 eor r0,r0,r11,ror#20
2170 add r10,r10,r0,ror#2
2180 eor r12,r0,r6,ror#19
2182 eor r0,r10,r10,ror#11
2188 eor r0,r0,r10,ror#20
2197 vld1.32 {q8},[r14,:128]!
2217 vst1.32 {q8},[r1,:128]!
2227 eor r12,r0,r4,ror#19
2248 eor r0,r11,r11,ror#5
2253 eor r3,r0,r11,ror#19
2274 eor r0,r10,r10,ror#5
2278 eor r12,r0,r10,ror#19
2295 vld1.32 {q8},[r14,:128]!
2315 vst1.32 {q8},[r1,:128]!
@ End-of-schedule bookkeeping: K256 is followed by a zero terminator
@ word; when it is reached, rewind the table pointer for next block.
2318 teq r2,#0 @ check for K256 terminator
2325 sub r14,r14,#256 @ rewind r14
2326 @ On the last block, back up inp so the prefetch below stays in bounds.
2328 subeq r1,r1,#64 @ avoid SEGV
2329 vld1.8 {q0},[r1]! @ load next input block
2340 vld1.32 {q8},[r14,:128]!
2342 eor r12,r0,r8,ror#19
2346 add r11,r11,r12,ror#6
2354 add r11,r11,r0,ror#2
2362 eor r0,r11,r11,ror#11
2364 add r10,r10,r3,ror#6
2366 eor r0,r0,r11,ror#20
2371 add r10,r10,r0,ror#2
2378 eor r12,r0,r6,ror#19
2379 eor r0,r10,r10,ror#11
2383 eor r0,r0,r10,ror#20
2407 vst1.32 {q8},[r1,:128]!
2412 vld1.32 {q8},[r14,:128]!
2414 eor r12,r0,r4,ror#19
2430 eor r0,r11,r11,ror#5
2433 eor r3,r0,r11,ror#19
2447 eor r0,r10,r10,ror#5
2450 eor r12,r0,r10,ror#19
2479 vst1.32 {q8},[r1,:128]!
2484 vld1.32 {q8},[r14,:128]!
2486 eor r12,r0,r8,ror#19
2490 add r11,r11,r12,ror#6
2498 add r11,r11,r0,ror#2
2506 eor r0,r11,r11,ror#11
2508 add r10,r10,r3,ror#6
2510 eor r0,r0,r11,ror#20
2515 add r10,r10,r0,ror#2
2522 eor r12,r0,r6,ror#19
2523 eor r0,r10,r10,ror#11
2527 eor r0,r0,r10,ror#20
2551 vst1.32 {q8},[r1,:128]!
2556 vld1.32 {q8},[r14,:128]!
2558 eor r12,r0,r4,ror#19
2574 eor r0,r11,r11,ror#5
2577 eor r3,r0,r11,ror#19
2591 eor r0,r10,r10,ror#5
2594 eor r12,r0,r10,ror#19
2623 vst1.32 {q8},[r1,:128]!
@ Fold the deferred Maj() term in, then accumulate into the context.
2625 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
2629 add r4,r4,r0 @ accumulate
@ Write the second half of the updated state (e..h) back via r2.
2645 stmia r2,{r8,r9,r10,r11}
@ Epilogue: on the final block restore the caller's sp and return.
2651 ldreq sp,[sp,#76] @ restore original sp
2656 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2657 .size sha256_block_data_order_neon,.-sha256_block_data_order_neon
2659 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ ------------------------------------------------------------------
@ sha256_block_data_order_armv8(ctx=r0, inp=r1, num=r2)
@ ARMv8 Cryptography Extension code path: uses the SHA256H/SHA256H2/
@ SHA256SU0/SHA256SU1 instructions, emitted as raw bytes through the
@ INST() macro so the module assembles even with toolchains that do
@ not know these mnemonics.  NOTE(review): elided extract of the
@ generated output — interior schedule/constant loads are not shown.
@ ------------------------------------------------------------------
2661 # if defined(__thumb2__) && !defined(__APPLE__)
@ Thumb-2 encodes the 32-bit NEON opcode with halfwords swapped,
@ hence the different byte order (and the |0xc opcode fixup) here.
2662 # define INST(a,b,c,d) .byte c,d|0xc,a,b
@ ARM-mode (or Apple) encoding: bytes in natural little-endian order.
2664 # define INST(a,b,c,d) .byte a,b,c,d
2667 .type sha256_block_data_order_armv8,%function
2669 sha256_block_data_order_armv8:
@ Load the hash state: q0 = {a,b,c,d}, q1 = {e,f,g,h}.
2671 vld1.32 {q0,q1},[r0]
2674 # elif defined(__thumb2__)
@ Derive a pointer to the K256 constant table from the .LARMv8 anchor.
2676 sub r3,r3,#.LARMv8-K256
2680 add r2,r1,r2,lsl#6 @ len to point at the end of inp
@ Load one 64-byte message block into q8-q11.
2683 vld1.8 {q8,q9},[r1]!
2684 vld1.8 {q10,q11},[r1]!
2690 vmov q14,q0 @ offload
@ Quad-round groups: each sha256su0/su1 pair advances the message
@ schedule while sha256h/sha256h2 perform four rounds on q0/q1.
2695 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2697 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2698 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2699 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2702 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2704 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2705 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2706 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2708 vadd.i32 q12,q12,q10
2709 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2711 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2712 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2713 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2715 vadd.i32 q13,q13,q11
2716 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2718 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2719 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2720 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2723 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2725 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2726 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2727 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2730 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2732 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2733 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2734 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2736 vadd.i32 q12,q12,q10
2737 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2739 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2740 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2741 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2743 vadd.i32 q13,q13,q11
2744 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2746 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2747 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2748 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2751 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2753 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2754 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2755 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2758 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2760 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2761 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2762 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2764 vadd.i32 q12,q12,q10
2765 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2767 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2768 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2769 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2771 vadd.i32 q13,q13,q11
2772 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2774 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2775 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2776 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
@ Final rounds: no further schedule expansion (su0/su1) is needed.
2780 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2781 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2786 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2787 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2790 vadd.i32 q12,q12,q10
2791 sub r3,r3,#256-16 @ rewind
2793 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2794 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2796 vadd.i32 q13,q13,q11
2798 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2799 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
@ Store the updated hash state back to the context.
2806 vst1.32 {q0,q1},[r0]
2809 .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2811 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2814 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2815 .comm OPENSSL_armcap_P,4,4
2816 .hidden OPENSSL_armcap_P