2 @ ====================================================================
3 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
4 @ project. The module is, however, dual licensed under OpenSSL and
5 @ CRYPTOGAMS licenses depending on where you obtain it. For further
6 @ details see http://www.openssl.org/~appro/cryptogams/.
8 @ Permission to use under GPL terms is granted.
9 @ ====================================================================
11 @ SHA256 block procedure for ARMv4. May 2007.
13 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
14 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
15 @ byte [on single-issue Xscale PXA250 core].
19 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
20 @ Cortex A8 core and ~20 cycles per processed byte.
24 @ Profiler-assisted and platform-specific optimization resulted in 16%
25 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
29 @ Add NEON implementation. On Cortex A8 it was measured to process one
30 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
31 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
32 @ code (meaning that latter performs sub-optimally, nothing was done about it).
37 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
40 # include "arm_arch.h"
42 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
43 # define __ARM_MAX_ARCH__ 7
@ Round-constant table: 16 rows x 4 words = 64 words (256 bytes).
@ The round code reads it sequentially with "ldr rX,[r14],#4 @ *K256++";
@ r14 is pointed at it by "sub r14,r3,#256+32 @ K256" in the function entry
@ and rewound by 256 bytes ("rewind Ktbl") at the end of each block.
@ The low byte of the last word (0x..f2) is the value the
@ "cmp rX,#0xf2 @ done?" checks look for (presumably after a mask that is
@ not visible in this listing).
@ NOTE(review): the table's label is not visible in this listing.
62 .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
63 .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
64 .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
65 .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
66 .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
67 .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
68 .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
69 .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
70 .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
71 .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
72 .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
73 .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
74 .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
75 .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
76 .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
77 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
@ Distance from the function entry to OPENSSL_armcap_P, resolved at
@ assembly/link time.  The function entry loads this word
@ ("ldr r12,.LOPENSSL_armcap") and uses it as an offset from r3
@ (= address of sha256_block_data_order) to read the capability word.
@ Only emitted for __ARM_MAX_ARCH__>=7 non-kernel builds.
80 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
82 .word OPENSSL_armcap_P-sha256_block_data_order
@ sha256_block_data_order -- integer-only SHA-256 block function (entry).
@
@ Register use on entry, as established by the code below:
@   r0 = pointer to eight 32-bit state words (loaded into r4-r11 here and
@        re-read later as "ctx")
@   r1 = input pointer ("inp")
@   r2 = number of 64-byte blocks (converted to an end pointer below)
@ r4-r11 and lr are saved on the stack; r3, r12 and r14 are used as scratch.
86 .global sha256_block_data_order
87 .type sha256_block_data_order,%function
88 sha256_block_data_order:
@ The next two instructions both leave the address of the function entry
@ in r3; presumably they are alternatives selected by a preprocessor
@ conditional that is not visible in this listing.
90 sub r3,pc,#8 @ sha256_block_data_order
92 adr r3,sha256_block_data_order
94 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Fetch the entry->OPENSSL_armcap_P offset word, then the capability word
@ itself via r3 + offset (position-independent).
@ NOTE(review): the test/branch that consumes r12 is not visible here.
95 ldr r12,.LOPENSSL_armcap
96 ldr r12,[r3,r12] @ OPENSSL_armcap_P
@ r2 = r1 + (r2 << 6): one past the last input byte.
102 add r2,r1,r2,lsl#6 @ len to point at the end of inp
@ Push ctx (r0), inp (r1), end-of-input (r2) and the callee-saved
@ registers.  After the 16-word allocation below, r0/r1/r2 sit at
@ sp+16*4, sp+17*4 and sp+18*4 respectively.
103 stmdb sp!,{r0,r1,r2,r4-r11,lr}
@ Load the eight working variables from the state pointed to by r0.
104 ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
@ r14 = entry address - (256+32): start of the 256-byte constant table.
@ The extra 32 bytes presumably cover the words/padding between the table
@ and the entry point, not all of which are visible here -- TODO confirm.
105 sub r14,r3,#256+32 @ K256
@ Reserve 16 words on the stack for the message schedule window X[0..15].
106 sub sp,sp,#16*4 @ alloca(X[16])
@ Unrolled rounds 0-7 (round index taken from the "@ N" tag on each
@ commented-out "ldrb r2,[r1,#3]" line).
@
@ Per round, the register labelled h in the comments accumulates X[i],
@ Sigma1(e), K256[i] (fetched through r14 with post-increment) and
@ Ch(e,f,g), then Sigma0(a).  Maj(a,b,c) is computed into r3 or r12
@ (alternating by round) but only added at the start of the following
@ round ("from the past"); the in-round add is left commented out.
@ The roles a..h rotate through r4-r11 from round to round instead of the
@ values being moved.
@
@ NOTE(review): many instructions appear twice in a round (e.g. the
@ "str r1,[sp,#17*4]" and "h+=Maj" lines).  These are presumably the two
@ arms of preprocessor conditionals whose directives, like a number of
@ other lines, are missing from this listing (embedded numbering skips).
@ ---- round 0 ----
118 str r1,[sp,#17*4] @ make room for r1
121 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
122 eor r0,r0,r8,ror#19 @ Sigma1(e)
127 @ ldrb r2,[r1,#3] @ 0
128 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
135 str r1,[sp,#17*4] @ make room for r1
139 eor r0,r0,r8,ror#19 @ Sigma1(e)
141 ldr r12,[r14],#4 @ *K256++
142 add r11,r11,r2 @ h+=X[i]
145 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
147 add r11,r11,r12 @ h+=K256[i]
148 eor r2,r2,r10 @ Ch(e,f,g)
150 add r11,r11,r2 @ h+=Ch(e,f,g)
153 cmp r12,#0xf2 @ done?
157 ldr r2,[r1],#4 @ prefetch
161 eor r12,r4,r5 @ a^b, b^c in next round
163 ldr r2,[sp,#2*4] @ from future BODY_16_xx
164 eor r12,r4,r5 @ a^b, b^c in next round
165 ldr r1,[sp,#15*4] @ from future BODY_16_xx
167 eor r0,r0,r4,ror#20 @ Sigma0(a)
168 and r3,r3,r12 @ (b^c)&=(a^b)
170 eor r3,r3,r5 @ Maj(a,b,c)
171 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
172 @ add r11,r11,r3 @ h+=Maj(a,b,c)
@ ---- round 1 ----
176 str r1,[sp,#17*4] @ make room for r1
179 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
180 eor r0,r0,r7,ror#19 @ Sigma1(e)
185 @ ldrb r2,[r1,#3] @ 1
186 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
193 str r1,[sp,#17*4] @ make room for r1
197 eor r0,r0,r7,ror#19 @ Sigma1(e)
199 ldr r3,[r14],#4 @ *K256++
200 add r10,r10,r2 @ h+=X[i]
203 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
205 add r10,r10,r3 @ h+=K256[i]
206 eor r2,r2,r9 @ Ch(e,f,g)
207 eor r0,r11,r11,ror#11
208 add r10,r10,r2 @ h+=Ch(e,f,g)
215 ldr r2,[r1],#4 @ prefetch
219 eor r3,r11,r4 @ a^b, b^c in next round
221 ldr r2,[sp,#3*4] @ from future BODY_16_xx
222 eor r3,r11,r4 @ a^b, b^c in next round
223 ldr r1,[sp,#0*4] @ from future BODY_16_xx
225 eor r0,r0,r11,ror#20 @ Sigma0(a)
226 and r12,r12,r3 @ (b^c)&=(a^b)
228 eor r12,r12,r4 @ Maj(a,b,c)
229 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
230 @ add r10,r10,r12 @ h+=Maj(a,b,c)
@ ---- round 2 ----
234 str r1,[sp,#17*4] @ make room for r1
237 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
238 eor r0,r0,r6,ror#19 @ Sigma1(e)
243 @ ldrb r2,[r1,#3] @ 2
244 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
251 str r1,[sp,#17*4] @ make room for r1
255 eor r0,r0,r6,ror#19 @ Sigma1(e)
257 ldr r12,[r14],#4 @ *K256++
258 add r9,r9,r2 @ h+=X[i]
261 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
263 add r9,r9,r12 @ h+=K256[i]
264 eor r2,r2,r8 @ Ch(e,f,g)
265 eor r0,r10,r10,ror#11
266 add r9,r9,r2 @ h+=Ch(e,f,g)
269 cmp r12,#0xf2 @ done?
273 ldr r2,[r1],#4 @ prefetch
277 eor r12,r10,r11 @ a^b, b^c in next round
279 ldr r2,[sp,#4*4] @ from future BODY_16_xx
280 eor r12,r10,r11 @ a^b, b^c in next round
281 ldr r1,[sp,#1*4] @ from future BODY_16_xx
283 eor r0,r0,r10,ror#20 @ Sigma0(a)
284 and r3,r3,r12 @ (b^c)&=(a^b)
286 eor r3,r3,r11 @ Maj(a,b,c)
287 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
288 @ add r9,r9,r3 @ h+=Maj(a,b,c)
@ ---- round 3 ----
292 str r1,[sp,#17*4] @ make room for r1
295 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
296 eor r0,r0,r5,ror#19 @ Sigma1(e)
301 @ ldrb r2,[r1,#3] @ 3
302 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
309 str r1,[sp,#17*4] @ make room for r1
313 eor r0,r0,r5,ror#19 @ Sigma1(e)
315 ldr r3,[r14],#4 @ *K256++
316 add r8,r8,r2 @ h+=X[i]
319 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
321 add r8,r8,r3 @ h+=K256[i]
322 eor r2,r2,r7 @ Ch(e,f,g)
324 add r8,r8,r2 @ h+=Ch(e,f,g)
331 ldr r2,[r1],#4 @ prefetch
335 eor r3,r9,r10 @ a^b, b^c in next round
337 ldr r2,[sp,#5*4] @ from future BODY_16_xx
338 eor r3,r9,r10 @ a^b, b^c in next round
339 ldr r1,[sp,#2*4] @ from future BODY_16_xx
341 eor r0,r0,r9,ror#20 @ Sigma0(a)
342 and r12,r12,r3 @ (b^c)&=(a^b)
344 eor r12,r12,r10 @ Maj(a,b,c)
345 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
346 @ add r8,r8,r12 @ h+=Maj(a,b,c)
@ ---- round 4 ----
350 str r1,[sp,#17*4] @ make room for r1
353 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
354 eor r0,r0,r4,ror#19 @ Sigma1(e)
359 @ ldrb r2,[r1,#3] @ 4
360 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
367 str r1,[sp,#17*4] @ make room for r1
371 eor r0,r0,r4,ror#19 @ Sigma1(e)
373 ldr r12,[r14],#4 @ *K256++
374 add r7,r7,r2 @ h+=X[i]
377 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
379 add r7,r7,r12 @ h+=K256[i]
380 eor r2,r2,r6 @ Ch(e,f,g)
382 add r7,r7,r2 @ h+=Ch(e,f,g)
385 cmp r12,#0xf2 @ done?
389 ldr r2,[r1],#4 @ prefetch
393 eor r12,r8,r9 @ a^b, b^c in next round
395 ldr r2,[sp,#6*4] @ from future BODY_16_xx
396 eor r12,r8,r9 @ a^b, b^c in next round
397 ldr r1,[sp,#3*4] @ from future BODY_16_xx
399 eor r0,r0,r8,ror#20 @ Sigma0(a)
400 and r3,r3,r12 @ (b^c)&=(a^b)
401 add r11,r11,r7 @ d+=h
402 eor r3,r3,r9 @ Maj(a,b,c)
403 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
404 @ add r7,r7,r3 @ h+=Maj(a,b,c)
@ ---- round 5 ----
408 str r1,[sp,#17*4] @ make room for r1
411 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
412 eor r0,r0,r11,ror#19 @ Sigma1(e)
417 @ ldrb r2,[r1,#3] @ 5
418 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
425 str r1,[sp,#17*4] @ make room for r1
429 eor r0,r0,r11,ror#19 @ Sigma1(e)
431 ldr r3,[r14],#4 @ *K256++
432 add r6,r6,r2 @ h+=X[i]
435 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
437 add r6,r6,r3 @ h+=K256[i]
438 eor r2,r2,r5 @ Ch(e,f,g)
440 add r6,r6,r2 @ h+=Ch(e,f,g)
447 ldr r2,[r1],#4 @ prefetch
451 eor r3,r7,r8 @ a^b, b^c in next round
453 ldr r2,[sp,#7*4] @ from future BODY_16_xx
454 eor r3,r7,r8 @ a^b, b^c in next round
455 ldr r1,[sp,#4*4] @ from future BODY_16_xx
457 eor r0,r0,r7,ror#20 @ Sigma0(a)
458 and r12,r12,r3 @ (b^c)&=(a^b)
459 add r10,r10,r6 @ d+=h
460 eor r12,r12,r8 @ Maj(a,b,c)
461 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
462 @ add r6,r6,r12 @ h+=Maj(a,b,c)
@ ---- round 6 ----
466 str r1,[sp,#17*4] @ make room for r1
469 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
470 eor r0,r0,r10,ror#19 @ Sigma1(e)
475 @ ldrb r2,[r1,#3] @ 6
476 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
483 str r1,[sp,#17*4] @ make room for r1
487 eor r0,r0,r10,ror#19 @ Sigma1(e)
489 ldr r12,[r14],#4 @ *K256++
490 add r5,r5,r2 @ h+=X[i]
493 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
495 add r5,r5,r12 @ h+=K256[i]
496 eor r2,r2,r4 @ Ch(e,f,g)
498 add r5,r5,r2 @ h+=Ch(e,f,g)
501 cmp r12,#0xf2 @ done?
505 ldr r2,[r1],#4 @ prefetch
509 eor r12,r6,r7 @ a^b, b^c in next round
511 ldr r2,[sp,#8*4] @ from future BODY_16_xx
512 eor r12,r6,r7 @ a^b, b^c in next round
513 ldr r1,[sp,#5*4] @ from future BODY_16_xx
515 eor r0,r0,r6,ror#20 @ Sigma0(a)
516 and r3,r3,r12 @ (b^c)&=(a^b)
518 eor r3,r3,r7 @ Maj(a,b,c)
519 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
520 @ add r5,r5,r3 @ h+=Maj(a,b,c)
@ ---- round 7 ----
524 str r1,[sp,#17*4] @ make room for r1
527 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
528 eor r0,r0,r9,ror#19 @ Sigma1(e)
533 @ ldrb r2,[r1,#3] @ 7
534 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
541 str r1,[sp,#17*4] @ make room for r1
545 eor r0,r0,r9,ror#19 @ Sigma1(e)
547 ldr r3,[r14],#4 @ *K256++
548 add r4,r4,r2 @ h+=X[i]
551 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
553 add r4,r4,r3 @ h+=K256[i]
554 eor r2,r2,r11 @ Ch(e,f,g)
556 add r4,r4,r2 @ h+=Ch(e,f,g)
563 ldr r2,[r1],#4 @ prefetch
567 eor r3,r5,r6 @ a^b, b^c in next round
569 ldr r2,[sp,#9*4] @ from future BODY_16_xx
570 eor r3,r5,r6 @ a^b, b^c in next round
571 ldr r1,[sp,#6*4] @ from future BODY_16_xx
573 eor r0,r0,r5,ror#20 @ Sigma0(a)
574 and r12,r12,r3 @ (b^c)&=(a^b)
576 eor r12,r12,r6 @ Maj(a,b,c)
577 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
578 @ add r4,r4,r12 @ h+=Maj(a,b,c)
@ Unrolled rounds 8-15 (round index taken from the "@ N" tags on the
@ commented-out load lines).
@
@ Same per-round pattern as the earlier rounds: the register labelled h
@ accumulates X[i], Sigma1(e), K256[i] (via r14 post-increment),
@ Ch(e,f,g) and Sigma0(a); Maj(a,b,c) is built in r3/r12 and added at the
@ start of the next round ("from the past").  The "ldr r2,[r1],#4
@ @ prefetch" lines read the next input word through r1 with
@ post-increment, and "str r1,[sp,#17*4]" writes r1 to the stack slot
@ that initially held the caller's input pointer.
@
@ NOTE(review): repeated instructions within a round are presumably the
@ two arms of preprocessor conditionals whose directives are missing from
@ this listing (only "# if __ARM_ARCH__>=7" in round 15 is visible).
@ ---- round 8 ----
582 str r1,[sp,#17*4] @ make room for r1
585 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
586 eor r0,r0,r8,ror#19 @ Sigma1(e)
591 @ ldrb r2,[r1,#3] @ 8
592 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
599 str r1,[sp,#17*4] @ make room for r1
603 eor r0,r0,r8,ror#19 @ Sigma1(e)
605 ldr r12,[r14],#4 @ *K256++
606 add r11,r11,r2 @ h+=X[i]
609 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
611 add r11,r11,r12 @ h+=K256[i]
612 eor r2,r2,r10 @ Ch(e,f,g)
614 add r11,r11,r2 @ h+=Ch(e,f,g)
617 cmp r12,#0xf2 @ done?
621 ldr r2,[r1],#4 @ prefetch
625 eor r12,r4,r5 @ a^b, b^c in next round
627 ldr r2,[sp,#10*4] @ from future BODY_16_xx
628 eor r12,r4,r5 @ a^b, b^c in next round
629 ldr r1,[sp,#7*4] @ from future BODY_16_xx
631 eor r0,r0,r4,ror#20 @ Sigma0(a)
632 and r3,r3,r12 @ (b^c)&=(a^b)
634 eor r3,r3,r5 @ Maj(a,b,c)
635 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
636 @ add r11,r11,r3 @ h+=Maj(a,b,c)
@ ---- round 9 ----
640 str r1,[sp,#17*4] @ make room for r1
643 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
644 eor r0,r0,r7,ror#19 @ Sigma1(e)
649 @ ldrb r2,[r1,#3] @ 9
650 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
657 str r1,[sp,#17*4] @ make room for r1
661 eor r0,r0,r7,ror#19 @ Sigma1(e)
663 ldr r3,[r14],#4 @ *K256++
664 add r10,r10,r2 @ h+=X[i]
667 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
669 add r10,r10,r3 @ h+=K256[i]
670 eor r2,r2,r9 @ Ch(e,f,g)
671 eor r0,r11,r11,ror#11
672 add r10,r10,r2 @ h+=Ch(e,f,g)
679 ldr r2,[r1],#4 @ prefetch
683 eor r3,r11,r4 @ a^b, b^c in next round
685 ldr r2,[sp,#11*4] @ from future BODY_16_xx
686 eor r3,r11,r4 @ a^b, b^c in next round
687 ldr r1,[sp,#8*4] @ from future BODY_16_xx
689 eor r0,r0,r11,ror#20 @ Sigma0(a)
690 and r12,r12,r3 @ (b^c)&=(a^b)
692 eor r12,r12,r4 @ Maj(a,b,c)
693 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
694 @ add r10,r10,r12 @ h+=Maj(a,b,c)
@ ---- round 10 ----
696 @ ldr r2,[r1],#4 @ 10
698 str r1,[sp,#17*4] @ make room for r1
701 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
702 eor r0,r0,r6,ror#19 @ Sigma1(e)
707 @ ldrb r2,[r1,#3] @ 10
708 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
715 str r1,[sp,#17*4] @ make room for r1
719 eor r0,r0,r6,ror#19 @ Sigma1(e)
721 ldr r12,[r14],#4 @ *K256++
722 add r9,r9,r2 @ h+=X[i]
725 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
727 add r9,r9,r12 @ h+=K256[i]
728 eor r2,r2,r8 @ Ch(e,f,g)
729 eor r0,r10,r10,ror#11
730 add r9,r9,r2 @ h+=Ch(e,f,g)
733 cmp r12,#0xf2 @ done?
737 ldr r2,[r1],#4 @ prefetch
741 eor r12,r10,r11 @ a^b, b^c in next round
743 ldr r2,[sp,#12*4] @ from future BODY_16_xx
744 eor r12,r10,r11 @ a^b, b^c in next round
745 ldr r1,[sp,#9*4] @ from future BODY_16_xx
747 eor r0,r0,r10,ror#20 @ Sigma0(a)
748 and r3,r3,r12 @ (b^c)&=(a^b)
750 eor r3,r3,r11 @ Maj(a,b,c)
751 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
752 @ add r9,r9,r3 @ h+=Maj(a,b,c)
@ ---- round 11 ----
754 @ ldr r2,[r1],#4 @ 11
756 str r1,[sp,#17*4] @ make room for r1
759 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
760 eor r0,r0,r5,ror#19 @ Sigma1(e)
765 @ ldrb r2,[r1,#3] @ 11
766 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
773 str r1,[sp,#17*4] @ make room for r1
777 eor r0,r0,r5,ror#19 @ Sigma1(e)
779 ldr r3,[r14],#4 @ *K256++
780 add r8,r8,r2 @ h+=X[i]
783 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
785 add r8,r8,r3 @ h+=K256[i]
786 eor r2,r2,r7 @ Ch(e,f,g)
788 add r8,r8,r2 @ h+=Ch(e,f,g)
795 ldr r2,[r1],#4 @ prefetch
799 eor r3,r9,r10 @ a^b, b^c in next round
801 ldr r2,[sp,#13*4] @ from future BODY_16_xx
802 eor r3,r9,r10 @ a^b, b^c in next round
803 ldr r1,[sp,#10*4] @ from future BODY_16_xx
805 eor r0,r0,r9,ror#20 @ Sigma0(a)
806 and r12,r12,r3 @ (b^c)&=(a^b)
808 eor r12,r12,r10 @ Maj(a,b,c)
809 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
810 @ add r8,r8,r12 @ h+=Maj(a,b,c)
@ ---- round 12 ----
812 @ ldr r2,[r1],#4 @ 12
814 str r1,[sp,#17*4] @ make room for r1
817 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
818 eor r0,r0,r4,ror#19 @ Sigma1(e)
823 @ ldrb r2,[r1,#3] @ 12
824 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
831 str r1,[sp,#17*4] @ make room for r1
835 eor r0,r0,r4,ror#19 @ Sigma1(e)
837 ldr r12,[r14],#4 @ *K256++
838 add r7,r7,r2 @ h+=X[i]
841 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
843 add r7,r7,r12 @ h+=K256[i]
844 eor r2,r2,r6 @ Ch(e,f,g)
846 add r7,r7,r2 @ h+=Ch(e,f,g)
849 cmp r12,#0xf2 @ done?
853 ldr r2,[r1],#4 @ prefetch
857 eor r12,r8,r9 @ a^b, b^c in next round
859 ldr r2,[sp,#14*4] @ from future BODY_16_xx
860 eor r12,r8,r9 @ a^b, b^c in next round
861 ldr r1,[sp,#11*4] @ from future BODY_16_xx
863 eor r0,r0,r8,ror#20 @ Sigma0(a)
864 and r3,r3,r12 @ (b^c)&=(a^b)
865 add r11,r11,r7 @ d+=h
866 eor r3,r3,r9 @ Maj(a,b,c)
867 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
868 @ add r7,r7,r3 @ h+=Maj(a,b,c)
@ ---- round 13 ----
870 @ ldr r2,[r1],#4 @ 13
872 str r1,[sp,#17*4] @ make room for r1
875 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
876 eor r0,r0,r11,ror#19 @ Sigma1(e)
881 @ ldrb r2,[r1,#3] @ 13
882 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
889 str r1,[sp,#17*4] @ make room for r1
893 eor r0,r0,r11,ror#19 @ Sigma1(e)
895 ldr r3,[r14],#4 @ *K256++
896 add r6,r6,r2 @ h+=X[i]
899 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
901 add r6,r6,r3 @ h+=K256[i]
902 eor r2,r2,r5 @ Ch(e,f,g)
904 add r6,r6,r2 @ h+=Ch(e,f,g)
911 ldr r2,[r1],#4 @ prefetch
915 eor r3,r7,r8 @ a^b, b^c in next round
917 ldr r2,[sp,#15*4] @ from future BODY_16_xx
918 eor r3,r7,r8 @ a^b, b^c in next round
919 ldr r1,[sp,#12*4] @ from future BODY_16_xx
921 eor r0,r0,r7,ror#20 @ Sigma0(a)
922 and r12,r12,r3 @ (b^c)&=(a^b)
923 add r10,r10,r6 @ d+=h
924 eor r12,r12,r8 @ Maj(a,b,c)
925 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
926 @ add r6,r6,r12 @ h+=Maj(a,b,c)
@ ---- round 14 ----
928 @ ldr r2,[r1],#4 @ 14
930 str r1,[sp,#17*4] @ make room for r1
933 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
934 eor r0,r0,r10,ror#19 @ Sigma1(e)
939 @ ldrb r2,[r1,#3] @ 14
940 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
947 str r1,[sp,#17*4] @ make room for r1
951 eor r0,r0,r10,ror#19 @ Sigma1(e)
953 ldr r12,[r14],#4 @ *K256++
954 add r5,r5,r2 @ h+=X[i]
957 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
959 add r5,r5,r12 @ h+=K256[i]
960 eor r2,r2,r4 @ Ch(e,f,g)
962 add r5,r5,r2 @ h+=Ch(e,f,g)
965 cmp r12,#0xf2 @ done?
969 ldr r2,[r1],#4 @ prefetch
973 eor r12,r6,r7 @ a^b, b^c in next round
975 ldr r2,[sp,#0*4] @ from future BODY_16_xx
976 eor r12,r6,r7 @ a^b, b^c in next round
977 ldr r1,[sp,#13*4] @ from future BODY_16_xx
979 eor r0,r0,r6,ror#20 @ Sigma0(a)
980 and r3,r3,r12 @ (b^c)&=(a^b)
982 eor r3,r3,r7 @ Maj(a,b,c)
983 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
984 @ add r5,r5,r3 @ h+=Maj(a,b,c)
@ ---- round 15 ----
986 @ ldr r2,[r1],#4 @ 15
988 str r1,[sp,#17*4] @ make room for r1
991 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
992 eor r0,r0,r9,ror#19 @ Sigma1(e)
997 @ ldrb r2,[r1,#3] @ 15
998 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1005 str r1,[sp,#17*4] @ make room for r1
1009 eor r0,r0,r9,ror#19 @ Sigma1(e)
1011 ldr r3,[r14],#4 @ *K256++
1012 add r4,r4,r2 @ h+=X[i]
1015 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1017 add r4,r4,r3 @ h+=K256[i]
1018 eor r2,r2,r11 @ Ch(e,f,g)
1020 add r4,r4,r2 @ h+=Ch(e,f,g)
1023 cmp r3,#0xf2 @ done?
1026 # if __ARM_ARCH__>=7
1027 ldr r2,[r1],#4 @ prefetch
1031 eor r3,r5,r6 @ a^b, b^c in next round
1033 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1034 eor r3,r5,r6 @ a^b, b^c in next round
1035 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1037 eor r0,r0,r5,ror#20 @ Sigma0(a)
1038 and r12,r12,r3 @ (b^c)&=(a^b)
1040 eor r12,r12,r6 @ Maj(a,b,c)
1041 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1042 @ add r4,r4,r12 @ h+=Maj(a,b,c)
@ Unrolled rounds tagged 16-23 (index from the "@ N" tags on the
@ commented-out "ldr r2,[sp,#k*4]" lines).
@
@ Each round has two parts:
@   - message-schedule step: r2 and r1 hold two words of the 16-word stack
@     window X[] (loaded at the end of the previous round, "from future
@     BODY_16_xx"); sigma0(X[i+1]) is formed in r0 and sigma1(X[i+14]) in
@     r3 or r12 (alternating by round), per the existing comments.
@   - compression step ("from BODY_00_15"): the register labelled h
@     accumulates X[i], Sigma1(e), K256[i] (via r14 post-increment),
@     Ch(e,f,g) and Sigma0(a); Maj(a,b,c) is deferred to the next round.
@
@ NOTE(review): this listing omits lines (embedded numbering skips), so
@ the visible instructions are not the complete round; repeated lines are
@ presumably the two arms of the "# if __ARM_ARCH__>=7" conditionals.
@ ---- round 16 ----
1044 @ ldr r2,[sp,#1*4] @ 16
1047 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1050 eor r12,r12,r1,ror#19
1051 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1053 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1057 eor r0,r8,r8,ror#5 @ from BODY_00_15
1059 eor r0,r0,r8,ror#19 @ Sigma1(e)
1061 ldr r12,[r14],#4 @ *K256++
1062 add r11,r11,r2 @ h+=X[i]
1065 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1067 add r11,r11,r12 @ h+=K256[i]
1068 eor r2,r2,r10 @ Ch(e,f,g)
1070 add r11,r11,r2 @ h+=Ch(e,f,g)
1073 cmp r12,#0xf2 @ done?
1076 # if __ARM_ARCH__>=7
1077 ldr r2,[r1],#4 @ prefetch
1081 eor r12,r4,r5 @ a^b, b^c in next round
1083 ldr r2,[sp,#2*4] @ from future BODY_16_xx
1084 eor r12,r4,r5 @ a^b, b^c in next round
1085 ldr r1,[sp,#15*4] @ from future BODY_16_xx
1087 eor r0,r0,r4,ror#20 @ Sigma0(a)
1088 and r3,r3,r12 @ (b^c)&=(a^b)
1089 add r7,r7,r11 @ d+=h
1090 eor r3,r3,r5 @ Maj(a,b,c)
1091 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1092 @ add r11,r11,r3 @ h+=Maj(a,b,c)
@ ---- round 17 ----
1093 @ ldr r2,[sp,#2*4] @ 17
1096 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1100 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1102 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1106 eor r0,r7,r7,ror#5 @ from BODY_00_15
1108 eor r0,r0,r7,ror#19 @ Sigma1(e)
1110 ldr r3,[r14],#4 @ *K256++
1111 add r10,r10,r2 @ h+=X[i]
1114 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1116 add r10,r10,r3 @ h+=K256[i]
1117 eor r2,r2,r9 @ Ch(e,f,g)
1118 eor r0,r11,r11,ror#11
1119 add r10,r10,r2 @ h+=Ch(e,f,g)
1122 cmp r3,#0xf2 @ done?
1125 # if __ARM_ARCH__>=7
1126 ldr r2,[r1],#4 @ prefetch
1130 eor r3,r11,r4 @ a^b, b^c in next round
1132 ldr r2,[sp,#3*4] @ from future BODY_16_xx
1133 eor r3,r11,r4 @ a^b, b^c in next round
1134 ldr r1,[sp,#0*4] @ from future BODY_16_xx
1136 eor r0,r0,r11,ror#20 @ Sigma0(a)
1137 and r12,r12,r3 @ (b^c)&=(a^b)
1138 add r6,r6,r10 @ d+=h
1139 eor r12,r12,r4 @ Maj(a,b,c)
1140 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1141 @ add r10,r10,r12 @ h+=Maj(a,b,c)
@ ---- round 18 ----
1142 @ ldr r2,[sp,#3*4] @ 18
1145 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1148 eor r12,r12,r1,ror#19
1149 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1151 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1155 eor r0,r6,r6,ror#5 @ from BODY_00_15
1157 eor r0,r0,r6,ror#19 @ Sigma1(e)
1159 ldr r12,[r14],#4 @ *K256++
1160 add r9,r9,r2 @ h+=X[i]
1163 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1165 add r9,r9,r12 @ h+=K256[i]
1166 eor r2,r2,r8 @ Ch(e,f,g)
1167 eor r0,r10,r10,ror#11
1168 add r9,r9,r2 @ h+=Ch(e,f,g)
1171 cmp r12,#0xf2 @ done?
1174 # if __ARM_ARCH__>=7
1175 ldr r2,[r1],#4 @ prefetch
1179 eor r12,r10,r11 @ a^b, b^c in next round
1181 ldr r2,[sp,#4*4] @ from future BODY_16_xx
1182 eor r12,r10,r11 @ a^b, b^c in next round
1183 ldr r1,[sp,#1*4] @ from future BODY_16_xx
1185 eor r0,r0,r10,ror#20 @ Sigma0(a)
1186 and r3,r3,r12 @ (b^c)&=(a^b)
1188 eor r3,r3,r11 @ Maj(a,b,c)
1189 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1190 @ add r9,r9,r3 @ h+=Maj(a,b,c)
@ ---- round 19 ----
1191 @ ldr r2,[sp,#4*4] @ 19
1194 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1198 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1200 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1204 eor r0,r5,r5,ror#5 @ from BODY_00_15
1206 eor r0,r0,r5,ror#19 @ Sigma1(e)
1208 ldr r3,[r14],#4 @ *K256++
1209 add r8,r8,r2 @ h+=X[i]
1212 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1214 add r8,r8,r3 @ h+=K256[i]
1215 eor r2,r2,r7 @ Ch(e,f,g)
1217 add r8,r8,r2 @ h+=Ch(e,f,g)
1220 cmp r3,#0xf2 @ done?
1223 # if __ARM_ARCH__>=7
1224 ldr r2,[r1],#4 @ prefetch
1228 eor r3,r9,r10 @ a^b, b^c in next round
1230 ldr r2,[sp,#5*4] @ from future BODY_16_xx
1231 eor r3,r9,r10 @ a^b, b^c in next round
1232 ldr r1,[sp,#2*4] @ from future BODY_16_xx
1234 eor r0,r0,r9,ror#20 @ Sigma0(a)
1235 and r12,r12,r3 @ (b^c)&=(a^b)
1237 eor r12,r12,r10 @ Maj(a,b,c)
1238 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1239 @ add r8,r8,r12 @ h+=Maj(a,b,c)
@ ---- round 20 ----
1240 @ ldr r2,[sp,#5*4] @ 20
1243 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1246 eor r12,r12,r1,ror#19
1247 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1249 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1253 eor r0,r4,r4,ror#5 @ from BODY_00_15
1255 eor r0,r0,r4,ror#19 @ Sigma1(e)
1257 ldr r12,[r14],#4 @ *K256++
1258 add r7,r7,r2 @ h+=X[i]
1261 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1263 add r7,r7,r12 @ h+=K256[i]
1264 eor r2,r2,r6 @ Ch(e,f,g)
1266 add r7,r7,r2 @ h+=Ch(e,f,g)
1269 cmp r12,#0xf2 @ done?
1272 # if __ARM_ARCH__>=7
1273 ldr r2,[r1],#4 @ prefetch
1277 eor r12,r8,r9 @ a^b, b^c in next round
1279 ldr r2,[sp,#6*4] @ from future BODY_16_xx
1280 eor r12,r8,r9 @ a^b, b^c in next round
1281 ldr r1,[sp,#3*4] @ from future BODY_16_xx
1283 eor r0,r0,r8,ror#20 @ Sigma0(a)
1284 and r3,r3,r12 @ (b^c)&=(a^b)
1285 add r11,r11,r7 @ d+=h
1286 eor r3,r3,r9 @ Maj(a,b,c)
1287 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1288 @ add r7,r7,r3 @ h+=Maj(a,b,c)
@ ---- round 21 ----
1289 @ ldr r2,[sp,#6*4] @ 21
1292 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1296 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1298 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1302 eor r0,r11,r11,ror#5 @ from BODY_00_15
1304 eor r0,r0,r11,ror#19 @ Sigma1(e)
1306 ldr r3,[r14],#4 @ *K256++
1307 add r6,r6,r2 @ h+=X[i]
1310 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1312 add r6,r6,r3 @ h+=K256[i]
1313 eor r2,r2,r5 @ Ch(e,f,g)
1315 add r6,r6,r2 @ h+=Ch(e,f,g)
1318 cmp r3,#0xf2 @ done?
1321 # if __ARM_ARCH__>=7
1322 ldr r2,[r1],#4 @ prefetch
1326 eor r3,r7,r8 @ a^b, b^c in next round
1328 ldr r2,[sp,#7*4] @ from future BODY_16_xx
1329 eor r3,r7,r8 @ a^b, b^c in next round
1330 ldr r1,[sp,#4*4] @ from future BODY_16_xx
1332 eor r0,r0,r7,ror#20 @ Sigma0(a)
1333 and r12,r12,r3 @ (b^c)&=(a^b)
1334 add r10,r10,r6 @ d+=h
1335 eor r12,r12,r8 @ Maj(a,b,c)
1336 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1337 @ add r6,r6,r12 @ h+=Maj(a,b,c)
@ ---- round 22 ----
1338 @ ldr r2,[sp,#7*4] @ 22
1341 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1344 eor r12,r12,r1,ror#19
1345 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1347 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1351 eor r0,r10,r10,ror#5 @ from BODY_00_15
1353 eor r0,r0,r10,ror#19 @ Sigma1(e)
1355 ldr r12,[r14],#4 @ *K256++
1356 add r5,r5,r2 @ h+=X[i]
1359 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1361 add r5,r5,r12 @ h+=K256[i]
1362 eor r2,r2,r4 @ Ch(e,f,g)
1364 add r5,r5,r2 @ h+=Ch(e,f,g)
1367 cmp r12,#0xf2 @ done?
1370 # if __ARM_ARCH__>=7
1371 ldr r2,[r1],#4 @ prefetch
1375 eor r12,r6,r7 @ a^b, b^c in next round
1377 ldr r2,[sp,#8*4] @ from future BODY_16_xx
1378 eor r12,r6,r7 @ a^b, b^c in next round
1379 ldr r1,[sp,#5*4] @ from future BODY_16_xx
1381 eor r0,r0,r6,ror#20 @ Sigma0(a)
1382 and r3,r3,r12 @ (b^c)&=(a^b)
1384 eor r3,r3,r7 @ Maj(a,b,c)
1385 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1386 @ add r5,r5,r3 @ h+=Maj(a,b,c)
@ ---- round 23 ----
1387 @ ldr r2,[sp,#8*4] @ 23
1390 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1394 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1396 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1400 eor r0,r9,r9,ror#5 @ from BODY_00_15
1402 eor r0,r0,r9,ror#19 @ Sigma1(e)
1404 ldr r3,[r14],#4 @ *K256++
1405 add r4,r4,r2 @ h+=X[i]
1408 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1410 add r4,r4,r3 @ h+=K256[i]
1411 eor r2,r2,r11 @ Ch(e,f,g)
1413 add r4,r4,r2 @ h+=Ch(e,f,g)
1416 cmp r3,#0xf2 @ done?
1419 # if __ARM_ARCH__>=7
1420 ldr r2,[r1],#4 @ prefetch
1424 eor r3,r5,r6 @ a^b, b^c in next round
1426 ldr r2,[sp,#9*4] @ from future BODY_16_xx
1427 eor r3,r5,r6 @ a^b, b^c in next round
1428 ldr r1,[sp,#6*4] @ from future BODY_16_xx
1430 eor r0,r0,r5,ror#20 @ Sigma0(a)
1431 and r12,r12,r3 @ (b^c)&=(a^b)
1433 eor r12,r12,r6 @ Maj(a,b,c)
1434 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1435 @ add r4,r4,r12 @ h+=Maj(a,b,c)
@ Unrolled rounds tagged 24-31 (index from the "@ N" tags on the
@ commented-out "ldr r2,[sp,#k*4]" lines).
@
@ Same two-part pattern as the preceding schedule rounds: sigma0(X[i+1])
@ and sigma1(X[i+14]) are formed from the two stack-window words held in
@ r2/r1, then the "from BODY_00_15" part adds X[i], Sigma1(e), K256[i]
@ (via r14 post-increment), Ch(e,f,g) and Sigma0(a) into the register
@ labelled h, with Maj(a,b,c) deferred to the following round.
@ The stack-window offsets wrap modulo 16 (e.g. #15*4 is followed by
@ #0*4 in the "from future BODY_16_xx" loads).
@
@ NOTE(review): this listing omits lines (embedded numbering skips);
@ repeated lines are presumably the two arms of the
@ "# if __ARM_ARCH__>=7" conditionals.  The "cmp rX,#0xf2 @ done?" in
@ the last round feeds the "ite eq"/"ldreq" that follows this block.
@ ---- round 24 ----
1436 @ ldr r2,[sp,#9*4] @ 24
1439 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1442 eor r12,r12,r1,ror#19
1443 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1445 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1449 eor r0,r8,r8,ror#5 @ from BODY_00_15
1451 eor r0,r0,r8,ror#19 @ Sigma1(e)
1453 ldr r12,[r14],#4 @ *K256++
1454 add r11,r11,r2 @ h+=X[i]
1457 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1459 add r11,r11,r12 @ h+=K256[i]
1460 eor r2,r2,r10 @ Ch(e,f,g)
1462 add r11,r11,r2 @ h+=Ch(e,f,g)
1465 cmp r12,#0xf2 @ done?
1468 # if __ARM_ARCH__>=7
1469 ldr r2,[r1],#4 @ prefetch
1473 eor r12,r4,r5 @ a^b, b^c in next round
1475 ldr r2,[sp,#10*4] @ from future BODY_16_xx
1476 eor r12,r4,r5 @ a^b, b^c in next round
1477 ldr r1,[sp,#7*4] @ from future BODY_16_xx
1479 eor r0,r0,r4,ror#20 @ Sigma0(a)
1480 and r3,r3,r12 @ (b^c)&=(a^b)
1481 add r7,r7,r11 @ d+=h
1482 eor r3,r3,r5 @ Maj(a,b,c)
1483 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1484 @ add r11,r11,r3 @ h+=Maj(a,b,c)
@ ---- round 25 ----
1485 @ ldr r2,[sp,#10*4] @ 25
1488 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1492 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1494 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1498 eor r0,r7,r7,ror#5 @ from BODY_00_15
1500 eor r0,r0,r7,ror#19 @ Sigma1(e)
1502 ldr r3,[r14],#4 @ *K256++
1503 add r10,r10,r2 @ h+=X[i]
1506 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1508 add r10,r10,r3 @ h+=K256[i]
1509 eor r2,r2,r9 @ Ch(e,f,g)
1510 eor r0,r11,r11,ror#11
1511 add r10,r10,r2 @ h+=Ch(e,f,g)
1514 cmp r3,#0xf2 @ done?
1517 # if __ARM_ARCH__>=7
1518 ldr r2,[r1],#4 @ prefetch
1522 eor r3,r11,r4 @ a^b, b^c in next round
1524 ldr r2,[sp,#11*4] @ from future BODY_16_xx
1525 eor r3,r11,r4 @ a^b, b^c in next round
1526 ldr r1,[sp,#8*4] @ from future BODY_16_xx
1528 eor r0,r0,r11,ror#20 @ Sigma0(a)
1529 and r12,r12,r3 @ (b^c)&=(a^b)
1530 add r6,r6,r10 @ d+=h
1531 eor r12,r12,r4 @ Maj(a,b,c)
1532 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1533 @ add r10,r10,r12 @ h+=Maj(a,b,c)
@ ---- round 26 ----
1534 @ ldr r2,[sp,#11*4] @ 26
1537 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1540 eor r12,r12,r1,ror#19
1541 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1543 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1547 eor r0,r6,r6,ror#5 @ from BODY_00_15
1549 eor r0,r0,r6,ror#19 @ Sigma1(e)
1551 ldr r12,[r14],#4 @ *K256++
1552 add r9,r9,r2 @ h+=X[i]
1555 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1557 add r9,r9,r12 @ h+=K256[i]
1558 eor r2,r2,r8 @ Ch(e,f,g)
1559 eor r0,r10,r10,ror#11
1560 add r9,r9,r2 @ h+=Ch(e,f,g)
1563 cmp r12,#0xf2 @ done?
1566 # if __ARM_ARCH__>=7
1567 ldr r2,[r1],#4 @ prefetch
1571 eor r12,r10,r11 @ a^b, b^c in next round
1573 ldr r2,[sp,#12*4] @ from future BODY_16_xx
1574 eor r12,r10,r11 @ a^b, b^c in next round
1575 ldr r1,[sp,#9*4] @ from future BODY_16_xx
1577 eor r0,r0,r10,ror#20 @ Sigma0(a)
1578 and r3,r3,r12 @ (b^c)&=(a^b)
1580 eor r3,r3,r11 @ Maj(a,b,c)
1581 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1582 @ add r9,r9,r3 @ h+=Maj(a,b,c)
@ ---- round 27 ----
1583 @ ldr r2,[sp,#12*4] @ 27
1586 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1590 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1592 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1596 eor r0,r5,r5,ror#5 @ from BODY_00_15
1598 eor r0,r0,r5,ror#19 @ Sigma1(e)
1600 ldr r3,[r14],#4 @ *K256++
1601 add r8,r8,r2 @ h+=X[i]
1604 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1606 add r8,r8,r3 @ h+=K256[i]
1607 eor r2,r2,r7 @ Ch(e,f,g)
1609 add r8,r8,r2 @ h+=Ch(e,f,g)
1612 cmp r3,#0xf2 @ done?
1615 # if __ARM_ARCH__>=7
1616 ldr r2,[r1],#4 @ prefetch
1620 eor r3,r9,r10 @ a^b, b^c in next round
1622 ldr r2,[sp,#13*4] @ from future BODY_16_xx
1623 eor r3,r9,r10 @ a^b, b^c in next round
1624 ldr r1,[sp,#10*4] @ from future BODY_16_xx
1626 eor r0,r0,r9,ror#20 @ Sigma0(a)
1627 and r12,r12,r3 @ (b^c)&=(a^b)
1629 eor r12,r12,r10 @ Maj(a,b,c)
1630 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1631 @ add r8,r8,r12 @ h+=Maj(a,b,c)
@ ---- round 28 ----
1632 @ ldr r2,[sp,#13*4] @ 28
1635 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1638 eor r12,r12,r1,ror#19
1639 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1641 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1645 eor r0,r4,r4,ror#5 @ from BODY_00_15
1647 eor r0,r0,r4,ror#19 @ Sigma1(e)
1649 ldr r12,[r14],#4 @ *K256++
1650 add r7,r7,r2 @ h+=X[i]
1653 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1655 add r7,r7,r12 @ h+=K256[i]
1656 eor r2,r2,r6 @ Ch(e,f,g)
1658 add r7,r7,r2 @ h+=Ch(e,f,g)
1661 cmp r12,#0xf2 @ done?
1664 # if __ARM_ARCH__>=7
1665 ldr r2,[r1],#4 @ prefetch
1669 eor r12,r8,r9 @ a^b, b^c in next round
1671 ldr r2,[sp,#14*4] @ from future BODY_16_xx
1672 eor r12,r8,r9 @ a^b, b^c in next round
1673 ldr r1,[sp,#11*4] @ from future BODY_16_xx
1675 eor r0,r0,r8,ror#20 @ Sigma0(a)
1676 and r3,r3,r12 @ (b^c)&=(a^b)
1677 add r11,r11,r7 @ d+=h
1678 eor r3,r3,r9 @ Maj(a,b,c)
1679 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1680 @ add r7,r7,r3 @ h+=Maj(a,b,c)
@ ---- round 29 ----
1681 @ ldr r2,[sp,#14*4] @ 29
1684 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1688 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1690 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1694 eor r0,r11,r11,ror#5 @ from BODY_00_15
1696 eor r0,r0,r11,ror#19 @ Sigma1(e)
1698 ldr r3,[r14],#4 @ *K256++
1699 add r6,r6,r2 @ h+=X[i]
1702 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1704 add r6,r6,r3 @ h+=K256[i]
1705 eor r2,r2,r5 @ Ch(e,f,g)
1707 add r6,r6,r2 @ h+=Ch(e,f,g)
1710 cmp r3,#0xf2 @ done?
1713 # if __ARM_ARCH__>=7
1714 ldr r2,[r1],#4 @ prefetch
1718 eor r3,r7,r8 @ a^b, b^c in next round
1720 ldr r2,[sp,#15*4] @ from future BODY_16_xx
1721 eor r3,r7,r8 @ a^b, b^c in next round
1722 ldr r1,[sp,#12*4] @ from future BODY_16_xx
1724 eor r0,r0,r7,ror#20 @ Sigma0(a)
1725 and r12,r12,r3 @ (b^c)&=(a^b)
1726 add r10,r10,r6 @ d+=h
1727 eor r12,r12,r8 @ Maj(a,b,c)
1728 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1729 @ add r6,r6,r12 @ h+=Maj(a,b,c)
@ ---- round 30 ----
1730 @ ldr r2,[sp,#15*4] @ 30
1733 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1736 eor r12,r12,r1,ror#19
1737 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1739 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1743 eor r0,r10,r10,ror#5 @ from BODY_00_15
1745 eor r0,r0,r10,ror#19 @ Sigma1(e)
1747 ldr r12,[r14],#4 @ *K256++
1748 add r5,r5,r2 @ h+=X[i]
1751 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1753 add r5,r5,r12 @ h+=K256[i]
1754 eor r2,r2,r4 @ Ch(e,f,g)
1756 add r5,r5,r2 @ h+=Ch(e,f,g)
1759 cmp r12,#0xf2 @ done?
1762 # if __ARM_ARCH__>=7
1763 ldr r2,[r1],#4 @ prefetch
1767 eor r12,r6,r7 @ a^b, b^c in next round
1769 ldr r2,[sp,#0*4] @ from future BODY_16_xx
1770 eor r12,r6,r7 @ a^b, b^c in next round
1771 ldr r1,[sp,#13*4] @ from future BODY_16_xx
1773 eor r0,r0,r6,ror#20 @ Sigma0(a)
1774 and r3,r3,r12 @ (b^c)&=(a^b)
1776 eor r3,r3,r7 @ Maj(a,b,c)
1777 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1778 @ add r5,r5,r3 @ h+=Maj(a,b,c)
@ ---- round 31 ----
1779 @ ldr r2,[sp,#0*4] @ 31
1782 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1786 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1788 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1792 eor r0,r9,r9,ror#5 @ from BODY_00_15
1794 eor r0,r0,r9,ror#19 @ Sigma1(e)
1796 ldr r3,[r14],#4 @ *K256++
1797 add r4,r4,r2 @ h+=X[i]
1800 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1802 add r4,r4,r3 @ h+=K256[i]
1803 eor r2,r2,r11 @ Ch(e,f,g)
1805 add r4,r4,r2 @ h+=Ch(e,f,g)
1808 cmp r3,#0xf2 @ done?
1811 # if __ARM_ARCH__>=7
1812 ldr r2,[r1],#4 @ prefetch
1816 eor r3,r5,r6 @ a^b, b^c in next round
1818 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1819 eor r3,r5,r6 @ a^b, b^c in next round
1820 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1822 eor r0,r0,r5,ror#20 @ Sigma0(a)
1823 and r12,r12,r3 @ (b^c)&=(a^b)
1825 eor r12,r12,r6 @ Maj(a,b,c)
1826 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1827 @ add r4,r4,r12 @ h+=Maj(a,b,c)
@ Tail of the per-block loop, frame teardown and return.
@
@ Stack frame at this point (from the entry code's stmdb + 16-word
@ allocation): X[0..15] at sp+0..15*4, ctx (r0) at sp+16*4, inp (r1) at
@ sp+17*4, end-of-input (r2) at sp+18*4, then saved r4-r11 and lr.
@
@ NOTE(review): embedded numbering skips here, so the branches, the
@ addition of the previous state into r4-r11 and the inp/end compare are
@ presumably on lines missing from this listing.
@ On "eq" (end of the constant table reached) reload the state pointer.
1829 ite eq @ Thumb2 thing, sanity check in ARM
1831 ldreq r3,[sp,#16*4] @ pull ctx
@ Apply the Maj term deferred from the final round.
1834 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1849 ldr r1,[sp,#17*4] @ pull inp
1850 ldr r12,[sp,#18*4] @ pull inp+len
@ Write the eight working variables back to the state at [r3].
1853 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
@ r14 advanced by 64 words during the rounds; step back to the table start.
1855 sub r14,r14,#256 @ rewind Ktbl
@ Drop X[16] plus the three saved words (ctx, inp, inp+len): 19 words.
1858 add sp,sp,#19*4 @ destroy frame
@ Two return sequences follow, presumably selected by a preprocessor
@ conditional not visible here: either pop straight into pc, or pop into
@ lr and return with "moveq pc,lr" / the literal word 0xe12fff1e, which
@ is the ARM encoding of "bx lr".
@ NOTE(review): the instruction that sets the flags for "moveq" is not
@ visible in this listing.
1860 ldmia sp!,{r4-r11,pc}
1862 ldmia sp!,{r4-r11,lr}
1864 moveq pc,lr @ be binary compatible with V4, yet
1865 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
1867 .size sha256_block_data_order,.-sha256_block_data_order
1868 #if __ARM_MAX_ARCH__>=7
@ sha256_block_data_order_neon: NEON code path of the SHA-256 block
@ procedure (the "Add NEON implementation" entry in the file header).
@ In: r1 = inp, r2 = len counted in 64-byte blocks (scaled by lsl#6
@ below so that r2 points at the end of inp).
@ NOTE(review): r0 is presumably the context pointer on entry, as in
@ the other entry points; inside the rounds it is reused as scratch.
@ Confirm against the prologue.
@ r4-r12 and lr are pushed on entry and popped (lr into pc) on exit.
@ The original sp is parked at [sp,#76] and reloaded before returning.
@ r14 is read in 16-byte steps and rewound by 256 once per pass;
@ NOTE(review): presumably it walks the K256 table - confirm.
@ The scalar round code relies on the identities
@ Sigma1(x) = ror(x ^ ror(x,5) ^ ror(x,19), 6)
@ Sigma0(x) = ror(x ^ ror(x,11) ^ ror(x,20), 2)
@ which is why ror#5/#19/#6 and ror#11/#20/#2 keep recurring. Comments
@ that say "pattern" are drawn by analogy with the annotated
@ integer-only rounds of sha256_block_data_order; verify them against
@ the complete round body before relying on them.
1872 .global sha256_block_data_order_neon
1873 .type sha256_block_data_order_neon,%function
1875 sha256_block_data_order_neon:
1877 stmdb sp!,{r4-r12,lr} @ push r4-r12 and lr
1881 bic r11,r11,#15 @ align for 128-bit stores
1884 add r2,r1,r2,lsl#6 @ len to point at the end of inp
1890 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
1891 vld1.32 {q9},[r14,:128]! @ q9 = next 16 bytes, r14 += 16
1892 vld1.32 {q10},[r14,:128]! @ q10 = next 16 bytes, r14 += 16
1893 vld1.32 {q11},[r14,:128]! @ q11 = next 16 bytes, r14 += 16
1894 vrev32.8 q0,q0 @ yes, even on
1896 vrev32.8 q1,q1 @ big-endian
1902 str r12,[sp,#76] @ save original sp
1905 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
1907 vst1.32 {q9},[r1,:128]! @ 16 bytes of q9 to [r1], r1 += 16
1909 vst1.32 {q10},[r1,:128]! @ 16 bytes of q10 to [r1], r1 += 16
1910 vst1.32 {q11},[r1,:128]! @ 16 bytes of q11 to [r1], r1 += 16
1928 eor r12,r0,r8,ror#19 @ Sigma1 pattern on r8: ror#19 term
1933 add r11,r11,r12,ror#6 @ r11 += ror(r12,6): h+=Sigma1(e) pattern
1943 add r11,r11,r0,ror#2 @ r11 += ror(r0,2): h+=Sigma0(a) pattern
1955 eor r0,r11,r11,ror#11 @ Sigma0 pattern on r11: x^ror(x,11)
1958 add r10,r10,r3,ror#6 @ r10 += ror(r3,6): h+=Sigma1(e) pattern
1961 eor r0,r0,r11,ror#20 @ Sigma0 pattern on r11: ror#20 term
1969 add r10,r10,r0,ror#2 @ r10 += ror(r0,2): h+=Sigma0(a) pattern
1979 eor r12,r0,r6,ror#19 @ Sigma1 pattern on r6: ror#19 term
1981 eor r0,r10,r10,ror#11 @ Sigma0 pattern on r10: x^ror(x,11)
1987 eor r0,r0,r10,ror#20 @ Sigma0 pattern on r10: ror#20 term
1996 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2016 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2026 eor r12,r0,r4,ror#19 @ Sigma1 pattern on r4: ror#19 term
2047 eor r0,r11,r11,ror#5 @ Sigma1 pattern on r11: x^ror(x,5)
2052 eor r3,r0,r11,ror#19 @ Sigma1 pattern on r11: ror#19 term
2073 eor r0,r10,r10,ror#5 @ Sigma1 pattern on r10: x^ror(x,5)
2077 eor r12,r0,r10,ror#19 @ Sigma1 pattern on r10: ror#19 term
2094 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2114 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2124 eor r12,r0,r8,ror#19 @ Sigma1 pattern on r8: ror#19 term
2129 add r11,r11,r12,ror#6 @ r11 += ror(r12,6): h+=Sigma1(e) pattern
2139 add r11,r11,r0,ror#2 @ r11 += ror(r0,2): h+=Sigma0(a) pattern
2151 eor r0,r11,r11,ror#11 @ Sigma0 pattern on r11: x^ror(x,11)
2154 add r10,r10,r3,ror#6 @ r10 += ror(r3,6): h+=Sigma1(e) pattern
2157 eor r0,r0,r11,ror#20 @ Sigma0 pattern on r11: ror#20 term
2165 add r10,r10,r0,ror#2 @ r10 += ror(r0,2): h+=Sigma0(a) pattern
2175 eor r12,r0,r6,ror#19 @ Sigma1 pattern on r6: ror#19 term
2177 eor r0,r10,r10,ror#11 @ Sigma0 pattern on r10: x^ror(x,11)
2183 eor r0,r0,r10,ror#20 @ Sigma0 pattern on r10: ror#20 term
2192 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2212 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2222 eor r12,r0,r4,ror#19 @ Sigma1 pattern on r4: ror#19 term
2243 eor r0,r11,r11,ror#5 @ Sigma1 pattern on r11: x^ror(x,5)
2248 eor r3,r0,r11,ror#19 @ Sigma1 pattern on r11: ror#19 term
2269 eor r0,r10,r10,ror#5 @ Sigma1 pattern on r10: x^ror(x,5)
2273 eor r12,r0,r10,ror#19 @ Sigma1 pattern on r10: ror#19 term
2290 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2310 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2313 teq r2,#0 @ check for K256 terminator
2320 sub r14,r14,#256 @ rewind r14
2323 subeq r1,r1,#64 @ avoid SEGV
2324 vld1.8 {q0},[r1]! @ load next input block
2335 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2337 eor r12,r0,r8,ror#19 @ Sigma1 pattern on r8: ror#19 term
2341 add r11,r11,r12,ror#6 @ r11 += ror(r12,6): h+=Sigma1(e) pattern
2349 add r11,r11,r0,ror#2 @ r11 += ror(r0,2): h+=Sigma0(a) pattern
2357 eor r0,r11,r11,ror#11 @ Sigma0 pattern on r11: x^ror(x,11)
2359 add r10,r10,r3,ror#6 @ r10 += ror(r3,6): h+=Sigma1(e) pattern
2361 eor r0,r0,r11,ror#20 @ Sigma0 pattern on r11: ror#20 term
2366 add r10,r10,r0,ror#2 @ r10 += ror(r0,2): h+=Sigma0(a) pattern
2373 eor r12,r0,r6,ror#19 @ Sigma1 pattern on r6: ror#19 term
2374 eor r0,r10,r10,ror#11 @ Sigma0 pattern on r10: x^ror(x,11)
2378 eor r0,r0,r10,ror#20 @ Sigma0 pattern on r10: ror#20 term
2402 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2407 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2409 eor r12,r0,r4,ror#19 @ Sigma1 pattern on r4: ror#19 term
2425 eor r0,r11,r11,ror#5 @ Sigma1 pattern on r11: x^ror(x,5)
2428 eor r3,r0,r11,ror#19 @ Sigma1 pattern on r11: ror#19 term
2442 eor r0,r10,r10,ror#5 @ Sigma1 pattern on r10: x^ror(x,5)
2445 eor r12,r0,r10,ror#19 @ Sigma1 pattern on r10: ror#19 term
2474 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2479 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2481 eor r12,r0,r8,ror#19 @ Sigma1 pattern on r8: ror#19 term
2485 add r11,r11,r12,ror#6 @ r11 += ror(r12,6): h+=Sigma1(e) pattern
2493 add r11,r11,r0,ror#2 @ r11 += ror(r0,2): h+=Sigma0(a) pattern
2501 eor r0,r11,r11,ror#11 @ Sigma0 pattern on r11: x^ror(x,11)
2503 add r10,r10,r3,ror#6 @ r10 += ror(r3,6): h+=Sigma1(e) pattern
2505 eor r0,r0,r11,ror#20 @ Sigma0 pattern on r11: ror#20 term
2510 add r10,r10,r0,ror#2 @ r10 += ror(r0,2): h+=Sigma0(a) pattern
2517 eor r12,r0,r6,ror#19 @ Sigma1 pattern on r6: ror#19 term
2518 eor r0,r10,r10,ror#11 @ Sigma0 pattern on r10: x^ror(x,11)
2522 eor r0,r0,r10,ror#20 @ Sigma0 pattern on r10: ror#20 term
2546 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2551 vld1.32 {q8},[r14,:128]! @ q8 = 16 bytes at [r14], r14 += 16
2553 eor r12,r0,r4,ror#19 @ Sigma1 pattern on r4: ror#19 term
2569 eor r0,r11,r11,ror#5 @ Sigma1 pattern on r11: x^ror(x,5)
2572 eor r3,r0,r11,ror#19 @ Sigma1 pattern on r11: ror#19 term
2586 eor r0,r10,r10,ror#5 @ Sigma1 pattern on r10: x^ror(x,5)
2589 eor r12,r0,r10,ror#19 @ Sigma1 pattern on r10: ror#19 term
2618 vst1.32 {q8},[r1,:128]! @ 16 bytes of q8 to [r1], r1 += 16
2620 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
2624 add r4,r4,r0 @ accumulate
2646 ldreq sp,[sp,#76] @ restore original sp
2651 ldmia sp!,{r4-r12,pc} @ pop r4-r12 and return via pc
2652 .size sha256_block_data_order_neon,.-sha256_block_data_order_neon
2654 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ The macro defined below emits one hand-encoded instruction as four
@ raw bytes. The first form puts bytes c,d ahead of a,b and ORs 0xc
@ into d (so d=0xf3, as used by every caller, becomes 0xff); the second
@ form emits a,b,c,d unchanged.
@ NOTE(review): the reordered form is presumably the Thumb-2 encoding
@ and the plain form the ARM one - confirm against the enclosing
@ conditional.
2657 # define INST(a,b,c,d) .byte c,d|0xc,a,b
2659 # define INST(a,b,c,d) .byte a,b,c,d
@ sha256_block_data_order_armv8: SHA-256 block procedure built on the
@ ARMv8 SHA-256 instructions (sha256h, sha256h2, sha256su0, sha256su1),
@ which are emitted as raw bytes through the INST macro.
@ In: r0 = pointer to the eight 32-bit state words (loaded into q0,q1
@ at the top and stored back from q0,q1 at the end),
@ r1 = inp, consumed 64 bytes per block with post-increment,
@ r2 = len counted in 64-byte blocks (turned into end of inp).
@ r3 is positioned relative to K256 (see the .LARMv8-K256 adjustment
@ and the rewind by 256-16).
@ NOTE(review): presumably r3 walks the K256 table and q12/q13 hold
@ its words before the message words are added - confirm.
@ Per block there are 16 sha256h/sha256h2 pairs: 12 preceded by a
@ message schedule update (sha256su0 ... sha256su1) and 4 without.
2662 .type sha256_block_data_order_armv8,%function
2664 sha256_block_data_order_armv8:
2666 vld1.32 {q0,q1},[r0] @ q0,q1 = eight state words at [r0]
2669 sub r3,r3,#.LARMv8-K256 @ step r3 back by the distance K256 -> .LARMv8
2673 add r2,r1,r2,lsl#6 @ len to point at the end of inp
2676 vld1.8 {q8-q9},[r1]! @ q8,q9 = input bytes 0..31, r1 += 32
2677 vld1.8 {q10-q11},[r1]! @ q10,q11 = input bytes 32..63, r1 += 32
2683 vmov q14,q0 @ offload
2688 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2690 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2691 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2692 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2695 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2697 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2698 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2699 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2701 vadd.i32 q12,q12,q10 @ q12 += q10 per 32-bit lane; feeds the q12 pair below
2702 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2704 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2705 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2706 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2708 vadd.i32 q13,q13,q11 @ q13 += q11 per 32-bit lane; feeds the q13 pair below
2709 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2711 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2712 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2713 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2716 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2718 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2719 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2720 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2723 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2725 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2726 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2727 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2729 vadd.i32 q12,q12,q10 @ q12 += q10 per 32-bit lane; feeds the q12 pair below
2730 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2732 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2733 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2734 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2736 vadd.i32 q13,q13,q11 @ q13 += q11 per 32-bit lane; feeds the q13 pair below
2737 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2739 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2740 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2741 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2744 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2746 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2747 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2748 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2751 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2753 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2754 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2755 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2757 vadd.i32 q12,q12,q10 @ q12 += q10 per 32-bit lane; feeds the q12 pair below
2758 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2760 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2761 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2762 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2764 vadd.i32 q13,q13,q11 @ q13 += q11 per 32-bit lane; feeds the q13 pair below
2765 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2767 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2768 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2769 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
@ Final four sha256h/sha256h2 pairs: no further schedule updates.
2773 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2774 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2779 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2780 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2783 vadd.i32 q12,q12,q10 @ q12 += q10 per 32-bit lane; feeds the q12 pair below
2784 sub r3,r3,#256-16 @ rewind
2786 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2787 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2789 vadd.i32 q13,q13,q11 @ q13 += q11 per 32-bit lane; feeds the q13 pair below
2791 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2792 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2799 vst1.32 {q0,q1},[r0] @ write the eight state words back to [r0]
2802 .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
@ NUL-terminated identification string embedded in the object file.
2804 .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2806 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2807 .comm OPENSSL_armcap_P,4,4