1 @ SPDX-License-Identifier: GPL-2.0
3 @ This code is taken from the OpenSSL project but the author (Andy Polyakov)
4 @ has relicensed it under the GPLv2. Therefore this program is free software;
5 @ you can redistribute it and/or modify it under the terms of the GNU General
6 @ Public License version 2 as published by the Free Software Foundation.
8 @ The original headers, including the original license headers, are
9 @ included below for completeness.
11 @ ====================================================================
12 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
13 @ project. The module is, however, dual licensed under OpenSSL and
14 @ CRYPTOGAMS licenses depending on where you obtain it. For further
15 @ details see http://www.openssl.org/~appro/cryptogams/.
16 @ ====================================================================
18 @ SHA256 block procedure for ARMv4. May 2007.
20 @ Performance is ~2x better than gcc 3.4 generated code and in
21 @ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
22 @ per byte [on single-issue Xscale PXA250 core].
26 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27 @ Cortex A8 core and ~20 cycles per processed byte.
31 @ Profiler-assisted and platform-specific optimization resulted in 16%
32 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
36 @ Add NEON implementation. On Cortex A8 it was measured to process one
37 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39 @ code (meaning that the latter performs sub-optimally, nothing was done about it).
44 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
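@
@ This file carries three implementations: the integer-only ARMv4 code
@ below, a NEON version (sha256_block_data_order_neon) and an ARMv8
@ Crypto Extensions version (sha256_block_data_order_armv8).  When built
@ outside the kernel, the generic entry point consults OPENSSL_armcap_P
@ at run time and branches to the NEON or ARMv8 path if the CPU supports it.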
47 # include "arm_arch.h"
49 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
50 # define __ARM_MAX_ARCH__ 7
69 .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
70 .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
71 .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
72 .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
73 .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
74 .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
75 .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
76 .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
77 .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
78 .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
79 .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
80 .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
81 .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
82 .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
83 .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
84 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
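@ The 64 SHA-256 round constants K256[0..63]: the first 32 bits of the
@ fractional parts of the cube roots of the first 64 primes (FIPS 180-4).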
87 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
89 .word OPENSSL_armcap_P-sha256_block_data_order
93 .global sha256_block_data_order
94 .type sha256_block_data_order,%function
95 sha256_block_data_order:
97 sub r3,pc,#8 @ sha256_block_data_order
99 adr r3,sha256_block_data_order
101 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
102 ldr r12,.LOPENSSL_armcap
103 ldr r12,[r3,r12] @ OPENSSL_armcap_P
104 tst r12,#ARMV8_SHA256
109 add r2,r1,r2,lsl#6 @ len to point at the end of inp
110 stmdb sp!,{r0,r1,r2,r4-r11,lr}
111 ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
112 sub r14,r3,#256+32 @ K256
113 sub sp,sp,#16*4 @ alloca(X[16])
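@ Frame and register layout for the scalar loop: r4-r11 hold the working
@ variables a..h, r14 walks the K256 table, sp points at a 16-word X[]
@ ring buffer, and ctx, inp and the input end pointer saved by the stmdb
@ above sit at [sp,#16*4], [sp,#17*4] and [sp,#18*4].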
125 str r1,[sp,#17*4] @ make room for r1
128 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
129 eor r0,r0,r8,ror#19 @ Sigma1(e)
134 @ ldrb r2,[r1,#3] @ 0
135 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
142 str r1,[sp,#17*4] @ make room for r1
146 eor r0,r0,r8,ror#19 @ Sigma1(e)
148 ldr r12,[r14],#4 @ *K256++
149 add r11,r11,r2 @ h+=X[i]
152 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
154 add r11,r11,r12 @ h+=K256[i]
155 eor r2,r2,r10 @ Ch(e,f,g)
157 add r11,r11,r2 @ h+=Ch(e,f,g)
160 cmp r12,#0xf2 @ done?
164 ldr r2,[r1],#4 @ prefetch
168 eor r12,r4,r5 @ a^b, b^c in next round
170 ldr r2,[sp,#2*4] @ from future BODY_16_xx
171 eor r12,r4,r5 @ a^b, b^c in next round
172 ldr r1,[sp,#15*4] @ from future BODY_16_xx
174 eor r0,r0,r4,ror#20 @ Sigma0(a)
175 and r3,r3,r12 @ (b^c)&=(a^b)
177 eor r3,r3,r5 @ Maj(a,b,c)
178 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
179 @ add r11,r11,r3 @ h+=Maj(a,b,c)
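@ Each round computes T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + X[i],
@ then a' = T1 + Sigma0(a) + Maj(a,b,c) and d' = d + T1, with the
@ registers rotating roles from round to round.  Here
@   Sigma1(e) = ROR(e,6)^ROR(e,11)^ROR(e,25),
@   Sigma0(a) = ROR(a,2)^ROR(a,13)^ROR(a,22),
@   Ch(e,f,g) = (e&f)^(~e&g),  Maj(a,b,c) = (a&b)^(a&c)^(b&c).
@ The Maj() addition is deferred into the following round, hence the
@ "from the past" comments.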
183 str r1,[sp,#17*4] @ make room for r1
186 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
187 eor r0,r0,r7,ror#19 @ Sigma1(e)
192 @ ldrb r2,[r1,#3] @ 1
193 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
200 str r1,[sp,#17*4] @ make room for r1
204 eor r0,r0,r7,ror#19 @ Sigma1(e)
206 ldr r3,[r14],#4 @ *K256++
207 add r10,r10,r2 @ h+=X[i]
210 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
212 add r10,r10,r3 @ h+=K256[i]
213 eor r2,r2,r9 @ Ch(e,f,g)
214 eor r0,r11,r11,ror#11
215 add r10,r10,r2 @ h+=Ch(e,f,g)
222 ldr r2,[r1],#4 @ prefetch
226 eor r3,r11,r4 @ a^b, b^c in next round
228 ldr r2,[sp,#3*4] @ from future BODY_16_xx
229 eor r3,r11,r4 @ a^b, b^c in next round
230 ldr r1,[sp,#0*4] @ from future BODY_16_xx
232 eor r0,r0,r11,ror#20 @ Sigma0(a)
233 and r12,r12,r3 @ (b^c)&=(a^b)
235 eor r12,r12,r4 @ Maj(a,b,c)
236 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
237 @ add r10,r10,r12 @ h+=Maj(a,b,c)
241 str r1,[sp,#17*4] @ make room for r1
244 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
245 eor r0,r0,r6,ror#19 @ Sigma1(e)
250 @ ldrb r2,[r1,#3] @ 2
251 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
258 str r1,[sp,#17*4] @ make room for r1
262 eor r0,r0,r6,ror#19 @ Sigma1(e)
264 ldr r12,[r14],#4 @ *K256++
265 add r9,r9,r2 @ h+=X[i]
268 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
270 add r9,r9,r12 @ h+=K256[i]
271 eor r2,r2,r8 @ Ch(e,f,g)
272 eor r0,r10,r10,ror#11
273 add r9,r9,r2 @ h+=Ch(e,f,g)
276 cmp r12,#0xf2 @ done?
280 ldr r2,[r1],#4 @ prefetch
284 eor r12,r10,r11 @ a^b, b^c in next round
286 ldr r2,[sp,#4*4] @ from future BODY_16_xx
287 eor r12,r10,r11 @ a^b, b^c in next round
288 ldr r1,[sp,#1*4] @ from future BODY_16_xx
290 eor r0,r0,r10,ror#20 @ Sigma0(a)
291 and r3,r3,r12 @ (b^c)&=(a^b)
293 eor r3,r3,r11 @ Maj(a,b,c)
294 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
295 @ add r9,r9,r3 @ h+=Maj(a,b,c)
299 str r1,[sp,#17*4] @ make room for r1
302 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
303 eor r0,r0,r5,ror#19 @ Sigma1(e)
308 @ ldrb r2,[r1,#3] @ 3
309 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
316 str r1,[sp,#17*4] @ make room for r1
320 eor r0,r0,r5,ror#19 @ Sigma1(e)
322 ldr r3,[r14],#4 @ *K256++
323 add r8,r8,r2 @ h+=X[i]
326 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
328 add r8,r8,r3 @ h+=K256[i]
329 eor r2,r2,r7 @ Ch(e,f,g)
331 add r8,r8,r2 @ h+=Ch(e,f,g)
338 ldr r2,[r1],#4 @ prefetch
342 eor r3,r9,r10 @ a^b, b^c in next round
344 ldr r2,[sp,#5*4] @ from future BODY_16_xx
345 eor r3,r9,r10 @ a^b, b^c in next round
346 ldr r1,[sp,#2*4] @ from future BODY_16_xx
348 eor r0,r0,r9,ror#20 @ Sigma0(a)
349 and r12,r12,r3 @ (b^c)&=(a^b)
351 eor r12,r12,r10 @ Maj(a,b,c)
352 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
353 @ add r8,r8,r12 @ h+=Maj(a,b,c)
357 str r1,[sp,#17*4] @ make room for r1
360 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
361 eor r0,r0,r4,ror#19 @ Sigma1(e)
366 @ ldrb r2,[r1,#3] @ 4
367 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
374 str r1,[sp,#17*4] @ make room for r1
378 eor r0,r0,r4,ror#19 @ Sigma1(e)
380 ldr r12,[r14],#4 @ *K256++
381 add r7,r7,r2 @ h+=X[i]
384 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
386 add r7,r7,r12 @ h+=K256[i]
387 eor r2,r2,r6 @ Ch(e,f,g)
389 add r7,r7,r2 @ h+=Ch(e,f,g)
392 cmp r12,#0xf2 @ done?
396 ldr r2,[r1],#4 @ prefetch
400 eor r12,r8,r9 @ a^b, b^c in next round
402 ldr r2,[sp,#6*4] @ from future BODY_16_xx
403 eor r12,r8,r9 @ a^b, b^c in next round
404 ldr r1,[sp,#3*4] @ from future BODY_16_xx
406 eor r0,r0,r8,ror#20 @ Sigma0(a)
407 and r3,r3,r12 @ (b^c)&=(a^b)
408 add r11,r11,r7 @ d+=h
409 eor r3,r3,r9 @ Maj(a,b,c)
410 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
411 @ add r7,r7,r3 @ h+=Maj(a,b,c)
415 str r1,[sp,#17*4] @ make room for r1
418 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
419 eor r0,r0,r11,ror#19 @ Sigma1(e)
424 @ ldrb r2,[r1,#3] @ 5
425 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
432 str r1,[sp,#17*4] @ make room for r1
436 eor r0,r0,r11,ror#19 @ Sigma1(e)
438 ldr r3,[r14],#4 @ *K256++
439 add r6,r6,r2 @ h+=X[i]
442 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
444 add r6,r6,r3 @ h+=K256[i]
445 eor r2,r2,r5 @ Ch(e,f,g)
447 add r6,r6,r2 @ h+=Ch(e,f,g)
454 ldr r2,[r1],#4 @ prefetch
458 eor r3,r7,r8 @ a^b, b^c in next round
460 ldr r2,[sp,#7*4] @ from future BODY_16_xx
461 eor r3,r7,r8 @ a^b, b^c in next round
462 ldr r1,[sp,#4*4] @ from future BODY_16_xx
464 eor r0,r0,r7,ror#20 @ Sigma0(a)
465 and r12,r12,r3 @ (b^c)&=(a^b)
466 add r10,r10,r6 @ d+=h
467 eor r12,r12,r8 @ Maj(a,b,c)
468 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
469 @ add r6,r6,r12 @ h+=Maj(a,b,c)
473 str r1,[sp,#17*4] @ make room for r1
476 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
477 eor r0,r0,r10,ror#19 @ Sigma1(e)
482 @ ldrb r2,[r1,#3] @ 6
483 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
490 str r1,[sp,#17*4] @ make room for r1
494 eor r0,r0,r10,ror#19 @ Sigma1(e)
496 ldr r12,[r14],#4 @ *K256++
497 add r5,r5,r2 @ h+=X[i]
500 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
502 add r5,r5,r12 @ h+=K256[i]
503 eor r2,r2,r4 @ Ch(e,f,g)
505 add r5,r5,r2 @ h+=Ch(e,f,g)
508 cmp r12,#0xf2 @ done?
512 ldr r2,[r1],#4 @ prefetch
516 eor r12,r6,r7 @ a^b, b^c in next round
518 ldr r2,[sp,#8*4] @ from future BODY_16_xx
519 eor r12,r6,r7 @ a^b, b^c in next round
520 ldr r1,[sp,#5*4] @ from future BODY_16_xx
522 eor r0,r0,r6,ror#20 @ Sigma0(a)
523 and r3,r3,r12 @ (b^c)&=(a^b)
525 eor r3,r3,r7 @ Maj(a,b,c)
526 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
527 @ add r5,r5,r3 @ h+=Maj(a,b,c)
531 str r1,[sp,#17*4] @ make room for r1
534 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
535 eor r0,r0,r9,ror#19 @ Sigma1(e)
540 @ ldrb r2,[r1,#3] @ 7
541 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
548 str r1,[sp,#17*4] @ make room for r1
552 eor r0,r0,r9,ror#19 @ Sigma1(e)
554 ldr r3,[r14],#4 @ *K256++
555 add r4,r4,r2 @ h+=X[i]
558 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
560 add r4,r4,r3 @ h+=K256[i]
561 eor r2,r2,r11 @ Ch(e,f,g)
563 add r4,r4,r2 @ h+=Ch(e,f,g)
570 ldr r2,[r1],#4 @ prefetch
574 eor r3,r5,r6 @ a^b, b^c in next round
576 ldr r2,[sp,#9*4] @ from future BODY_16_xx
577 eor r3,r5,r6 @ a^b, b^c in next round
578 ldr r1,[sp,#6*4] @ from future BODY_16_xx
580 eor r0,r0,r5,ror#20 @ Sigma0(a)
581 and r12,r12,r3 @ (b^c)&=(a^b)
583 eor r12,r12,r6 @ Maj(a,b,c)
584 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
585 @ add r4,r4,r12 @ h+=Maj(a,b,c)
589 str r1,[sp,#17*4] @ make room for r1
592 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
593 eor r0,r0,r8,ror#19 @ Sigma1(e)
598 @ ldrb r2,[r1,#3] @ 8
599 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
606 str r1,[sp,#17*4] @ make room for r1
610 eor r0,r0,r8,ror#19 @ Sigma1(e)
612 ldr r12,[r14],#4 @ *K256++
613 add r11,r11,r2 @ h+=X[i]
616 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
618 add r11,r11,r12 @ h+=K256[i]
619 eor r2,r2,r10 @ Ch(e,f,g)
621 add r11,r11,r2 @ h+=Ch(e,f,g)
624 cmp r12,#0xf2 @ done?
628 ldr r2,[r1],#4 @ prefetch
632 eor r12,r4,r5 @ a^b, b^c in next round
634 ldr r2,[sp,#10*4] @ from future BODY_16_xx
635 eor r12,r4,r5 @ a^b, b^c in next round
636 ldr r1,[sp,#7*4] @ from future BODY_16_xx
638 eor r0,r0,r4,ror#20 @ Sigma0(a)
639 and r3,r3,r12 @ (b^c)&=(a^b)
641 eor r3,r3,r5 @ Maj(a,b,c)
642 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
643 @ add r11,r11,r3 @ h+=Maj(a,b,c)
647 str r1,[sp,#17*4] @ make room for r1
650 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
651 eor r0,r0,r7,ror#19 @ Sigma1(e)
656 @ ldrb r2,[r1,#3] @ 9
657 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
664 str r1,[sp,#17*4] @ make room for r1
668 eor r0,r0,r7,ror#19 @ Sigma1(e)
670 ldr r3,[r14],#4 @ *K256++
671 add r10,r10,r2 @ h+=X[i]
674 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
676 add r10,r10,r3 @ h+=K256[i]
677 eor r2,r2,r9 @ Ch(e,f,g)
678 eor r0,r11,r11,ror#11
679 add r10,r10,r2 @ h+=Ch(e,f,g)
686 ldr r2,[r1],#4 @ prefetch
690 eor r3,r11,r4 @ a^b, b^c in next round
692 ldr r2,[sp,#11*4] @ from future BODY_16_xx
693 eor r3,r11,r4 @ a^b, b^c in next round
694 ldr r1,[sp,#8*4] @ from future BODY_16_xx
696 eor r0,r0,r11,ror#20 @ Sigma0(a)
697 and r12,r12,r3 @ (b^c)&=(a^b)
699 eor r12,r12,r4 @ Maj(a,b,c)
700 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
701 @ add r10,r10,r12 @ h+=Maj(a,b,c)
703 @ ldr r2,[r1],#4 @ 10
705 str r1,[sp,#17*4] @ make room for r1
708 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
709 eor r0,r0,r6,ror#19 @ Sigma1(e)
714 @ ldrb r2,[r1,#3] @ 10
715 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
722 str r1,[sp,#17*4] @ make room for r1
726 eor r0,r0,r6,ror#19 @ Sigma1(e)
728 ldr r12,[r14],#4 @ *K256++
729 add r9,r9,r2 @ h+=X[i]
732 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
734 add r9,r9,r12 @ h+=K256[i]
735 eor r2,r2,r8 @ Ch(e,f,g)
736 eor r0,r10,r10,ror#11
737 add r9,r9,r2 @ h+=Ch(e,f,g)
740 cmp r12,#0xf2 @ done?
744 ldr r2,[r1],#4 @ prefetch
748 eor r12,r10,r11 @ a^b, b^c in next round
750 ldr r2,[sp,#12*4] @ from future BODY_16_xx
751 eor r12,r10,r11 @ a^b, b^c in next round
752 ldr r1,[sp,#9*4] @ from future BODY_16_xx
754 eor r0,r0,r10,ror#20 @ Sigma0(a)
755 and r3,r3,r12 @ (b^c)&=(a^b)
757 eor r3,r3,r11 @ Maj(a,b,c)
758 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
759 @ add r9,r9,r3 @ h+=Maj(a,b,c)
761 @ ldr r2,[r1],#4 @ 11
763 str r1,[sp,#17*4] @ make room for r1
766 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
767 eor r0,r0,r5,ror#19 @ Sigma1(e)
772 @ ldrb r2,[r1,#3] @ 11
773 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
780 str r1,[sp,#17*4] @ make room for r1
784 eor r0,r0,r5,ror#19 @ Sigma1(e)
786 ldr r3,[r14],#4 @ *K256++
787 add r8,r8,r2 @ h+=X[i]
790 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
792 add r8,r8,r3 @ h+=K256[i]
793 eor r2,r2,r7 @ Ch(e,f,g)
795 add r8,r8,r2 @ h+=Ch(e,f,g)
802 ldr r2,[r1],#4 @ prefetch
806 eor r3,r9,r10 @ a^b, b^c in next round
808 ldr r2,[sp,#13*4] @ from future BODY_16_xx
809 eor r3,r9,r10 @ a^b, b^c in next round
810 ldr r1,[sp,#10*4] @ from future BODY_16_xx
812 eor r0,r0,r9,ror#20 @ Sigma0(a)
813 and r12,r12,r3 @ (b^c)&=(a^b)
815 eor r12,r12,r10 @ Maj(a,b,c)
816 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
817 @ add r8,r8,r12 @ h+=Maj(a,b,c)
819 @ ldr r2,[r1],#4 @ 12
821 str r1,[sp,#17*4] @ make room for r1
824 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
825 eor r0,r0,r4,ror#19 @ Sigma1(e)
830 @ ldrb r2,[r1,#3] @ 12
831 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
838 str r1,[sp,#17*4] @ make room for r1
842 eor r0,r0,r4,ror#19 @ Sigma1(e)
844 ldr r12,[r14],#4 @ *K256++
845 add r7,r7,r2 @ h+=X[i]
848 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
850 add r7,r7,r12 @ h+=K256[i]
851 eor r2,r2,r6 @ Ch(e,f,g)
853 add r7,r7,r2 @ h+=Ch(e,f,g)
856 cmp r12,#0xf2 @ done?
860 ldr r2,[r1],#4 @ prefetch
864 eor r12,r8,r9 @ a^b, b^c in next round
866 ldr r2,[sp,#14*4] @ from future BODY_16_xx
867 eor r12,r8,r9 @ a^b, b^c in next round
868 ldr r1,[sp,#11*4] @ from future BODY_16_xx
870 eor r0,r0,r8,ror#20 @ Sigma0(a)
871 and r3,r3,r12 @ (b^c)&=(a^b)
872 add r11,r11,r7 @ d+=h
873 eor r3,r3,r9 @ Maj(a,b,c)
874 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
875 @ add r7,r7,r3 @ h+=Maj(a,b,c)
877 @ ldr r2,[r1],#4 @ 13
879 str r1,[sp,#17*4] @ make room for r1
882 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
883 eor r0,r0,r11,ror#19 @ Sigma1(e)
888 @ ldrb r2,[r1,#3] @ 13
889 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
896 str r1,[sp,#17*4] @ make room for r1
900 eor r0,r0,r11,ror#19 @ Sigma1(e)
902 ldr r3,[r14],#4 @ *K256++
903 add r6,r6,r2 @ h+=X[i]
906 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
908 add r6,r6,r3 @ h+=K256[i]
909 eor r2,r2,r5 @ Ch(e,f,g)
911 add r6,r6,r2 @ h+=Ch(e,f,g)
918 ldr r2,[r1],#4 @ prefetch
922 eor r3,r7,r8 @ a^b, b^c in next round
924 ldr r2,[sp,#15*4] @ from future BODY_16_xx
925 eor r3,r7,r8 @ a^b, b^c in next round
926 ldr r1,[sp,#12*4] @ from future BODY_16_xx
928 eor r0,r0,r7,ror#20 @ Sigma0(a)
929 and r12,r12,r3 @ (b^c)&=(a^b)
930 add r10,r10,r6 @ d+=h
931 eor r12,r12,r8 @ Maj(a,b,c)
932 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
933 @ add r6,r6,r12 @ h+=Maj(a,b,c)
935 @ ldr r2,[r1],#4 @ 14
937 str r1,[sp,#17*4] @ make room for r1
940 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
941 eor r0,r0,r10,ror#19 @ Sigma1(e)
946 @ ldrb r2,[r1,#3] @ 14
947 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
954 str r1,[sp,#17*4] @ make room for r1
958 eor r0,r0,r10,ror#19 @ Sigma1(e)
960 ldr r12,[r14],#4 @ *K256++
961 add r5,r5,r2 @ h+=X[i]
964 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
966 add r5,r5,r12 @ h+=K256[i]
967 eor r2,r2,r4 @ Ch(e,f,g)
969 add r5,r5,r2 @ h+=Ch(e,f,g)
972 cmp r12,#0xf2 @ done?
976 ldr r2,[r1],#4 @ prefetch
980 eor r12,r6,r7 @ a^b, b^c in next round
982 ldr r2,[sp,#0*4] @ from future BODY_16_xx
983 eor r12,r6,r7 @ a^b, b^c in next round
984 ldr r1,[sp,#13*4] @ from future BODY_16_xx
986 eor r0,r0,r6,ror#20 @ Sigma0(a)
987 and r3,r3,r12 @ (b^c)&=(a^b)
989 eor r3,r3,r7 @ Maj(a,b,c)
990 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
991 @ add r5,r5,r3 @ h+=Maj(a,b,c)
993 @ ldr r2,[r1],#4 @ 15
995 str r1,[sp,#17*4] @ make room for r1
998 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
999 eor r0,r0,r9,ror#19 @ Sigma1(e)
1004 @ ldrb r2,[r1,#3] @ 15
1005 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1012 str r1,[sp,#17*4] @ make room for r1
1016 eor r0,r0,r9,ror#19 @ Sigma1(e)
1018 ldr r3,[r14],#4 @ *K256++
1019 add r4,r4,r2 @ h+=X[i]
1022 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1024 add r4,r4,r3 @ h+=K256[i]
1025 eor r2,r2,r11 @ Ch(e,f,g)
1027 add r4,r4,r2 @ h+=Ch(e,f,g)
1030 cmp r3,#0xf2 @ done?
1033 # if __ARM_ARCH__>=7
1034 ldr r2,[r1],#4 @ prefetch
1038 eor r3,r5,r6 @ a^b, b^c in next round
1040 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1041 eor r3,r5,r6 @ a^b, b^c in next round
1042 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1044 eor r0,r0,r5,ror#20 @ Sigma0(a)
1045 and r12,r12,r3 @ (b^c)&=(a^b)
1047 eor r12,r12,r6 @ Maj(a,b,c)
1048 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1049 @ add r4,r4,r12 @ h+=Maj(a,b,c)
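@ Rounds 16..63 use the same round body, but each first extends the
@ message schedule in place on the stack:
@   X[i&15] += sigma1(X[(i+14)&15]) + X[(i+9)&15] + sigma0(X[(i+1)&15])
@ with sigma0(x) = ROR(x,7)^ROR(x,18)^(x>>3) and
@      sigma1(x) = ROR(x,17)^ROR(x,19)^(x>>10).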
1051 @ ldr r2,[sp,#1*4] @ 16
1054 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1057 eor r12,r12,r1,ror#19
1058 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1060 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1064 eor r0,r8,r8,ror#5 @ from BODY_00_15
1066 eor r0,r0,r8,ror#19 @ Sigma1(e)
1068 ldr r12,[r14],#4 @ *K256++
1069 add r11,r11,r2 @ h+=X[i]
1072 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1074 add r11,r11,r12 @ h+=K256[i]
1075 eor r2,r2,r10 @ Ch(e,f,g)
1077 add r11,r11,r2 @ h+=Ch(e,f,g)
1080 cmp r12,#0xf2 @ done?
1083 # if __ARM_ARCH__>=7
1084 ldr r2,[r1],#4 @ prefetch
1088 eor r12,r4,r5 @ a^b, b^c in next round
1090 ldr r2,[sp,#2*4] @ from future BODY_16_xx
1091 eor r12,r4,r5 @ a^b, b^c in next round
1092 ldr r1,[sp,#15*4] @ from future BODY_16_xx
1094 eor r0,r0,r4,ror#20 @ Sigma0(a)
1095 and r3,r3,r12 @ (b^c)&=(a^b)
1096 add r7,r7,r11 @ d+=h
1097 eor r3,r3,r5 @ Maj(a,b,c)
1098 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1099 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1100 @ ldr r2,[sp,#2*4] @ 17
1103 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1107 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1109 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1113 eor r0,r7,r7,ror#5 @ from BODY_00_15
1115 eor r0,r0,r7,ror#19 @ Sigma1(e)
1117 ldr r3,[r14],#4 @ *K256++
1118 add r10,r10,r2 @ h+=X[i]
1121 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1123 add r10,r10,r3 @ h+=K256[i]
1124 eor r2,r2,r9 @ Ch(e,f,g)
1125 eor r0,r11,r11,ror#11
1126 add r10,r10,r2 @ h+=Ch(e,f,g)
1129 cmp r3,#0xf2 @ done?
1132 # if __ARM_ARCH__>=7
1133 ldr r2,[r1],#4 @ prefetch
1137 eor r3,r11,r4 @ a^b, b^c in next round
1139 ldr r2,[sp,#3*4] @ from future BODY_16_xx
1140 eor r3,r11,r4 @ a^b, b^c in next round
1141 ldr r1,[sp,#0*4] @ from future BODY_16_xx
1143 eor r0,r0,r11,ror#20 @ Sigma0(a)
1144 and r12,r12,r3 @ (b^c)&=(a^b)
1145 add r6,r6,r10 @ d+=h
1146 eor r12,r12,r4 @ Maj(a,b,c)
1147 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1148 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1149 @ ldr r2,[sp,#3*4] @ 18
1152 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1155 eor r12,r12,r1,ror#19
1156 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1158 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1162 eor r0,r6,r6,ror#5 @ from BODY_00_15
1164 eor r0,r0,r6,ror#19 @ Sigma1(e)
1166 ldr r12,[r14],#4 @ *K256++
1167 add r9,r9,r2 @ h+=X[i]
1170 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1172 add r9,r9,r12 @ h+=K256[i]
1173 eor r2,r2,r8 @ Ch(e,f,g)
1174 eor r0,r10,r10,ror#11
1175 add r9,r9,r2 @ h+=Ch(e,f,g)
1178 cmp r12,#0xf2 @ done?
1181 # if __ARM_ARCH__>=7
1182 ldr r2,[r1],#4 @ prefetch
1186 eor r12,r10,r11 @ a^b, b^c in next round
1188 ldr r2,[sp,#4*4] @ from future BODY_16_xx
1189 eor r12,r10,r11 @ a^b, b^c in next round
1190 ldr r1,[sp,#1*4] @ from future BODY_16_xx
1192 eor r0,r0,r10,ror#20 @ Sigma0(a)
1193 and r3,r3,r12 @ (b^c)&=(a^b)
1195 eor r3,r3,r11 @ Maj(a,b,c)
1196 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1197 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1198 @ ldr r2,[sp,#4*4] @ 19
1201 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1205 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1207 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1211 eor r0,r5,r5,ror#5 @ from BODY_00_15
1213 eor r0,r0,r5,ror#19 @ Sigma1(e)
1215 ldr r3,[r14],#4 @ *K256++
1216 add r8,r8,r2 @ h+=X[i]
1219 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1221 add r8,r8,r3 @ h+=K256[i]
1222 eor r2,r2,r7 @ Ch(e,f,g)
1224 add r8,r8,r2 @ h+=Ch(e,f,g)
1227 cmp r3,#0xf2 @ done?
1230 # if __ARM_ARCH__>=7
1231 ldr r2,[r1],#4 @ prefetch
1235 eor r3,r9,r10 @ a^b, b^c in next round
1237 ldr r2,[sp,#5*4] @ from future BODY_16_xx
1238 eor r3,r9,r10 @ a^b, b^c in next round
1239 ldr r1,[sp,#2*4] @ from future BODY_16_xx
1241 eor r0,r0,r9,ror#20 @ Sigma0(a)
1242 and r12,r12,r3 @ (b^c)&=(a^b)
1244 eor r12,r12,r10 @ Maj(a,b,c)
1245 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1246 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1247 @ ldr r2,[sp,#5*4] @ 20
1250 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1253 eor r12,r12,r1,ror#19
1254 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1256 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1260 eor r0,r4,r4,ror#5 @ from BODY_00_15
1262 eor r0,r0,r4,ror#19 @ Sigma1(e)
1264 ldr r12,[r14],#4 @ *K256++
1265 add r7,r7,r2 @ h+=X[i]
1268 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1270 add r7,r7,r12 @ h+=K256[i]
1271 eor r2,r2,r6 @ Ch(e,f,g)
1273 add r7,r7,r2 @ h+=Ch(e,f,g)
1276 cmp r12,#0xf2 @ done?
1279 # if __ARM_ARCH__>=7
1280 ldr r2,[r1],#4 @ prefetch
1284 eor r12,r8,r9 @ a^b, b^c in next round
1286 ldr r2,[sp,#6*4] @ from future BODY_16_xx
1287 eor r12,r8,r9 @ a^b, b^c in next round
1288 ldr r1,[sp,#3*4] @ from future BODY_16_xx
1290 eor r0,r0,r8,ror#20 @ Sigma0(a)
1291 and r3,r3,r12 @ (b^c)&=(a^b)
1292 add r11,r11,r7 @ d+=h
1293 eor r3,r3,r9 @ Maj(a,b,c)
1294 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1295 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1296 @ ldr r2,[sp,#6*4] @ 21
1299 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1303 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1305 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1309 eor r0,r11,r11,ror#5 @ from BODY_00_15
1311 eor r0,r0,r11,ror#19 @ Sigma1(e)
1313 ldr r3,[r14],#4 @ *K256++
1314 add r6,r6,r2 @ h+=X[i]
1317 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1319 add r6,r6,r3 @ h+=K256[i]
1320 eor r2,r2,r5 @ Ch(e,f,g)
1322 add r6,r6,r2 @ h+=Ch(e,f,g)
1325 cmp r3,#0xf2 @ done?
1328 # if __ARM_ARCH__>=7
1329 ldr r2,[r1],#4 @ prefetch
1333 eor r3,r7,r8 @ a^b, b^c in next round
1335 ldr r2,[sp,#7*4] @ from future BODY_16_xx
1336 eor r3,r7,r8 @ a^b, b^c in next round
1337 ldr r1,[sp,#4*4] @ from future BODY_16_xx
1339 eor r0,r0,r7,ror#20 @ Sigma0(a)
1340 and r12,r12,r3 @ (b^c)&=(a^b)
1341 add r10,r10,r6 @ d+=h
1342 eor r12,r12,r8 @ Maj(a,b,c)
1343 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1344 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1345 @ ldr r2,[sp,#7*4] @ 22
1348 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1351 eor r12,r12,r1,ror#19
1352 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1354 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1358 eor r0,r10,r10,ror#5 @ from BODY_00_15
1360 eor r0,r0,r10,ror#19 @ Sigma1(e)
1362 ldr r12,[r14],#4 @ *K256++
1363 add r5,r5,r2 @ h+=X[i]
1366 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1368 add r5,r5,r12 @ h+=K256[i]
1369 eor r2,r2,r4 @ Ch(e,f,g)
1371 add r5,r5,r2 @ h+=Ch(e,f,g)
1374 cmp r12,#0xf2 @ done?
1377 # if __ARM_ARCH__>=7
1378 ldr r2,[r1],#4 @ prefetch
1382 eor r12,r6,r7 @ a^b, b^c in next round
1384 ldr r2,[sp,#8*4] @ from future BODY_16_xx
1385 eor r12,r6,r7 @ a^b, b^c in next round
1386 ldr r1,[sp,#5*4] @ from future BODY_16_xx
1388 eor r0,r0,r6,ror#20 @ Sigma0(a)
1389 and r3,r3,r12 @ (b^c)&=(a^b)
1391 eor r3,r3,r7 @ Maj(a,b,c)
1392 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1393 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1394 @ ldr r2,[sp,#8*4] @ 23
1397 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1401 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1403 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1407 eor r0,r9,r9,ror#5 @ from BODY_00_15
1409 eor r0,r0,r9,ror#19 @ Sigma1(e)
1411 ldr r3,[r14],#4 @ *K256++
1412 add r4,r4,r2 @ h+=X[i]
1415 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1417 add r4,r4,r3 @ h+=K256[i]
1418 eor r2,r2,r11 @ Ch(e,f,g)
1420 add r4,r4,r2 @ h+=Ch(e,f,g)
1423 cmp r3,#0xf2 @ done?
1426 # if __ARM_ARCH__>=7
1427 ldr r2,[r1],#4 @ prefetch
1431 eor r3,r5,r6 @ a^b, b^c in next round
1433 ldr r2,[sp,#9*4] @ from future BODY_16_xx
1434 eor r3,r5,r6 @ a^b, b^c in next round
1435 ldr r1,[sp,#6*4] @ from future BODY_16_xx
1437 eor r0,r0,r5,ror#20 @ Sigma0(a)
1438 and r12,r12,r3 @ (b^c)&=(a^b)
1440 eor r12,r12,r6 @ Maj(a,b,c)
1441 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1442 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1443 @ ldr r2,[sp,#9*4] @ 24
1446 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1449 eor r12,r12,r1,ror#19
1450 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1452 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1456 eor r0,r8,r8,ror#5 @ from BODY_00_15
1458 eor r0,r0,r8,ror#19 @ Sigma1(e)
1460 ldr r12,[r14],#4 @ *K256++
1461 add r11,r11,r2 @ h+=X[i]
1464 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1466 add r11,r11,r12 @ h+=K256[i]
1467 eor r2,r2,r10 @ Ch(e,f,g)
1469 add r11,r11,r2 @ h+=Ch(e,f,g)
1472 cmp r12,#0xf2 @ done?
1475 # if __ARM_ARCH__>=7
1476 ldr r2,[r1],#4 @ prefetch
1480 eor r12,r4,r5 @ a^b, b^c in next round
1482 ldr r2,[sp,#10*4] @ from future BODY_16_xx
1483 eor r12,r4,r5 @ a^b, b^c in next round
1484 ldr r1,[sp,#7*4] @ from future BODY_16_xx
1486 eor r0,r0,r4,ror#20 @ Sigma0(a)
1487 and r3,r3,r12 @ (b^c)&=(a^b)
1488 add r7,r7,r11 @ d+=h
1489 eor r3,r3,r5 @ Maj(a,b,c)
1490 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1491 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1492 @ ldr r2,[sp,#10*4] @ 25
1495 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1499 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1501 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1505 eor r0,r7,r7,ror#5 @ from BODY_00_15
1507 eor r0,r0,r7,ror#19 @ Sigma1(e)
1509 ldr r3,[r14],#4 @ *K256++
1510 add r10,r10,r2 @ h+=X[i]
1513 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1515 add r10,r10,r3 @ h+=K256[i]
1516 eor r2,r2,r9 @ Ch(e,f,g)
1517 eor r0,r11,r11,ror#11
1518 add r10,r10,r2 @ h+=Ch(e,f,g)
1521 cmp r3,#0xf2 @ done?
1524 # if __ARM_ARCH__>=7
1525 ldr r2,[r1],#4 @ prefetch
1529 eor r3,r11,r4 @ a^b, b^c in next round
1531 ldr r2,[sp,#11*4] @ from future BODY_16_xx
1532 eor r3,r11,r4 @ a^b, b^c in next round
1533 ldr r1,[sp,#8*4] @ from future BODY_16_xx
1535 eor r0,r0,r11,ror#20 @ Sigma0(a)
1536 and r12,r12,r3 @ (b^c)&=(a^b)
1537 add r6,r6,r10 @ d+=h
1538 eor r12,r12,r4 @ Maj(a,b,c)
1539 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1540 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1541 @ ldr r2,[sp,#11*4] @ 26
1544 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1547 eor r12,r12,r1,ror#19
1548 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1550 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1554 eor r0,r6,r6,ror#5 @ from BODY_00_15
1556 eor r0,r0,r6,ror#19 @ Sigma1(e)
1558 ldr r12,[r14],#4 @ *K256++
1559 add r9,r9,r2 @ h+=X[i]
1562 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1564 add r9,r9,r12 @ h+=K256[i]
1565 eor r2,r2,r8 @ Ch(e,f,g)
1566 eor r0,r10,r10,ror#11
1567 add r9,r9,r2 @ h+=Ch(e,f,g)
1570 cmp r12,#0xf2 @ done?
1573 # if __ARM_ARCH__>=7
1574 ldr r2,[r1],#4 @ prefetch
1578 eor r12,r10,r11 @ a^b, b^c in next round
1580 ldr r2,[sp,#12*4] @ from future BODY_16_xx
1581 eor r12,r10,r11 @ a^b, b^c in next round
1582 ldr r1,[sp,#9*4] @ from future BODY_16_xx
1584 eor r0,r0,r10,ror#20 @ Sigma0(a)
1585 and r3,r3,r12 @ (b^c)&=(a^b)
1587 eor r3,r3,r11 @ Maj(a,b,c)
1588 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1589 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1590 @ ldr r2,[sp,#12*4] @ 27
1593 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1597 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1599 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1603 eor r0,r5,r5,ror#5 @ from BODY_00_15
1605 eor r0,r0,r5,ror#19 @ Sigma1(e)
1607 ldr r3,[r14],#4 @ *K256++
1608 add r8,r8,r2 @ h+=X[i]
1611 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1613 add r8,r8,r3 @ h+=K256[i]
1614 eor r2,r2,r7 @ Ch(e,f,g)
1616 add r8,r8,r2 @ h+=Ch(e,f,g)
1619 cmp r3,#0xf2 @ done?
1622 # if __ARM_ARCH__>=7
1623 ldr r2,[r1],#4 @ prefetch
1627 eor r3,r9,r10 @ a^b, b^c in next round
1629 ldr r2,[sp,#13*4] @ from future BODY_16_xx
1630 eor r3,r9,r10 @ a^b, b^c in next round
1631 ldr r1,[sp,#10*4] @ from future BODY_16_xx
1633 eor r0,r0,r9,ror#20 @ Sigma0(a)
1634 and r12,r12,r3 @ (b^c)&=(a^b)
1636 eor r12,r12,r10 @ Maj(a,b,c)
1637 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1638 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1639 @ ldr r2,[sp,#13*4] @ 28
1642 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1645 eor r12,r12,r1,ror#19
1646 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1648 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1652 eor r0,r4,r4,ror#5 @ from BODY_00_15
1654 eor r0,r0,r4,ror#19 @ Sigma1(e)
1656 ldr r12,[r14],#4 @ *K256++
1657 add r7,r7,r2 @ h+=X[i]
1660 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1662 add r7,r7,r12 @ h+=K256[i]
1663 eor r2,r2,r6 @ Ch(e,f,g)
1665 add r7,r7,r2 @ h+=Ch(e,f,g)
1668 cmp r12,#0xf2 @ done?
1671 # if __ARM_ARCH__>=7
1672 ldr r2,[r1],#4 @ prefetch
1676 eor r12,r8,r9 @ a^b, b^c in next round
1678 ldr r2,[sp,#14*4] @ from future BODY_16_xx
1679 eor r12,r8,r9 @ a^b, b^c in next round
1680 ldr r1,[sp,#11*4] @ from future BODY_16_xx
1682 eor r0,r0,r8,ror#20 @ Sigma0(a)
1683 and r3,r3,r12 @ (b^c)&=(a^b)
1684 add r11,r11,r7 @ d+=h
1685 eor r3,r3,r9 @ Maj(a,b,c)
1686 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1687 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1688 @ ldr r2,[sp,#14*4] @ 29
1691 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1695 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1697 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1701 eor r0,r11,r11,ror#5 @ from BODY_00_15
1703 eor r0,r0,r11,ror#19 @ Sigma1(e)
1705 ldr r3,[r14],#4 @ *K256++
1706 add r6,r6,r2 @ h+=X[i]
1709 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1711 add r6,r6,r3 @ h+=K256[i]
1712 eor r2,r2,r5 @ Ch(e,f,g)
1714 add r6,r6,r2 @ h+=Ch(e,f,g)
1717 cmp r3,#0xf2 @ done?
1720 # if __ARM_ARCH__>=7
1721 ldr r2,[r1],#4 @ prefetch
1725 eor r3,r7,r8 @ a^b, b^c in next round
1727 ldr r2,[sp,#15*4] @ from future BODY_16_xx
1728 eor r3,r7,r8 @ a^b, b^c in next round
1729 ldr r1,[sp,#12*4] @ from future BODY_16_xx
1731 eor r0,r0,r7,ror#20 @ Sigma0(a)
1732 and r12,r12,r3 @ (b^c)&=(a^b)
1733 add r10,r10,r6 @ d+=h
1734 eor r12,r12,r8 @ Maj(a,b,c)
1735 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1736 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1737 @ ldr r2,[sp,#15*4] @ 30
1740 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1743 eor r12,r12,r1,ror#19
1744 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1746 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1750 eor r0,r10,r10,ror#5 @ from BODY_00_15
1752 eor r0,r0,r10,ror#19 @ Sigma1(e)
1754 ldr r12,[r14],#4 @ *K256++
1755 add r5,r5,r2 @ h+=X[i]
1758 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1760 add r5,r5,r12 @ h+=K256[i]
1761 eor r2,r2,r4 @ Ch(e,f,g)
1763 add r5,r5,r2 @ h+=Ch(e,f,g)
1766 cmp r12,#0xf2 @ done?
1769 # if __ARM_ARCH__>=7
1770 ldr r2,[r1],#4 @ prefetch
1774 eor r12,r6,r7 @ a^b, b^c in next round
1776 ldr r2,[sp,#0*4] @ from future BODY_16_xx
1777 eor r12,r6,r7 @ a^b, b^c in next round
1778 ldr r1,[sp,#13*4] @ from future BODY_16_xx
1780 eor r0,r0,r6,ror#20 @ Sigma0(a)
1781 and r3,r3,r12 @ (b^c)&=(a^b)
1783 eor r3,r3,r7 @ Maj(a,b,c)
1784 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1785 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1786 @ ldr r2,[sp,#0*4] @ 31
1789 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1793 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1795 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1799 eor r0,r9,r9,ror#5 @ from BODY_00_15
1801 eor r0,r0,r9,ror#19 @ Sigma1(e)
1803 ldr r3,[r14],#4 @ *K256++
1804 add r4,r4,r2 @ h+=X[i]
1807 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1809 add r4,r4,r3 @ h+=K256[i]
1810 eor r2,r2,r11 @ Ch(e,f,g)
1812 add r4,r4,r2 @ h+=Ch(e,f,g)
1815 cmp r3,#0xf2 @ done?
1818 # if __ARM_ARCH__>=7
1819 ldr r2,[r1],#4 @ prefetch
1823 eor r3,r5,r6 @ a^b, b^c in next round
1825 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1826 eor r3,r5,r6 @ a^b, b^c in next round
1827 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1829 eor r0,r0,r5,ror#20 @ Sigma0(a)
1830 and r12,r12,r3 @ (b^c)&=(a^b)
1832 eor r12,r12,r6 @ Maj(a,b,c)
1833 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1834 @ add r4,r4,r12 @ h+=Maj(a,b,c)
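@ All 64 rounds done: reload ctx from the frame, add the working
@ variables a..h back into the eight state words, store them, and loop
@ back for the next 64-byte block until inp reaches the saved end
@ pointer; then tear the frame down and return.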
1836 ite eq @ Thumb2 thing, sanity check in ARM
1838 ldreq r3,[sp,#16*4] @ pull ctx
1841 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1856 ldr r1,[sp,#17*4] @ pull inp
1857 ldr r12,[sp,#18*4] @ pull inp+len
1860 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1862 sub r14,r14,#256 @ rewind Ktbl
1865 add sp,sp,#19*4 @ destroy frame
1867 ldmia sp!,{r4-r11,pc}
1869 ldmia sp!,{r4-r11,lr}
1871 moveq pc,lr @ be binary compatible with V4, yet
1872 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
1874 .size sha256_block_data_order,.-sha256_block_data_order
1875 #if __ARM_MAX_ARCH__>=7
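@ NEON variant: the message schedule is expanded four words at a time in
@ NEON registers, pre-added with K256 and staged on an aligned stack
@ area, while the integer pipeline runs the same scalar round body as
@ above on the staged words.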
1879 .global sha256_block_data_order_neon
1880 .type sha256_block_data_order_neon,%function
1882 sha256_block_data_order_neon:
1884 stmdb sp!,{r4-r12,lr}
1888 bic r11,r11,#15 @ align for 128-bit stores
1891 add r2,r1,r2,lsl#6 @ len to point at the end of inp
1897 vld1.32 {q8},[r14,:128]!
1898 vld1.32 {q9},[r14,:128]!
1899 vld1.32 {q10},[r14,:128]!
1900 vld1.32 {q11},[r14,:128]!
1901 vrev32.8 q0,q0 @ yes, even on
1903 vrev32.8 q1,q1 @ big-endian
1909 str r12,[sp,#76] @ save original sp
1912 vst1.32 {q8},[r1,:128]!
1914 vst1.32 {q9},[r1,:128]!
1916 vst1.32 {q10},[r1,:128]!
1917 vst1.32 {q11},[r1,:128]!
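@ The first 16 input words, byte-swapped and pre-added with K256, now
@ sit in the scratch area addressed by r1; the interleaved scalar rounds
@ below read them back one word at a time.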
1935 eor r12,r0,r8,ror#19
1940 add r11,r11,r12,ror#6
1950 add r11,r11,r0,ror#2
1962 eor r0,r11,r11,ror#11
1965 add r10,r10,r3,ror#6
1968 eor r0,r0,r11,ror#20
1976 add r10,r10,r0,ror#2
1986 eor r12,r0,r6,ror#19
1988 eor r0,r10,r10,ror#11
1994 eor r0,r0,r10,ror#20
2003 vld1.32 {q8},[r14,:128]!
2023 vst1.32 {q8},[r1,:128]!
2033 eor r12,r0,r4,ror#19
2054 eor r0,r11,r11,ror#5
2059 eor r3,r0,r11,ror#19
2080 eor r0,r10,r10,ror#5
2084 eor r12,r0,r10,ror#19
2101 vld1.32 {q8},[r14,:128]!
2121 vst1.32 {q8},[r1,:128]!
2131 eor r12,r0,r8,ror#19
2136 add r11,r11,r12,ror#6
2146 add r11,r11,r0,ror#2
2158 eor r0,r11,r11,ror#11
2161 add r10,r10,r3,ror#6
2164 eor r0,r0,r11,ror#20
2172 add r10,r10,r0,ror#2
2182 eor r12,r0,r6,ror#19
2184 eor r0,r10,r10,ror#11
2190 eor r0,r0,r10,ror#20
2199 vld1.32 {q8},[r14,:128]!
2219 vst1.32 {q8},[r1,:128]!
2229 eor r12,r0,r4,ror#19
2250 eor r0,r11,r11,ror#5
2255 eor r3,r0,r11,ror#19
2276 eor r0,r10,r10,ror#5
2280 eor r12,r0,r10,ror#19
2297 vld1.32 {q8},[r14,:128]!
2317 vst1.32 {q8},[r1,:128]!
2320 teq r2,#0 @ check for K256 terminator
2327 sub r14,r14,#256 @ rewind r14
2330 subeq r1,r1,#64 @ avoid SEGV
2331 vld1.8 {q0},[r1]! @ load next input block
2342 vld1.32 {q8},[r14,:128]!
2344 eor r12,r0,r8,ror#19
2348 add r11,r11,r12,ror#6
2356 add r11,r11,r0,ror#2
2364 eor r0,r11,r11,ror#11
2366 add r10,r10,r3,ror#6
2368 eor r0,r0,r11,ror#20
2373 add r10,r10,r0,ror#2
2380 eor r12,r0,r6,ror#19
2381 eor r0,r10,r10,ror#11
2385 eor r0,r0,r10,ror#20
2409 vst1.32 {q8},[r1,:128]!
2414 vld1.32 {q8},[r14,:128]!
2416 eor r12,r0,r4,ror#19
2432 eor r0,r11,r11,ror#5
2435 eor r3,r0,r11,ror#19
2449 eor r0,r10,r10,ror#5
2452 eor r12,r0,r10,ror#19
2481 vst1.32 {q8},[r1,:128]!
2486 vld1.32 {q8},[r14,:128]!
2488 eor r12,r0,r8,ror#19
2492 add r11,r11,r12,ror#6
2500 add r11,r11,r0,ror#2
2508 eor r0,r11,r11,ror#11
2510 add r10,r10,r3,ror#6
2512 eor r0,r0,r11,ror#20
2517 add r10,r10,r0,ror#2
2524 eor r12,r0,r6,ror#19
2525 eor r0,r10,r10,ror#11
2529 eor r0,r0,r10,ror#20
2553 vst1.32 {q8},[r1,:128]!
2558 vld1.32 {q8},[r14,:128]!
2560 eor r12,r0,r4,ror#19
2576 eor r0,r11,r11,ror#5
2579 eor r3,r0,r11,ror#19
2593 eor r0,r10,r10,ror#5
2596 eor r12,r0,r10,ror#19
2625 vst1.32 {q8},[r1,:128]!
2627 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
2631 add r4,r4,r0 @ accumulate
2653 ldreq sp,[sp,#76] @ restore original sp
2658 ldmia sp!,{r4-r12,pc}
2659 .size sha256_block_data_order_neon,.-sha256_block_data_order_neon
2661 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2664 # define INST(a,b,c,d) .byte c,d|0xc,a,b
2666 # define INST(a,b,c,d) .byte a,b,c,d
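@ The sha256su0/sha256h/sha256h2/sha256su1 instructions are emitted as
@ raw bytes via INST() (Thumb-2 byte order in the first definition, ARM
@ in the second) so the file assembles even with toolchains that lack
@ the ARMv8 Crypto Extension mnemonics.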
2669 .type sha256_block_data_order_armv8,%function
2671 sha256_block_data_order_armv8:
2673 vld1.32 {q0,q1},[r0]
2676 sub r3,r3,#.LARMv8-K256
2680 add r2,r1,r2,lsl#6 @ len to point at the end of inp
2683 vld1.8 {q8-q9},[r1]!
2684 vld1.8 {q10-q11},[r1]!
2690 vmov q14,q0 @ offload
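@ ARMv8 path register use: q0/q1 hold the state halves ABCD/EFGH, q8-q11
@ the sixteen message words, and q12/q13 the current W[t]+K[t] vectors.
@ Each sha256h/sha256h2 pair advances the hash by four rounds, while
@ sha256su0/sha256su1 produce the next four schedule words; the offloaded
@ copy of the state is added back once the block is done.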
2695 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2697 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2698 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2699 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2702 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2704 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2705 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2706 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2708 vadd.i32 q12,q12,q10
2709 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2711 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2712 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2713 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2715 vadd.i32 q13,q13,q11
2716 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2718 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2719 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2720 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2723 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2725 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2726 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2727 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2730 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2732 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2733 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2734 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2736 vadd.i32 q12,q12,q10
2737 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2739 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2740 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2741 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2743 vadd.i32 q13,q13,q11
2744 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2746 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2747 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2748 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2751 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2753 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2754 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2755 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2758 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2760 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2761 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2762 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2764 vadd.i32 q12,q12,q10
2765 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2767 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2768 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2769 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2771 vadd.i32 q13,q13,q11
2772 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2774 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2775 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2776 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2780 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2781 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2786 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2787 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2790 vadd.i32 q12,q12,q10
2791 sub r3,r3,#256-16 @ rewind
2793 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2794 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2796 vadd.i32 q13,q13,q11
2798 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2799 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2806 vst1.32 {q0,q1},[r0]
2809 .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2811 .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2813 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2814 .comm OPENSSL_armcap_P,4,4