chromium-blink-merge.git: third_party/boringssl/linux-arm/crypto/sha/sha512-armv4.S
1 #if defined(__arm__)
3 @ ====================================================================
4 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 @ project. The module is, however, dual licensed under OpenSSL and
6 @ CRYPTOGAMS licenses depending on where you obtain it. For further
7 @ details see http://www.openssl.org/~appro/cryptogams/.
9 @ Permission to use under GPL terms is granted.
10 @ ====================================================================
12 @ SHA512 block procedure for ARMv4. September 2007.
14 @ This code is ~4.5 (four and a half) times faster than code generated
15 @ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
16 @ Xscale PXA250 core].
18 @ July 2010.
20 @ Rescheduling for dual-issue pipeline resulted in 6% improvement on
21 @ Cortex A8 core and ~40 cycles per processed byte.
23 @ February 2011.
25 @ Profiler-assisted and platform-specific optimization resulted in 7%
26 @ improvement on Cortex A8 core and ~38 cycles per byte.
28 @ March 2011.
30 @ Add NEON implementation. On Cortex A8 it was measured to process
31 @ one byte in 23.3 cycles or ~60% faster than integer-only code.
33 @ August 2012.
35 @ Improve NEON performance by 12% on Snapdragon S4. In absolute
36 @ terms it's 22.6 cycles per byte, which is a disappointing result.
37 @ Technical writers asserted that the 3-way S4 pipeline can sustain
38 @ multiple NEON instructions per cycle, but dual NEON issue could
39 @ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
40 @ for further details. On a side note, Cortex-A15 processes one byte in
41 @ 16 cycles.
43 @ Byte order [in]dependence. =========================================
45 @ Originally the caller was expected to maintain a specific *dword* order in
46 @ h[0-7], namely with the most significant dword at the *lower* address, which
47 @ was reflected in the two parameters below as 0 and 4 (see the worked example
48 @ after the WORD64 definitions). Now the caller is expected to maintain native
49 @ byte order for whole 64-bit values.
49 #ifndef __KERNEL__
50 # include "arm_arch.h"
51 # define VFP_ABI_PUSH   vstmdb  sp!,{d8-d15}
52 # define VFP_ABI_POP    vldmia  sp!,{d8-d15}
53 #else
54 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
55 # define __ARM_MAX_ARCH__ 7
56 # define VFP_ABI_PUSH
57 # define VFP_ABI_POP
58 #endif
60 #ifdef __ARMEL__
61 # define LO 0
62 # define HI 4
63 # define WORD64(hi0,lo0,hi1,lo1)        .word   lo0,hi0, lo1,hi1
64 #else
65 # define HI 0
66 # define LO 4
67 # define WORD64(hi0,lo0,hi1,lo1)        .word   hi0,lo0, hi1,lo1
68 #endif
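@ Worked example of the definitions above: with __ARMEL__ defined,
@ WORD64(0x428a2f98,0xd728ae22,...) emits ".word 0xd728ae22,0x428a2f98,...",
@ so a 64-bit load of that entry reads back as 0x428a2f98d728ae22; without
@ __ARMEL__ the two words are emitted high-first. LO/HI likewise give the
@ byte offsets of the low/high 32-bit halves of a stored 64-bit value.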
70 .text
71 #if __ARM_ARCH__<7 || defined(__APPLE__)
72 .code   32
73 #else
74 .syntax unified
75 # ifdef __thumb2__
76 #  define adrl adr
77 .thumb
78 # else
79 .code   32
80 # endif
81 #endif
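@ K512 below is the table of 80 SHA-512 round constants (the first 64 bits of
@ the fractional parts of the cube roots of the first eighty primes), emitted
@ two constants per WORD64 line so the table reads correctly on either endianness.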
83 .type   K512,%object
84 .align  5
85 K512:
86         WORD64(0x428a2f98,0xd728ae22,   0x71374491,0x23ef65cd)
87         WORD64(0xb5c0fbcf,0xec4d3b2f,   0xe9b5dba5,0x8189dbbc)
88         WORD64(0x3956c25b,0xf348b538,   0x59f111f1,0xb605d019)
89         WORD64(0x923f82a4,0xaf194f9b,   0xab1c5ed5,0xda6d8118)
90         WORD64(0xd807aa98,0xa3030242,   0x12835b01,0x45706fbe)
91         WORD64(0x243185be,0x4ee4b28c,   0x550c7dc3,0xd5ffb4e2)
92         WORD64(0x72be5d74,0xf27b896f,   0x80deb1fe,0x3b1696b1)
93         WORD64(0x9bdc06a7,0x25c71235,   0xc19bf174,0xcf692694)
94         WORD64(0xe49b69c1,0x9ef14ad2,   0xefbe4786,0x384f25e3)
95         WORD64(0x0fc19dc6,0x8b8cd5b5,   0x240ca1cc,0x77ac9c65)
96         WORD64(0x2de92c6f,0x592b0275,   0x4a7484aa,0x6ea6e483)
97         WORD64(0x5cb0a9dc,0xbd41fbd4,   0x76f988da,0x831153b5)
98         WORD64(0x983e5152,0xee66dfab,   0xa831c66d,0x2db43210)
99         WORD64(0xb00327c8,0x98fb213f,   0xbf597fc7,0xbeef0ee4)
100         WORD64(0xc6e00bf3,0x3da88fc2,   0xd5a79147,0x930aa725)
101         WORD64(0x06ca6351,0xe003826f,   0x14292967,0x0a0e6e70)
102         WORD64(0x27b70a85,0x46d22ffc,   0x2e1b2138,0x5c26c926)
103         WORD64(0x4d2c6dfc,0x5ac42aed,   0x53380d13,0x9d95b3df)
104         WORD64(0x650a7354,0x8baf63de,   0x766a0abb,0x3c77b2a8)
105         WORD64(0x81c2c92e,0x47edaee6,   0x92722c85,0x1482353b)
106         WORD64(0xa2bfe8a1,0x4cf10364,   0xa81a664b,0xbc423001)
107         WORD64(0xc24b8b70,0xd0f89791,   0xc76c51a3,0x0654be30)
108         WORD64(0xd192e819,0xd6ef5218,   0xd6990624,0x5565a910)
109         WORD64(0xf40e3585,0x5771202a,   0x106aa070,0x32bbd1b8)
110         WORD64(0x19a4c116,0xb8d2d0c8,   0x1e376c08,0x5141ab53)
111         WORD64(0x2748774c,0xdf8eeb99,   0x34b0bcb5,0xe19b48a8)
112         WORD64(0x391c0cb3,0xc5c95a63,   0x4ed8aa4a,0xe3418acb)
113         WORD64(0x5b9cca4f,0x7763e373,   0x682e6ff3,0xd6b2b8a3)
114         WORD64(0x748f82ee,0x5defb2fc,   0x78a5636f,0x43172f60)
115         WORD64(0x84c87814,0xa1f0ab72,   0x8cc70208,0x1a6439ec)
116         WORD64(0x90befffa,0x23631e28,   0xa4506ceb,0xde82bde9)
117         WORD64(0xbef9a3f7,0xb2c67915,   0xc67178f2,0xe372532b)
118         WORD64(0xca273ece,0xea26619c,   0xd186b8c7,0x21c0c207)
119         WORD64(0xeada7dd6,0xcde0eb1e,   0xf57d4f7f,0xee6ed178)
120         WORD64(0x06f067aa,0x72176fba,   0x0a637dc5,0xa2c898a6)
121         WORD64(0x113f9804,0xbef90dae,   0x1b710b35,0x131c471b)
122         WORD64(0x28db77f5,0x23047d84,   0x32caab7b,0x40c72493)
123         WORD64(0x3c9ebe0a,0x15c9bebc,   0x431d67c4,0x9c100d4c)
124         WORD64(0x4cc5d4be,0xcb3e42b6,   0x597f299c,0xfc657e2a)
125         WORD64(0x5fcb6fab,0x3ad6faec,   0x6c44198c,0x4a475817)
126 .size   K512,.-K512
127 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
128 .LOPENSSL_armcap:
129 .word   OPENSSL_armcap_P-.Lsha512_block_data_order
130 .skip   32-4
131 #else
132 .skip   32
133 #endif
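@ sha512_block_data_order(ctx, inp, num) - register roles as inferred from the
@ code below (the authoritative prototype lives in the C callers):
@   r0 = pointer to the eight 64-bit state words h[0..7]
@   r1 = pointer to the input data
@   r2 = number of 128-byte blocks; converted to an end pointer as r1 + r2*128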
135 .globl  sha512_block_data_order
136 .type   sha512_block_data_order,%function
137 sha512_block_data_order:
138 .Lsha512_block_data_order:
139 #if __ARM_ARCH__<7
140         sub     r3,pc,#8                @ sha512_block_data_order
141 #else
142         adr     r3,sha512_block_data_order
143 #endif
144 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
145         ldr     r12,.LOPENSSL_armcap
146         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
147 #ifdef  __APPLE__
148         ldr     r12,[r12]
149 #endif
150         tst     r12,#1
151         bne     .LNEON
152 #endif
153         add     r2,r1,r2,lsl#7  @ len to point at the end of inp
154         stmdb   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
155         sub     r14,r3,#672             @ K512
156         sub     sp,sp,#9*8
158         ldr     r7,[r0,#32+LO]
159         ldr     r8,[r0,#32+HI]
160         ldr     r9, [r0,#48+LO]
161         ldr     r10, [r0,#48+HI]
162         ldr     r11, [r0,#56+LO]
163         ldr     r12, [r0,#56+HI]
164 .Loop:
165         str     r9, [sp,#48+0]
166         str     r10, [sp,#48+4]
167         str     r11, [sp,#56+0]
168         str     r12, [sp,#56+4]
169         ldr     r5,[r0,#0+LO]
170         ldr     r6,[r0,#0+HI]
171         ldr     r3,[r0,#8+LO]
172         ldr     r4,[r0,#8+HI]
173         ldr     r9, [r0,#16+LO]
174         ldr     r10, [r0,#16+HI]
175         ldr     r11, [r0,#24+LO]
176         ldr     r12, [r0,#24+HI]
177         str     r3,[sp,#8+0]
178         str     r4,[sp,#8+4]
179         str     r9, [sp,#16+0]
180         str     r10, [sp,#16+4]
181         str     r11, [sp,#24+0]
182         str     r12, [sp,#24+4]
183         ldr     r3,[r0,#40+LO]
184         ldr     r4,[r0,#40+HI]
185         str     r3,[sp,#40+0]
186         str     r4,[sp,#40+4]
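@ Integer-only round loop: every 64-bit quantity is handled as a (lo,hi) pair
@ of 32-bit registers or stack slots, 64-bit additions are adds/adc sequences,
@ and 64-bit rotates are assembled from paired 32-bit shifts as spelled out in
@ the Sigma/sigma comments below. Rounds 0..15 (.L00_15) also fetch and
@ byte-reverse one 8-byte message word per round.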
188 .L00_15:
189 #if __ARM_ARCH__<7
190         ldrb    r3,[r1,#7]
191         ldrb    r9, [r1,#6]
192         ldrb    r10, [r1,#5]
193         ldrb    r11, [r1,#4]
194         ldrb    r4,[r1,#3]
195         ldrb    r12, [r1,#2]
196         orr     r3,r3,r9,lsl#8
197         ldrb    r9, [r1,#1]
198         orr     r3,r3,r10,lsl#16
199         ldrb    r10, [r1],#8
200         orr     r3,r3,r11,lsl#24
201         orr     r4,r4,r12,lsl#8
202         orr     r4,r4,r9,lsl#16
203         orr     r4,r4,r10,lsl#24
204 #else
205         ldr     r3,[r1,#4]
206         ldr     r4,[r1],#8
207 #ifdef __ARMEL__
208         rev     r3,r3
209         rev     r4,r4
210 #endif
211 #endif
212         @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
213         @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
214         @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
215         mov     r9,r7,lsr#14
216         str     r3,[sp,#64+0]
217         mov     r10,r8,lsr#14
218         str     r4,[sp,#64+4]
219         eor     r9,r9,r8,lsl#18
220         ldr     r11,[sp,#56+0]  @ h.lo
221         eor     r10,r10,r7,lsl#18
222         ldr     r12,[sp,#56+4]  @ h.hi
223         eor     r9,r9,r7,lsr#18
224         eor     r10,r10,r8,lsr#18
225         eor     r9,r9,r8,lsl#14
226         eor     r10,r10,r7,lsl#14
227         eor     r9,r9,r8,lsr#9
228         eor     r10,r10,r7,lsr#9
229         eor     r9,r9,r7,lsl#23
230         eor     r10,r10,r8,lsl#23       @ Sigma1(e)
231         adds    r3,r3,r9
232         ldr     r9,[sp,#40+0]   @ f.lo
233         adc     r4,r4,r10               @ T += Sigma1(e)
234         ldr     r10,[sp,#40+4]  @ f.hi
235         adds    r3,r3,r11
236         ldr     r11,[sp,#48+0]  @ g.lo
237         adc     r4,r4,r12               @ T += h
238         ldr     r12,[sp,#48+4]  @ g.hi
240         eor     r9,r9,r11
241         str     r7,[sp,#32+0]
242         eor     r10,r10,r12
243         str     r8,[sp,#32+4]
244         and     r9,r9,r7
245         str     r5,[sp,#0+0]
246         and     r10,r10,r8
247         str     r6,[sp,#0+4]
248         eor     r9,r9,r11
249         ldr     r11,[r14,#LO]   @ K[i].lo
250         eor     r10,r10,r12             @ Ch(e,f,g)
251         ldr     r12,[r14,#HI]   @ K[i].hi
253         adds    r3,r3,r9
254         ldr     r7,[sp,#24+0]   @ d.lo
255         adc     r4,r4,r10               @ T += Ch(e,f,g)
256         ldr     r8,[sp,#24+4]   @ d.hi
257         adds    r3,r3,r11
258         and     r9,r11,#0xff
259         adc     r4,r4,r12               @ T += K[i]
260         adds    r7,r7,r3
261         ldr     r11,[sp,#8+0]   @ b.lo
262         adc     r8,r8,r4                @ d += T
263         teq     r9,#148
265         ldr     r12,[sp,#16+0]  @ c.lo
266 #if __ARM_ARCH__>=7
267         it      eq                      @ Thumb2 thing, sanity check in ARM
268 #endif
269         orreq   r14,r14,#1
270         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
271         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
272         @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
273         mov     r9,r5,lsr#28
274         mov     r10,r6,lsr#28
275         eor     r9,r9,r6,lsl#4
276         eor     r10,r10,r5,lsl#4
277         eor     r9,r9,r6,lsr#2
278         eor     r10,r10,r5,lsr#2
279         eor     r9,r9,r5,lsl#30
280         eor     r10,r10,r6,lsl#30
281         eor     r9,r9,r6,lsr#7
282         eor     r10,r10,r5,lsr#7
283         eor     r9,r9,r5,lsl#25
284         eor     r10,r10,r6,lsl#25       @ Sigma0(a)
285         adds    r3,r3,r9
286         and     r9,r5,r11
287         adc     r4,r4,r10               @ T += Sigma0(a)
289         ldr     r10,[sp,#8+4]   @ b.hi
290         orr     r5,r5,r11
291         ldr     r11,[sp,#16+4]  @ c.hi
292         and     r5,r5,r12
293         and     r12,r6,r10
294         orr     r6,r6,r10
295         orr     r5,r5,r9                @ Maj(a,b,c).lo
296         and     r6,r6,r11
297         adds    r5,r5,r3
298         orr     r6,r6,r12               @ Maj(a,b,c).hi
299         sub     sp,sp,#8
300         adc     r6,r6,r4                @ h += T
301         tst     r14,#1
302         add     r14,r14,#8
303         tst     r14,#1
304         beq     .L00_15
305         ldr     r9,[sp,#184+0]
306         ldr     r10,[sp,#184+4]
307         bic     r14,r14,#1
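@ Rounds 16..79: besides the round function, each iteration extends the message
@ schedule kept on the stack using the standard SHA-512 recurrence
@ W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
@ (the sigma expansions are commented inline below).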
308 .L16_79:
309         @ sigma0(x)     (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
310         @ LO            lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
311         @ HI            hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
312         mov     r3,r9,lsr#1
313         ldr     r11,[sp,#80+0]
314         mov     r4,r10,lsr#1
315         ldr     r12,[sp,#80+4]
316         eor     r3,r3,r10,lsl#31
317         eor     r4,r4,r9,lsl#31
318         eor     r3,r3,r9,lsr#8
319         eor     r4,r4,r10,lsr#8
320         eor     r3,r3,r10,lsl#24
321         eor     r4,r4,r9,lsl#24
322         eor     r3,r3,r9,lsr#7
323         eor     r4,r4,r10,lsr#7
324         eor     r3,r3,r10,lsl#25
326         @ sigma1(x)     (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
327         @ LO            lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
328         @ HI            hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
329         mov     r9,r11,lsr#19
330         mov     r10,r12,lsr#19
331         eor     r9,r9,r12,lsl#13
332         eor     r10,r10,r11,lsl#13
333         eor     r9,r9,r12,lsr#29
334         eor     r10,r10,r11,lsr#29
335         eor     r9,r9,r11,lsl#3
336         eor     r10,r10,r12,lsl#3
337         eor     r9,r9,r11,lsr#6
338         eor     r10,r10,r12,lsr#6
339         ldr     r11,[sp,#120+0]
340         eor     r9,r9,r12,lsl#26
342         ldr     r12,[sp,#120+4]
343         adds    r3,r3,r9
344         ldr     r9,[sp,#192+0]
345         adc     r4,r4,r10
347         ldr     r10,[sp,#192+4]
348         adds    r3,r3,r11
349         adc     r4,r4,r12
350         adds    r3,r3,r9
351         adc     r4,r4,r10
352         @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
353         @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
354         @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
355         mov     r9,r7,lsr#14
356         str     r3,[sp,#64+0]
357         mov     r10,r8,lsr#14
358         str     r4,[sp,#64+4]
359         eor     r9,r9,r8,lsl#18
360         ldr     r11,[sp,#56+0]  @ h.lo
361         eor     r10,r10,r7,lsl#18
362         ldr     r12,[sp,#56+4]  @ h.hi
363         eor     r9,r9,r7,lsr#18
364         eor     r10,r10,r8,lsr#18
365         eor     r9,r9,r8,lsl#14
366         eor     r10,r10,r7,lsl#14
367         eor     r9,r9,r8,lsr#9
368         eor     r10,r10,r7,lsr#9
369         eor     r9,r9,r7,lsl#23
370         eor     r10,r10,r8,lsl#23       @ Sigma1(e)
371         adds    r3,r3,r9
372         ldr     r9,[sp,#40+0]   @ f.lo
373         adc     r4,r4,r10               @ T += Sigma1(e)
374         ldr     r10,[sp,#40+4]  @ f.hi
375         adds    r3,r3,r11
376         ldr     r11,[sp,#48+0]  @ g.lo
377         adc     r4,r4,r12               @ T += h
378         ldr     r12,[sp,#48+4]  @ g.hi
380         eor     r9,r9,r11
381         str     r7,[sp,#32+0]
382         eor     r10,r10,r12
383         str     r8,[sp,#32+4]
384         and     r9,r9,r7
385         str     r5,[sp,#0+0]
386         and     r10,r10,r8
387         str     r6,[sp,#0+4]
388         eor     r9,r9,r11
389         ldr     r11,[r14,#LO]   @ K[i].lo
390         eor     r10,r10,r12             @ Ch(e,f,g)
391         ldr     r12,[r14,#HI]   @ K[i].hi
393         adds    r3,r3,r9
394         ldr     r7,[sp,#24+0]   @ d.lo
395         adc     r4,r4,r10               @ T += Ch(e,f,g)
396         ldr     r8,[sp,#24+4]   @ d.hi
397         adds    r3,r3,r11
398         and     r9,r11,#0xff
399         adc     r4,r4,r12               @ T += K[i]
400         adds    r7,r7,r3
401         ldr     r11,[sp,#8+0]   @ b.lo
402         adc     r8,r8,r4                @ d += T
403         teq     r9,#23
405         ldr     r12,[sp,#16+0]  @ c.lo
406 #if __ARM_ARCH__>=7
407         it      eq                      @ Thumb2 thing, sanity check in ARM
408 #endif
409         orreq   r14,r14,#1
410         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
411         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
412         @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
413         mov     r9,r5,lsr#28
414         mov     r10,r6,lsr#28
415         eor     r9,r9,r6,lsl#4
416         eor     r10,r10,r5,lsl#4
417         eor     r9,r9,r6,lsr#2
418         eor     r10,r10,r5,lsr#2
419         eor     r9,r9,r5,lsl#30
420         eor     r10,r10,r6,lsl#30
421         eor     r9,r9,r6,lsr#7
422         eor     r10,r10,r5,lsr#7
423         eor     r9,r9,r5,lsl#25
424         eor     r10,r10,r6,lsl#25       @ Sigma0(a)
425         adds    r3,r3,r9
426         and     r9,r5,r11
427         adc     r4,r4,r10               @ T += Sigma0(a)
429         ldr     r10,[sp,#8+4]   @ b.hi
430         orr     r5,r5,r11
431         ldr     r11,[sp,#16+4]  @ c.hi
432         and     r5,r5,r12
433         and     r12,r6,r10
434         orr     r6,r6,r10
435         orr     r5,r5,r9                @ Maj(a,b,c).lo
436         and     r6,r6,r11
437         adds    r5,r5,r3
438         orr     r6,r6,r12               @ Maj(a,b,c).hi
439         sub     sp,sp,#8
440         adc     r6,r6,r4                @ h += T
441         tst     r14,#1
442         add     r14,r14,#8
443 #if __ARM_ARCH__>=7
444         ittt    eq                      @ Thumb2 thing, sanity check in ARM
445 #endif
446         ldreq   r9,[sp,#184+0]
447         ldreq   r10,[sp,#184+4]
448         beq     .L16_79
449         bic     r14,r14,#1
451         ldr     r3,[sp,#8+0]
452         ldr     r4,[sp,#8+4]
453         ldr     r9, [r0,#0+LO]
454         ldr     r10, [r0,#0+HI]
455         ldr     r11, [r0,#8+LO]
456         ldr     r12, [r0,#8+HI]
457         adds    r9,r5,r9
458         str     r9, [r0,#0+LO]
459         adc     r10,r6,r10
460         str     r10, [r0,#0+HI]
461         adds    r11,r3,r11
462         str     r11, [r0,#8+LO]
463         adc     r12,r4,r12
464         str     r12, [r0,#8+HI]
466         ldr     r5,[sp,#16+0]
467         ldr     r6,[sp,#16+4]
468         ldr     r3,[sp,#24+0]
469         ldr     r4,[sp,#24+4]
470         ldr     r9, [r0,#16+LO]
471         ldr     r10, [r0,#16+HI]
472         ldr     r11, [r0,#24+LO]
473         ldr     r12, [r0,#24+HI]
474         adds    r9,r5,r9
475         str     r9, [r0,#16+LO]
476         adc     r10,r6,r10
477         str     r10, [r0,#16+HI]
478         adds    r11,r3,r11
479         str     r11, [r0,#24+LO]
480         adc     r12,r4,r12
481         str     r12, [r0,#24+HI]
483         ldr     r3,[sp,#40+0]
484         ldr     r4,[sp,#40+4]
485         ldr     r9, [r0,#32+LO]
486         ldr     r10, [r0,#32+HI]
487         ldr     r11, [r0,#40+LO]
488         ldr     r12, [r0,#40+HI]
489         adds    r7,r7,r9
490         str     r7,[r0,#32+LO]
491         adc     r8,r8,r10
492         str     r8,[r0,#32+HI]
493         adds    r11,r3,r11
494         str     r11, [r0,#40+LO]
495         adc     r12,r4,r12
496         str     r12, [r0,#40+HI]
498         ldr     r5,[sp,#48+0]
499         ldr     r6,[sp,#48+4]
500         ldr     r3,[sp,#56+0]
501         ldr     r4,[sp,#56+4]
502         ldr     r9, [r0,#48+LO]
503         ldr     r10, [r0,#48+HI]
504         ldr     r11, [r0,#56+LO]
505         ldr     r12, [r0,#56+HI]
506         adds    r9,r5,r9
507         str     r9, [r0,#48+LO]
508         adc     r10,r6,r10
509         str     r10, [r0,#48+HI]
510         adds    r11,r3,r11
511         str     r11, [r0,#56+LO]
512         adc     r12,r4,r12
513         str     r12, [r0,#56+HI]
515         add     sp,sp,#640
516         sub     r14,r14,#640
518         teq     r1,r2
519         bne     .Loop
521         add     sp,sp,#8*9              @ destroy frame
522 #if __ARM_ARCH__>=5
523         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
524 #else
525         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
526         tst     lr,#1
527         moveq   pc,lr                   @ be binary compatible with V4, yet
528 .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
529 #endif
530 .size   sha512_block_data_order,.-sha512_block_data_order
531 #if __ARM_MAX_ARCH__>=7
532 .arch   armv7-a
533 .fpu    neon
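@ NEON path: the eight state words a..h are kept in d16-d23 (one 64-bit value
@ per D register, loaded by the vldmia below) and the 16-entry message schedule
@ in q0-q7; the arguments are the same as for the integer version. Sixteen fully
@ unrolled rounds consume the current block before .L16_79_neon takes over.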
535 .globl  sha512_block_data_order_neon
536 .type   sha512_block_data_order_neon,%function
537 .align  4
538 sha512_block_data_order_neon:
539 .LNEON:
540         dmb     @ errata #451034 on early Cortex A8
541         add     r2,r1,r2,lsl#7  @ len to point at the end of inp
542         adr     r3,K512
543         VFP_ABI_PUSH
544         vldmia  r0,{d16,d17,d18,d19,d20,d21,d22,d23}            @ load context
545 .Loop_neon:
546         vshr.u64        d24,d20,#14     @ 0
547 #if 0<16
548         vld1.64 {d0},[r1]!      @ handles unaligned
549 #endif
550         vshr.u64        d25,d20,#18
551 #if 0>0
552         vadd.i64        d16,d30                 @ h+=Maj from the past
553 #endif
554         vshr.u64        d26,d20,#41
555         vld1.64 {d28},[r3,:64]! @ K[i++]
556         vsli.64 d24,d20,#50
557         vsli.64 d25,d20,#46
558         vmov    d29,d20
559         vsli.64 d26,d20,#23
560 #if 0<16 && defined(__ARMEL__)
561         vrev64.8        d0,d0
562 #endif
563         veor    d25,d24
564         vbsl    d29,d21,d22             @ Ch(e,f,g)
565         vshr.u64        d24,d16,#28
566         veor    d26,d25                 @ Sigma1(e)
567         vadd.i64        d27,d29,d23
568         vshr.u64        d25,d16,#34
569         vsli.64 d24,d16,#36
570         vadd.i64        d27,d26
571         vshr.u64        d26,d16,#39
572         vadd.i64        d28,d0
573         vsli.64 d25,d16,#30
574         veor    d30,d16,d17
575         vsli.64 d26,d16,#25
576         veor    d23,d24,d25
577         vadd.i64        d27,d28
578         vbsl    d30,d18,d17             @ Maj(a,b,c)
579         veor    d23,d26                 @ Sigma0(a)
580         vadd.i64        d19,d27
581         vadd.i64        d30,d27
582         @ vadd.i64      d23,d30
583         vshr.u64        d24,d19,#14     @ 1
584 #if 1<16
585         vld1.64 {d1},[r1]!      @ handles unaligned
586 #endif
587         vshr.u64        d25,d19,#18
588 #if 1>0
589         vadd.i64        d23,d30                 @ h+=Maj from the past
590 #endif
591         vshr.u64        d26,d19,#41
592         vld1.64 {d28},[r3,:64]! @ K[i++]
593         vsli.64 d24,d19,#50
594         vsli.64 d25,d19,#46
595         vmov    d29,d19
596         vsli.64 d26,d19,#23
597 #if 1<16 && defined(__ARMEL__)
598         vrev64.8        d1,d1
599 #endif
600         veor    d25,d24
601         vbsl    d29,d20,d21             @ Ch(e,f,g)
602         vshr.u64        d24,d23,#28
603         veor    d26,d25                 @ Sigma1(e)
604         vadd.i64        d27,d29,d22
605         vshr.u64        d25,d23,#34
606         vsli.64 d24,d23,#36
607         vadd.i64        d27,d26
608         vshr.u64        d26,d23,#39
609         vadd.i64        d28,d1
610         vsli.64 d25,d23,#30
611         veor    d30,d23,d16
612         vsli.64 d26,d23,#25
613         veor    d22,d24,d25
614         vadd.i64        d27,d28
615         vbsl    d30,d17,d16             @ Maj(a,b,c)
616         veor    d22,d26                 @ Sigma0(a)
617         vadd.i64        d18,d27
618         vadd.i64        d30,d27
619         @ vadd.i64      d22,d30
620         vshr.u64        d24,d18,#14     @ 2
621 #if 2<16
622         vld1.64 {d2},[r1]!      @ handles unaligned
623 #endif
624         vshr.u64        d25,d18,#18
625 #if 2>0
626         vadd.i64        d22,d30                 @ h+=Maj from the past
627 #endif
628         vshr.u64        d26,d18,#41
629         vld1.64 {d28},[r3,:64]! @ K[i++]
630         vsli.64 d24,d18,#50
631         vsli.64 d25,d18,#46
632         vmov    d29,d18
633         vsli.64 d26,d18,#23
634 #if 2<16 && defined(__ARMEL__)
635         vrev64.8        d2,d2
636 #endif
637         veor    d25,d24
638         vbsl    d29,d19,d20             @ Ch(e,f,g)
639         vshr.u64        d24,d22,#28
640         veor    d26,d25                 @ Sigma1(e)
641         vadd.i64        d27,d29,d21
642         vshr.u64        d25,d22,#34
643         vsli.64 d24,d22,#36
644         vadd.i64        d27,d26
645         vshr.u64        d26,d22,#39
646         vadd.i64        d28,d2
647         vsli.64 d25,d22,#30
648         veor    d30,d22,d23
649         vsli.64 d26,d22,#25
650         veor    d21,d24,d25
651         vadd.i64        d27,d28
652         vbsl    d30,d16,d23             @ Maj(a,b,c)
653         veor    d21,d26                 @ Sigma0(a)
654         vadd.i64        d17,d27
655         vadd.i64        d30,d27
656         @ vadd.i64      d21,d30
657         vshr.u64        d24,d17,#14     @ 3
658 #if 3<16
659         vld1.64 {d3},[r1]!      @ handles unaligned
660 #endif
661         vshr.u64        d25,d17,#18
662 #if 3>0
663         vadd.i64        d21,d30                 @ h+=Maj from the past
664 #endif
665         vshr.u64        d26,d17,#41
666         vld1.64 {d28},[r3,:64]! @ K[i++]
667         vsli.64 d24,d17,#50
668         vsli.64 d25,d17,#46
669         vmov    d29,d17
670         vsli.64 d26,d17,#23
671 #if 3<16 && defined(__ARMEL__)
672         vrev64.8        d3,d3
673 #endif
674         veor    d25,d24
675         vbsl    d29,d18,d19             @ Ch(e,f,g)
676         vshr.u64        d24,d21,#28
677         veor    d26,d25                 @ Sigma1(e)
678         vadd.i64        d27,d29,d20
679         vshr.u64        d25,d21,#34
680         vsli.64 d24,d21,#36
681         vadd.i64        d27,d26
682         vshr.u64        d26,d21,#39
683         vadd.i64        d28,d3
684         vsli.64 d25,d21,#30
685         veor    d30,d21,d22
686         vsli.64 d26,d21,#25
687         veor    d20,d24,d25
688         vadd.i64        d27,d28
689         vbsl    d30,d23,d22             @ Maj(a,b,c)
690         veor    d20,d26                 @ Sigma0(a)
691         vadd.i64        d16,d27
692         vadd.i64        d30,d27
693         @ vadd.i64      d20,d30
694         vshr.u64        d24,d16,#14     @ 4
695 #if 4<16
696         vld1.64 {d4},[r1]!      @ handles unaligned
697 #endif
698         vshr.u64        d25,d16,#18
699 #if 4>0
700         vadd.i64        d20,d30                 @ h+=Maj from the past
701 #endif
702         vshr.u64        d26,d16,#41
703         vld1.64 {d28},[r3,:64]! @ K[i++]
704         vsli.64 d24,d16,#50
705         vsli.64 d25,d16,#46
706         vmov    d29,d16
707         vsli.64 d26,d16,#23
708 #if 4<16 && defined(__ARMEL__)
709         vrev64.8        d4,d4
710 #endif
711         veor    d25,d24
712         vbsl    d29,d17,d18             @ Ch(e,f,g)
713         vshr.u64        d24,d20,#28
714         veor    d26,d25                 @ Sigma1(e)
715         vadd.i64        d27,d29,d19
716         vshr.u64        d25,d20,#34
717         vsli.64 d24,d20,#36
718         vadd.i64        d27,d26
719         vshr.u64        d26,d20,#39
720         vadd.i64        d28,d4
721         vsli.64 d25,d20,#30
722         veor    d30,d20,d21
723         vsli.64 d26,d20,#25
724         veor    d19,d24,d25
725         vadd.i64        d27,d28
726         vbsl    d30,d22,d21             @ Maj(a,b,c)
727         veor    d19,d26                 @ Sigma0(a)
728         vadd.i64        d23,d27
729         vadd.i64        d30,d27
730         @ vadd.i64      d19,d30
731         vshr.u64        d24,d23,#14     @ 5
732 #if 5<16
733         vld1.64 {d5},[r1]!      @ handles unaligned
734 #endif
735         vshr.u64        d25,d23,#18
736 #if 5>0
737         vadd.i64        d19,d30                 @ h+=Maj from the past
738 #endif
739         vshr.u64        d26,d23,#41
740         vld1.64 {d28},[r3,:64]! @ K[i++]
741         vsli.64 d24,d23,#50
742         vsli.64 d25,d23,#46
743         vmov    d29,d23
744         vsli.64 d26,d23,#23
745 #if 5<16 && defined(__ARMEL__)
746         vrev64.8        d5,d5
747 #endif
748         veor    d25,d24
749         vbsl    d29,d16,d17             @ Ch(e,f,g)
750         vshr.u64        d24,d19,#28
751         veor    d26,d25                 @ Sigma1(e)
752         vadd.i64        d27,d29,d18
753         vshr.u64        d25,d19,#34
754         vsli.64 d24,d19,#36
755         vadd.i64        d27,d26
756         vshr.u64        d26,d19,#39
757         vadd.i64        d28,d5
758         vsli.64 d25,d19,#30
759         veor    d30,d19,d20
760         vsli.64 d26,d19,#25
761         veor    d18,d24,d25
762         vadd.i64        d27,d28
763         vbsl    d30,d21,d20             @ Maj(a,b,c)
764         veor    d18,d26                 @ Sigma0(a)
765         vadd.i64        d22,d27
766         vadd.i64        d30,d27
767         @ vadd.i64      d18,d30
768         vshr.u64        d24,d22,#14     @ 6
769 #if 6<16
770         vld1.64 {d6},[r1]!      @ handles unaligned
771 #endif
772         vshr.u64        d25,d22,#18
773 #if 6>0
774         vadd.i64        d18,d30                 @ h+=Maj from the past
775 #endif
776         vshr.u64        d26,d22,#41
777         vld1.64 {d28},[r3,:64]! @ K[i++]
778         vsli.64 d24,d22,#50
779         vsli.64 d25,d22,#46
780         vmov    d29,d22
781         vsli.64 d26,d22,#23
782 #if 6<16 && defined(__ARMEL__)
783         vrev64.8        d6,d6
784 #endif
785         veor    d25,d24
786         vbsl    d29,d23,d16             @ Ch(e,f,g)
787         vshr.u64        d24,d18,#28
788         veor    d26,d25                 @ Sigma1(e)
789         vadd.i64        d27,d29,d17
790         vshr.u64        d25,d18,#34
791         vsli.64 d24,d18,#36
792         vadd.i64        d27,d26
793         vshr.u64        d26,d18,#39
794         vadd.i64        d28,d6
795         vsli.64 d25,d18,#30
796         veor    d30,d18,d19
797         vsli.64 d26,d18,#25
798         veor    d17,d24,d25
799         vadd.i64        d27,d28
800         vbsl    d30,d20,d19             @ Maj(a,b,c)
801         veor    d17,d26                 @ Sigma0(a)
802         vadd.i64        d21,d27
803         vadd.i64        d30,d27
804         @ vadd.i64      d17,d30
805         vshr.u64        d24,d21,#14     @ 7
806 #if 7<16
807         vld1.64 {d7},[r1]!      @ handles unaligned
808 #endif
809         vshr.u64        d25,d21,#18
810 #if 7>0
811         vadd.i64        d17,d30                 @ h+=Maj from the past
812 #endif
813         vshr.u64        d26,d21,#41
814         vld1.64 {d28},[r3,:64]! @ K[i++]
815         vsli.64 d24,d21,#50
816         vsli.64 d25,d21,#46
817         vmov    d29,d21
818         vsli.64 d26,d21,#23
819 #if 7<16 && defined(__ARMEL__)
820         vrev64.8        d7,d7
821 #endif
822         veor    d25,d24
823         vbsl    d29,d22,d23             @ Ch(e,f,g)
824         vshr.u64        d24,d17,#28
825         veor    d26,d25                 @ Sigma1(e)
826         vadd.i64        d27,d29,d16
827         vshr.u64        d25,d17,#34
828         vsli.64 d24,d17,#36
829         vadd.i64        d27,d26
830         vshr.u64        d26,d17,#39
831         vadd.i64        d28,d7
832         vsli.64 d25,d17,#30
833         veor    d30,d17,d18
834         vsli.64 d26,d17,#25
835         veor    d16,d24,d25
836         vadd.i64        d27,d28
837         vbsl    d30,d19,d18             @ Maj(a,b,c)
838         veor    d16,d26                 @ Sigma0(a)
839         vadd.i64        d20,d27
840         vadd.i64        d30,d27
841         @ vadd.i64      d16,d30
842         vshr.u64        d24,d20,#14     @ 8
843 #if 8<16
844         vld1.64 {d8},[r1]!      @ handles unaligned
845 #endif
846         vshr.u64        d25,d20,#18
847 #if 8>0
848         vadd.i64        d16,d30                 @ h+=Maj from the past
849 #endif
850         vshr.u64        d26,d20,#41
851         vld1.64 {d28},[r3,:64]! @ K[i++]
852         vsli.64 d24,d20,#50
853         vsli.64 d25,d20,#46
854         vmov    d29,d20
855         vsli.64 d26,d20,#23
856 #if 8<16 && defined(__ARMEL__)
857         vrev64.8        d8,d8
858 #endif
859         veor    d25,d24
860         vbsl    d29,d21,d22             @ Ch(e,f,g)
861         vshr.u64        d24,d16,#28
862         veor    d26,d25                 @ Sigma1(e)
863         vadd.i64        d27,d29,d23
864         vshr.u64        d25,d16,#34
865         vsli.64 d24,d16,#36
866         vadd.i64        d27,d26
867         vshr.u64        d26,d16,#39
868         vadd.i64        d28,d8
869         vsli.64 d25,d16,#30
870         veor    d30,d16,d17
871         vsli.64 d26,d16,#25
872         veor    d23,d24,d25
873         vadd.i64        d27,d28
874         vbsl    d30,d18,d17             @ Maj(a,b,c)
875         veor    d23,d26                 @ Sigma0(a)
876         vadd.i64        d19,d27
877         vadd.i64        d30,d27
878         @ vadd.i64      d23,d30
879         vshr.u64        d24,d19,#14     @ 9
880 #if 9<16
881         vld1.64 {d9},[r1]!      @ handles unaligned
882 #endif
883         vshr.u64        d25,d19,#18
884 #if 9>0
885         vadd.i64        d23,d30                 @ h+=Maj from the past
886 #endif
887         vshr.u64        d26,d19,#41
888         vld1.64 {d28},[r3,:64]! @ K[i++]
889         vsli.64 d24,d19,#50
890         vsli.64 d25,d19,#46
891         vmov    d29,d19
892         vsli.64 d26,d19,#23
893 #if 9<16 && defined(__ARMEL__)
894         vrev64.8        d9,d9
895 #endif
896         veor    d25,d24
897         vbsl    d29,d20,d21             @ Ch(e,f,g)
898         vshr.u64        d24,d23,#28
899         veor    d26,d25                 @ Sigma1(e)
900         vadd.i64        d27,d29,d22
901         vshr.u64        d25,d23,#34
902         vsli.64 d24,d23,#36
903         vadd.i64        d27,d26
904         vshr.u64        d26,d23,#39
905         vadd.i64        d28,d9
906         vsli.64 d25,d23,#30
907         veor    d30,d23,d16
908         vsli.64 d26,d23,#25
909         veor    d22,d24,d25
910         vadd.i64        d27,d28
911         vbsl    d30,d17,d16             @ Maj(a,b,c)
912         veor    d22,d26                 @ Sigma0(a)
913         vadd.i64        d18,d27
914         vadd.i64        d30,d27
915         @ vadd.i64      d22,d30
916         vshr.u64        d24,d18,#14     @ 10
917 #if 10<16
918         vld1.64 {d10},[r1]!     @ handles unaligned
919 #endif
920         vshr.u64        d25,d18,#18
921 #if 10>0
922         vadd.i64        d22,d30                 @ h+=Maj from the past
923 #endif
924         vshr.u64        d26,d18,#41
925         vld1.64 {d28},[r3,:64]! @ K[i++]
926         vsli.64 d24,d18,#50
927         vsli.64 d25,d18,#46
928         vmov    d29,d18
929         vsli.64 d26,d18,#23
930 #if 10<16 && defined(__ARMEL__)
931         vrev64.8        d10,d10
932 #endif
933         veor    d25,d24
934         vbsl    d29,d19,d20             @ Ch(e,f,g)
935         vshr.u64        d24,d22,#28
936         veor    d26,d25                 @ Sigma1(e)
937         vadd.i64        d27,d29,d21
938         vshr.u64        d25,d22,#34
939         vsli.64 d24,d22,#36
940         vadd.i64        d27,d26
941         vshr.u64        d26,d22,#39
942         vadd.i64        d28,d10
943         vsli.64 d25,d22,#30
944         veor    d30,d22,d23
945         vsli.64 d26,d22,#25
946         veor    d21,d24,d25
947         vadd.i64        d27,d28
948         vbsl    d30,d16,d23             @ Maj(a,b,c)
949         veor    d21,d26                 @ Sigma0(a)
950         vadd.i64        d17,d27
951         vadd.i64        d30,d27
952         @ vadd.i64      d21,d30
953         vshr.u64        d24,d17,#14     @ 11
954 #if 11<16
955         vld1.64 {d11},[r1]!     @ handles unaligned
956 #endif
957         vshr.u64        d25,d17,#18
958 #if 11>0
959         vadd.i64        d21,d30                 @ h+=Maj from the past
960 #endif
961         vshr.u64        d26,d17,#41
962         vld1.64 {d28},[r3,:64]! @ K[i++]
963         vsli.64 d24,d17,#50
964         vsli.64 d25,d17,#46
965         vmov    d29,d17
966         vsli.64 d26,d17,#23
967 #if 11<16 && defined(__ARMEL__)
968         vrev64.8        d11,d11
969 #endif
970         veor    d25,d24
971         vbsl    d29,d18,d19             @ Ch(e,f,g)
972         vshr.u64        d24,d21,#28
973         veor    d26,d25                 @ Sigma1(e)
974         vadd.i64        d27,d29,d20
975         vshr.u64        d25,d21,#34
976         vsli.64 d24,d21,#36
977         vadd.i64        d27,d26
978         vshr.u64        d26,d21,#39
979         vadd.i64        d28,d11
980         vsli.64 d25,d21,#30
981         veor    d30,d21,d22
982         vsli.64 d26,d21,#25
983         veor    d20,d24,d25
984         vadd.i64        d27,d28
985         vbsl    d30,d23,d22             @ Maj(a,b,c)
986         veor    d20,d26                 @ Sigma0(a)
987         vadd.i64        d16,d27
988         vadd.i64        d30,d27
989         @ vadd.i64      d20,d30
990         vshr.u64        d24,d16,#14     @ 12
991 #if 12<16
992         vld1.64 {d12},[r1]!     @ handles unaligned
993 #endif
994         vshr.u64        d25,d16,#18
995 #if 12>0
996         vadd.i64        d20,d30                 @ h+=Maj from the past
997 #endif
998         vshr.u64        d26,d16,#41
999         vld1.64 {d28},[r3,:64]! @ K[i++]
1000         vsli.64 d24,d16,#50
1001         vsli.64 d25,d16,#46
1002         vmov    d29,d16
1003         vsli.64 d26,d16,#23
1004 #if 12<16 && defined(__ARMEL__)
1005         vrev64.8        d12,d12
1006 #endif
1007         veor    d25,d24
1008         vbsl    d29,d17,d18             @ Ch(e,f,g)
1009         vshr.u64        d24,d20,#28
1010         veor    d26,d25                 @ Sigma1(e)
1011         vadd.i64        d27,d29,d19
1012         vshr.u64        d25,d20,#34
1013         vsli.64 d24,d20,#36
1014         vadd.i64        d27,d26
1015         vshr.u64        d26,d20,#39
1016         vadd.i64        d28,d12
1017         vsli.64 d25,d20,#30
1018         veor    d30,d20,d21
1019         vsli.64 d26,d20,#25
1020         veor    d19,d24,d25
1021         vadd.i64        d27,d28
1022         vbsl    d30,d22,d21             @ Maj(a,b,c)
1023         veor    d19,d26                 @ Sigma0(a)
1024         vadd.i64        d23,d27
1025         vadd.i64        d30,d27
1026         @ vadd.i64      d19,d30
1027         vshr.u64        d24,d23,#14     @ 13
1028 #if 13<16
1029         vld1.64 {d13},[r1]!     @ handles unaligned
1030 #endif
1031         vshr.u64        d25,d23,#18
1032 #if 13>0
1033         vadd.i64        d19,d30                 @ h+=Maj from the past
1034 #endif
1035         vshr.u64        d26,d23,#41
1036         vld1.64 {d28},[r3,:64]! @ K[i++]
1037         vsli.64 d24,d23,#50
1038         vsli.64 d25,d23,#46
1039         vmov    d29,d23
1040         vsli.64 d26,d23,#23
1041 #if 13<16 && defined(__ARMEL__)
1042         vrev64.8        d13,d13
1043 #endif
1044         veor    d25,d24
1045         vbsl    d29,d16,d17             @ Ch(e,f,g)
1046         vshr.u64        d24,d19,#28
1047         veor    d26,d25                 @ Sigma1(e)
1048         vadd.i64        d27,d29,d18
1049         vshr.u64        d25,d19,#34
1050         vsli.64 d24,d19,#36
1051         vadd.i64        d27,d26
1052         vshr.u64        d26,d19,#39
1053         vadd.i64        d28,d13
1054         vsli.64 d25,d19,#30
1055         veor    d30,d19,d20
1056         vsli.64 d26,d19,#25
1057         veor    d18,d24,d25
1058         vadd.i64        d27,d28
1059         vbsl    d30,d21,d20             @ Maj(a,b,c)
1060         veor    d18,d26                 @ Sigma0(a)
1061         vadd.i64        d22,d27
1062         vadd.i64        d30,d27
1063         @ vadd.i64      d18,d30
1064         vshr.u64        d24,d22,#14     @ 14
1065 #if 14<16
1066         vld1.64 {d14},[r1]!     @ handles unaligned
1067 #endif
1068         vshr.u64        d25,d22,#18
1069 #if 14>0
1070         vadd.i64        d18,d30                 @ h+=Maj from the past
1071 #endif
1072         vshr.u64        d26,d22,#41
1073         vld1.64 {d28},[r3,:64]! @ K[i++]
1074         vsli.64 d24,d22,#50
1075         vsli.64 d25,d22,#46
1076         vmov    d29,d22
1077         vsli.64 d26,d22,#23
1078 #if 14<16 && defined(__ARMEL__)
1079         vrev64.8        d14,d14
1080 #endif
1081         veor    d25,d24
1082         vbsl    d29,d23,d16             @ Ch(e,f,g)
1083         vshr.u64        d24,d18,#28
1084         veor    d26,d25                 @ Sigma1(e)
1085         vadd.i64        d27,d29,d17
1086         vshr.u64        d25,d18,#34
1087         vsli.64 d24,d18,#36
1088         vadd.i64        d27,d26
1089         vshr.u64        d26,d18,#39
1090         vadd.i64        d28,d14
1091         vsli.64 d25,d18,#30
1092         veor    d30,d18,d19
1093         vsli.64 d26,d18,#25
1094         veor    d17,d24,d25
1095         vadd.i64        d27,d28
1096         vbsl    d30,d20,d19             @ Maj(a,b,c)
1097         veor    d17,d26                 @ Sigma0(a)
1098         vadd.i64        d21,d27
1099         vadd.i64        d30,d27
1100         @ vadd.i64      d17,d30
1101         vshr.u64        d24,d21,#14     @ 15
1102 #if 15<16
1103         vld1.64 {d15},[r1]!     @ handles unaligned
1104 #endif
1105         vshr.u64        d25,d21,#18
1106 #if 15>0
1107         vadd.i64        d17,d30                 @ h+=Maj from the past
1108 #endif
1109         vshr.u64        d26,d21,#41
1110         vld1.64 {d28},[r3,:64]! @ K[i++]
1111         vsli.64 d24,d21,#50
1112         vsli.64 d25,d21,#46
1113         vmov    d29,d21
1114         vsli.64 d26,d21,#23
1115 #if 15<16 && defined(__ARMEL__)
1116         vrev64.8        d15,d15
1117 #endif
1118         veor    d25,d24
1119         vbsl    d29,d22,d23             @ Ch(e,f,g)
1120         vshr.u64        d24,d17,#28
1121         veor    d26,d25                 @ Sigma1(e)
1122         vadd.i64        d27,d29,d16
1123         vshr.u64        d25,d17,#34
1124         vsli.64 d24,d17,#36
1125         vadd.i64        d27,d26
1126         vshr.u64        d26,d17,#39
1127         vadd.i64        d28,d15
1128         vsli.64 d25,d17,#30
1129         veor    d30,d17,d18
1130         vsli.64 d26,d17,#25
1131         veor    d16,d24,d25
1132         vadd.i64        d27,d28
1133         vbsl    d30,d19,d18             @ Maj(a,b,c)
1134         veor    d16,d26                 @ Sigma0(a)
1135         vadd.i64        d20,d27
1136         vadd.i64        d30,d27
1137         @ vadd.i64      d16,d30
1138         mov     r12,#4
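@ .L16_79_neon: four passes of sixteen rounds cover rounds 16..79. Each pass
@ updates the schedule in place, computing sigma0/sigma1 two 64-bit lanes at a
@ time in q12-q15 and forming X[i+1]/X[i+9] with vext.8 across neighbouring
@ Q registers.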
1139 .L16_79_neon:
1140         subs    r12,#1
1141         vshr.u64        q12,q7,#19
1142         vshr.u64        q13,q7,#61
1143         vadd.i64        d16,d30                 @ h+=Maj from the past
1144         vshr.u64        q15,q7,#6
1145         vsli.64 q12,q7,#45
1146         vext.8  q14,q0,q1,#8    @ X[i+1]
1147         vsli.64 q13,q7,#3
1148         veor    q15,q12
1149         vshr.u64        q12,q14,#1
1150         veor    q15,q13                         @ sigma1(X[i+14])
1151         vshr.u64        q13,q14,#8
1152         vadd.i64        q0,q15
1153         vshr.u64        q15,q14,#7
1154         vsli.64 q12,q14,#63
1155         vsli.64 q13,q14,#56
1156         vext.8  q14,q4,q5,#8    @ X[i+9]
1157         veor    q15,q12
1158         vshr.u64        d24,d20,#14             @ from NEON_00_15
1159         vadd.i64        q0,q14
1160         vshr.u64        d25,d20,#18             @ from NEON_00_15
1161         veor    q15,q13                         @ sigma0(X[i+1])
1162         vshr.u64        d26,d20,#41             @ from NEON_00_15
1163         vadd.i64        q0,q15
1164         vld1.64 {d28},[r3,:64]! @ K[i++]
1165         vsli.64 d24,d20,#50
1166         vsli.64 d25,d20,#46
1167         vmov    d29,d20
1168         vsli.64 d26,d20,#23
1169 #if 16<16 && defined(__ARMEL__)
1170         vrev64.8        ,
1171 #endif
1172         veor    d25,d24
1173         vbsl    d29,d21,d22             @ Ch(e,f,g)
1174         vshr.u64        d24,d16,#28
1175         veor    d26,d25                 @ Sigma1(e)
1176         vadd.i64        d27,d29,d23
1177         vshr.u64        d25,d16,#34
1178         vsli.64 d24,d16,#36
1179         vadd.i64        d27,d26
1180         vshr.u64        d26,d16,#39
1181         vadd.i64        d28,d0
1182         vsli.64 d25,d16,#30
1183         veor    d30,d16,d17
1184         vsli.64 d26,d16,#25
1185         veor    d23,d24,d25
1186         vadd.i64        d27,d28
1187         vbsl    d30,d18,d17             @ Maj(a,b,c)
1188         veor    d23,d26                 @ Sigma0(a)
1189         vadd.i64        d19,d27
1190         vadd.i64        d30,d27
1191         @ vadd.i64      d23,d30
1192         vshr.u64        d24,d19,#14     @ 17
1193 #if 17<16
1194         vld1.64 {d1},[r1]!      @ handles unaligned
1195 #endif
1196         vshr.u64        d25,d19,#18
1197 #if 17>0
1198         vadd.i64        d23,d30                 @ h+=Maj from the past
1199 #endif
1200         vshr.u64        d26,d19,#41
1201         vld1.64 {d28},[r3,:64]! @ K[i++]
1202         vsli.64 d24,d19,#50
1203         vsli.64 d25,d19,#46
1204         vmov    d29,d19
1205         vsli.64 d26,d19,#23
1206 #if 17<16 && defined(__ARMEL__)
1207         vrev64.8        ,
1208 #endif
1209         veor    d25,d24
1210         vbsl    d29,d20,d21             @ Ch(e,f,g)
1211         vshr.u64        d24,d23,#28
1212         veor    d26,d25                 @ Sigma1(e)
1213         vadd.i64        d27,d29,d22
1214         vshr.u64        d25,d23,#34
1215         vsli.64 d24,d23,#36
1216         vadd.i64        d27,d26
1217         vshr.u64        d26,d23,#39
1218         vadd.i64        d28,d1
1219         vsli.64 d25,d23,#30
1220         veor    d30,d23,d16
1221         vsli.64 d26,d23,#25
1222         veor    d22,d24,d25
1223         vadd.i64        d27,d28
1224         vbsl    d30,d17,d16             @ Maj(a,b,c)
1225         veor    d22,d26                 @ Sigma0(a)
1226         vadd.i64        d18,d27
1227         vadd.i64        d30,d27
1228         @ vadd.i64      d22,d30
1229         vshr.u64        q12,q0,#19
1230         vshr.u64        q13,q0,#61
1231         vadd.i64        d22,d30                 @ h+=Maj from the past
1232         vshr.u64        q15,q0,#6
1233         vsli.64 q12,q0,#45
1234         vext.8  q14,q1,q2,#8    @ X[i+1]
1235         vsli.64 q13,q0,#3
1236         veor    q15,q12
1237         vshr.u64        q12,q14,#1
1238         veor    q15,q13                         @ sigma1(X[i+14])
1239         vshr.u64        q13,q14,#8
1240         vadd.i64        q1,q15
1241         vshr.u64        q15,q14,#7
1242         vsli.64 q12,q14,#63
1243         vsli.64 q13,q14,#56
1244         vext.8  q14,q5,q6,#8    @ X[i+9]
1245         veor    q15,q12
1246         vshr.u64        d24,d18,#14             @ from NEON_00_15
1247         vadd.i64        q1,q14
1248         vshr.u64        d25,d18,#18             @ from NEON_00_15
1249         veor    q15,q13                         @ sigma0(X[i+1])
1250         vshr.u64        d26,d18,#41             @ from NEON_00_15
1251         vadd.i64        q1,q15
1252         vld1.64 {d28},[r3,:64]! @ K[i++]
1253         vsli.64 d24,d18,#50
1254         vsli.64 d25,d18,#46
1255         vmov    d29,d18
1256         vsli.64 d26,d18,#23
1257 #if 18<16 && defined(__ARMEL__)
1258         vrev64.8        ,
1259 #endif
1260         veor    d25,d24
1261         vbsl    d29,d19,d20             @ Ch(e,f,g)
1262         vshr.u64        d24,d22,#28
1263         veor    d26,d25                 @ Sigma1(e)
1264         vadd.i64        d27,d29,d21
1265         vshr.u64        d25,d22,#34
1266         vsli.64 d24,d22,#36
1267         vadd.i64        d27,d26
1268         vshr.u64        d26,d22,#39
1269         vadd.i64        d28,d2
1270         vsli.64 d25,d22,#30
1271         veor    d30,d22,d23
1272         vsli.64 d26,d22,#25
1273         veor    d21,d24,d25
1274         vadd.i64        d27,d28
1275         vbsl    d30,d16,d23             @ Maj(a,b,c)
1276         veor    d21,d26                 @ Sigma0(a)
1277         vadd.i64        d17,d27
1278         vadd.i64        d30,d27
1279         @ vadd.i64      d21,d30
1280         vshr.u64        d24,d17,#14     @ 19
1281 #if 19<16
1282         vld1.64 {d3},[r1]!      @ handles unaligned
1283 #endif
1284         vshr.u64        d25,d17,#18
1285 #if 19>0
1286         vadd.i64        d21,d30                 @ h+=Maj from the past
1287 #endif
1288         vshr.u64        d26,d17,#41
1289         vld1.64 {d28},[r3,:64]! @ K[i++]
1290         vsli.64 d24,d17,#50
1291         vsli.64 d25,d17,#46
1292         vmov    d29,d17
1293         vsli.64 d26,d17,#23
1294 #if 19<16 && defined(__ARMEL__)
1295         vrev64.8        ,
1296 #endif
1297         veor    d25,d24
1298         vbsl    d29,d18,d19             @ Ch(e,f,g)
1299         vshr.u64        d24,d21,#28
1300         veor    d26,d25                 @ Sigma1(e)
1301         vadd.i64        d27,d29,d20
1302         vshr.u64        d25,d21,#34
1303         vsli.64 d24,d21,#36
1304         vadd.i64        d27,d26
1305         vshr.u64        d26,d21,#39
1306         vadd.i64        d28,d3
1307         vsli.64 d25,d21,#30
1308         veor    d30,d21,d22
1309         vsli.64 d26,d21,#25
1310         veor    d20,d24,d25
1311         vadd.i64        d27,d28
1312         vbsl    d30,d23,d22             @ Maj(a,b,c)
1313         veor    d20,d26                 @ Sigma0(a)
1314         vadd.i64        d16,d27
1315         vadd.i64        d30,d27
1316         @ vadd.i64      d20,d30
1317         vshr.u64        q12,q1,#19
1318         vshr.u64        q13,q1,#61
1319         vadd.i64        d20,d30                 @ h+=Maj from the past
1320         vshr.u64        q15,q1,#6
1321         vsli.64 q12,q1,#45
1322         vext.8  q14,q2,q3,#8    @ X[i+1]
1323         vsli.64 q13,q1,#3
1324         veor    q15,q12
1325         vshr.u64        q12,q14,#1
1326         veor    q15,q13                         @ sigma1(X[i+14])
1327         vshr.u64        q13,q14,#8
1328         vadd.i64        q2,q15
1329         vshr.u64        q15,q14,#7
1330         vsli.64 q12,q14,#63
1331         vsli.64 q13,q14,#56
1332         vext.8  q14,q6,q7,#8    @ X[i+9]
1333         veor    q15,q12
1334         vshr.u64        d24,d16,#14             @ from NEON_00_15
1335         vadd.i64        q2,q14
1336         vshr.u64        d25,d16,#18             @ from NEON_00_15
1337         veor    q15,q13                         @ sigma0(X[i+1])
1338         vshr.u64        d26,d16,#41             @ from NEON_00_15
1339         vadd.i64        q2,q15
1340         vld1.64 {d28},[r3,:64]! @ K[i++]
1341         vsli.64 d24,d16,#50
1342         vsli.64 d25,d16,#46
1343         vmov    d29,d16
1344         vsli.64 d26,d16,#23
1345 #if 20<16 && defined(__ARMEL__)
1346         vrev64.8        ,
1347 #endif
1348         veor    d25,d24
1349         vbsl    d29,d17,d18             @ Ch(e,f,g)
1350         vshr.u64        d24,d20,#28
1351         veor    d26,d25                 @ Sigma1(e)
1352         vadd.i64        d27,d29,d19
1353         vshr.u64        d25,d20,#34
1354         vsli.64 d24,d20,#36
1355         vadd.i64        d27,d26
1356         vshr.u64        d26,d20,#39
1357         vadd.i64        d28,d4
1358         vsli.64 d25,d20,#30
1359         veor    d30,d20,d21
1360         vsli.64 d26,d20,#25
1361         veor    d19,d24,d25
1362         vadd.i64        d27,d28
1363         vbsl    d30,d22,d21             @ Maj(a,b,c)
1364         veor    d19,d26                 @ Sigma0(a)
1365         vadd.i64        d23,d27
1366         vadd.i64        d30,d27
1367         @ vadd.i64      d19,d30
1368         vshr.u64        d24,d23,#14     @ 21
1369 #if 21<16
1370         vld1.64 {d5},[r1]!      @ handles unaligned
1371 #endif
1372         vshr.u64        d25,d23,#18
1373 #if 21>0
1374         vadd.i64        d19,d30                 @ h+=Maj from the past
1375 #endif
1376         vshr.u64        d26,d23,#41
1377         vld1.64 {d28},[r3,:64]! @ K[i++]
1378         vsli.64 d24,d23,#50
1379         vsli.64 d25,d23,#46
1380         vmov    d29,d23
1381         vsli.64 d26,d23,#23
1382 #if 21<16 && defined(__ARMEL__)
1383         vrev64.8        ,
1384 #endif
1385         veor    d25,d24
1386         vbsl    d29,d16,d17             @ Ch(e,f,g)
1387         vshr.u64        d24,d19,#28
1388         veor    d26,d25                 @ Sigma1(e)
1389         vadd.i64        d27,d29,d18
1390         vshr.u64        d25,d19,#34
1391         vsli.64 d24,d19,#36
1392         vadd.i64        d27,d26
1393         vshr.u64        d26,d19,#39
1394         vadd.i64        d28,d5
1395         vsli.64 d25,d19,#30
1396         veor    d30,d19,d20
1397         vsli.64 d26,d19,#25
1398         veor    d18,d24,d25
1399         vadd.i64        d27,d28
1400         vbsl    d30,d21,d20             @ Maj(a,b,c)
1401         veor    d18,d26                 @ Sigma0(a)
1402         vadd.i64        d22,d27
1403         vadd.i64        d30,d27
1404         @ vadd.i64      d18,d30
1405         vshr.u64        q12,q2,#19
1406         vshr.u64        q13,q2,#61
1407         vadd.i64        d18,d30                 @ h+=Maj from the past
1408         vshr.u64        q15,q2,#6
1409         vsli.64 q12,q2,#45
1410         vext.8  q14,q3,q4,#8    @ X[i+1]
1411         vsli.64 q13,q2,#3
1412         veor    q15,q12
1413         vshr.u64        q12,q14,#1
1414         veor    q15,q13                         @ sigma1(X[i+14])
1415         vshr.u64        q13,q14,#8
1416         vadd.i64        q3,q15
1417         vshr.u64        q15,q14,#7
1418         vsli.64 q12,q14,#63
1419         vsli.64 q13,q14,#56
1420         vext.8  q14,q7,q0,#8    @ X[i+9]
1421         veor    q15,q12
1422         vshr.u64        d24,d22,#14             @ from NEON_00_15
1423         vadd.i64        q3,q14
1424         vshr.u64        d25,d22,#18             @ from NEON_00_15
1425         veor    q15,q13                         @ sigma0(X[i+1])
1426         vshr.u64        d26,d22,#41             @ from NEON_00_15
1427         vadd.i64        q3,q15
1428         vld1.64 {d28},[r3,:64]! @ K[i++]
1429         vsli.64 d24,d22,#50
1430         vsli.64 d25,d22,#46
1431         vmov    d29,d22
1432         vsli.64 d26,d22,#23
1433 #if 22<16 && defined(__ARMEL__)
1434         vrev64.8        ,
1435 #endif
1436         veor    d25,d24
1437         vbsl    d29,d23,d16             @ Ch(e,f,g)
1438         vshr.u64        d24,d18,#28
1439         veor    d26,d25                 @ Sigma1(e)
1440         vadd.i64        d27,d29,d17
1441         vshr.u64        d25,d18,#34
1442         vsli.64 d24,d18,#36
1443         vadd.i64        d27,d26
1444         vshr.u64        d26,d18,#39
1445         vadd.i64        d28,d6
1446         vsli.64 d25,d18,#30
1447         veor    d30,d18,d19
1448         vsli.64 d26,d18,#25
1449         veor    d17,d24,d25
1450         vadd.i64        d27,d28
1451         vbsl    d30,d20,d19             @ Maj(a,b,c)
1452         veor    d17,d26                 @ Sigma0(a)
1453         vadd.i64        d21,d27
1454         vadd.i64        d30,d27
1455         @ vadd.i64      d17,d30
1456         vshr.u64        d24,d21,#14     @ 23
1457 #if 23<16
1458         vld1.64 {d7},[r1]!      @ handles unaligned
1459 #endif
1460         vshr.u64        d25,d21,#18
1461 #if 23>0
1462         vadd.i64        d17,d30                 @ h+=Maj from the past
1463 #endif
1464         vshr.u64        d26,d21,#41
1465         vld1.64 {d28},[r3,:64]! @ K[i++]
1466         vsli.64 d24,d21,#50
1467         vsli.64 d25,d21,#46
1468         vmov    d29,d21
1469         vsli.64 d26,d21,#23
1470 #if 23<16 && defined(__ARMEL__)
1471         vrev64.8        ,
1472 #endif
1473         veor    d25,d24
1474         vbsl    d29,d22,d23             @ Ch(e,f,g)
1475         vshr.u64        d24,d17,#28
1476         veor    d26,d25                 @ Sigma1(e)
1477         vadd.i64        d27,d29,d16
1478         vshr.u64        d25,d17,#34
1479         vsli.64 d24,d17,#36
1480         vadd.i64        d27,d26
1481         vshr.u64        d26,d17,#39
1482         vadd.i64        d28,d7
1483         vsli.64 d25,d17,#30
1484         veor    d30,d17,d18
1485         vsli.64 d26,d17,#25
1486         veor    d16,d24,d25
1487         vadd.i64        d27,d28
1488         vbsl    d30,d19,d18             @ Maj(a,b,c)
1489         veor    d16,d26                 @ Sigma0(a)
1490         vadd.i64        d20,d27
1491         vadd.i64        d30,d27
1492         @ vadd.i64      d16,d30
1493         vshr.u64        q12,q3,#19
1494         vshr.u64        q13,q3,#61
1495         vadd.i64        d16,d30                 @ h+=Maj from the past
1496         vshr.u64        q15,q3,#6
1497         vsli.64 q12,q3,#45
1498         vext.8  q14,q4,q5,#8    @ X[i+1]
1499         vsli.64 q13,q3,#3
1500         veor    q15,q12
1501         vshr.u64        q12,q14,#1
1502         veor    q15,q13                         @ sigma1(X[i+14])
1503         vshr.u64        q13,q14,#8
1504         vadd.i64        q4,q15
1505         vshr.u64        q15,q14,#7
1506         vsli.64 q12,q14,#63
1507         vsli.64 q13,q14,#56
1508         vext.8  q14,q0,q1,#8    @ X[i+9]
1509         veor    q15,q12
1510         vshr.u64        d24,d20,#14             @ from NEON_00_15
1511         vadd.i64        q4,q14
1512         vshr.u64        d25,d20,#18             @ from NEON_00_15
1513         veor    q15,q13                         @ sigma0(X[i+1])
1514         vshr.u64        d26,d20,#41             @ from NEON_00_15
1515         vadd.i64        q4,q15
1516         vld1.64 {d28},[r3,:64]! @ K[i++]
1517         vsli.64 d24,d20,#50
1518         vsli.64 d25,d20,#46
1519         vmov    d29,d20
1520         vsli.64 d26,d20,#23
1521 #if 24<16 && defined(__ARMEL__)
1522         vrev64.8        ,
1523 #endif
1524         veor    d25,d24
1525         vbsl    d29,d21,d22             @ Ch(e,f,g)
1526         vshr.u64        d24,d16,#28
1527         veor    d26,d25                 @ Sigma1(e)
1528         vadd.i64        d27,d29,d23
1529         vshr.u64        d25,d16,#34
1530         vsli.64 d24,d16,#36
1531         vadd.i64        d27,d26
1532         vshr.u64        d26,d16,#39
1533         vadd.i64        d28,d8
1534         vsli.64 d25,d16,#30
1535         veor    d30,d16,d17
1536         vsli.64 d26,d16,#25
1537         veor    d23,d24,d25
1538         vadd.i64        d27,d28
1539         vbsl    d30,d18,d17             @ Maj(a,b,c)
1540         veor    d23,d26                 @ Sigma0(a)
1541         vadd.i64        d19,d27
1542         vadd.i64        d30,d27
1543         @ vadd.i64      d23,d30
1544         vshr.u64        d24,d19,#14     @ 25
1545 #if 25<16
1546         vld1.64 {d9},[r1]!      @ handles unaligned
1547 #endif
1548         vshr.u64        d25,d19,#18
1549 #if 25>0
1550         vadd.i64        d23,d30                 @ h+=Maj from the past
1551 #endif
1552         vshr.u64        d26,d19,#41
1553         vld1.64 {d28},[r3,:64]! @ K[i++]
1554         vsli.64 d24,d19,#50
1555         vsli.64 d25,d19,#46
1556         vmov    d29,d19
1557         vsli.64 d26,d19,#23
1558 #if 25<16 && defined(__ARMEL__)
1559         vrev64.8        ,
1560 #endif
1561         veor    d25,d24
1562         vbsl    d29,d20,d21             @ Ch(e,f,g)
1563         vshr.u64        d24,d23,#28
1564         veor    d26,d25                 @ Sigma1(e)
1565         vadd.i64        d27,d29,d22
1566         vshr.u64        d25,d23,#34
1567         vsli.64 d24,d23,#36
1568         vadd.i64        d27,d26
1569         vshr.u64        d26,d23,#39
1570         vadd.i64        d28,d9
1571         vsli.64 d25,d23,#30
1572         veor    d30,d23,d16
1573         vsli.64 d26,d23,#25
1574         veor    d22,d24,d25
1575         vadd.i64        d27,d28
1576         vbsl    d30,d17,d16             @ Maj(a,b,c)
1577         veor    d22,d26                 @ Sigma0(a)
1578         vadd.i64        d18,d27
1579         vadd.i64        d30,d27
1580         @ vadd.i64      d22,d30
1581         vshr.u64        q12,q4,#19
1582         vshr.u64        q13,q4,#61
1583         vadd.i64        d22,d30                 @ h+=Maj from the past
1584         vshr.u64        q15,q4,#6
1585         vsli.64 q12,q4,#45
1586         vext.8  q14,q5,q6,#8    @ X[i+1]
1587         vsli.64 q13,q4,#3
1588         veor    q15,q12
1589         vshr.u64        q12,q14,#1
1590         veor    q15,q13                         @ sigma1(X[i+14])
1591         vshr.u64        q13,q14,#8
1592         vadd.i64        q5,q15
1593         vshr.u64        q15,q14,#7
1594         vsli.64 q12,q14,#63
1595         vsli.64 q13,q14,#56
1596         vext.8  q14,q1,q2,#8    @ X[i+9]
1597         veor    q15,q12
1598         vshr.u64        d24,d18,#14             @ from NEON_00_15
1599         vadd.i64        q5,q14
1600         vshr.u64        d25,d18,#18             @ from NEON_00_15
1601         veor    q15,q13                         @ sigma0(X[i+1])
1602         vshr.u64        d26,d18,#41             @ from NEON_00_15
1603         vadd.i64        q5,q15
1604         vld1.64 {d28},[r3,:64]! @ K[i++]
1605         vsli.64 d24,d18,#50
1606         vsli.64 d25,d18,#46
1607         vmov    d29,d18
1608         vsli.64 d26,d18,#23
1609 #if 26<16 && defined(__ARMEL__)
1610         vrev64.8        ,
1611 #endif
1612         veor    d25,d24
1613         vbsl    d29,d19,d20             @ Ch(e,f,g)
1614         vshr.u64        d24,d22,#28
1615         veor    d26,d25                 @ Sigma1(e)
1616         vadd.i64        d27,d29,d21
1617         vshr.u64        d25,d22,#34
1618         vsli.64 d24,d22,#36
1619         vadd.i64        d27,d26
1620         vshr.u64        d26,d22,#39
1621         vadd.i64        d28,d10
1622         vsli.64 d25,d22,#30
1623         veor    d30,d22,d23
1624         vsli.64 d26,d22,#25
1625         veor    d21,d24,d25
1626         vadd.i64        d27,d28
1627         vbsl    d30,d16,d23             @ Maj(a,b,c)
1628         veor    d21,d26                 @ Sigma0(a)
1629         vadd.i64        d17,d27
1630         vadd.i64        d30,d27
1631         @ vadd.i64      d21,d30
1632         vshr.u64        d24,d17,#14     @ 27
1633 #if 27<16
1634         vld1.64 {d11},[r1]!     @ handles unaligned
1635 #endif
1636         vshr.u64        d25,d17,#18
1637 #if 27>0
1638         vadd.i64        d21,d30                 @ h+=Maj from the past
1639 #endif
1640         vshr.u64        d26,d17,#41
1641         vld1.64 {d28},[r3,:64]! @ K[i++]
1642         vsli.64 d24,d17,#50
1643         vsli.64 d25,d17,#46
1644         vmov    d29,d17
1645         vsli.64 d26,d17,#23
1646 #if 27<16 && defined(__ARMEL__)
1647         vrev64.8        ,
1648 #endif
1649         veor    d25,d24
1650         vbsl    d29,d18,d19             @ Ch(e,f,g)
1651         vshr.u64        d24,d21,#28
1652         veor    d26,d25                 @ Sigma1(e)
1653         vadd.i64        d27,d29,d20
1654         vshr.u64        d25,d21,#34
1655         vsli.64 d24,d21,#36
1656         vadd.i64        d27,d26
1657         vshr.u64        d26,d21,#39
1658         vadd.i64        d28,d11
1659         vsli.64 d25,d21,#30
1660         veor    d30,d21,d22
1661         vsli.64 d26,d21,#25
1662         veor    d20,d24,d25
1663         vadd.i64        d27,d28
1664         vbsl    d30,d23,d22             @ Maj(a,b,c)
1665         veor    d20,d26                 @ Sigma0(a)
1666         vadd.i64        d16,d27
1667         vadd.i64        d30,d27
1668         @ vadd.i64      d20,d30
1669         vshr.u64        q12,q5,#19
1670         vshr.u64        q13,q5,#61
1671         vadd.i64        d20,d30                 @ h+=Maj from the past
1672         vshr.u64        q15,q5,#6
1673         vsli.64 q12,q5,#45
1674         vext.8  q14,q6,q7,#8    @ X[i+1]
1675         vsli.64 q13,q5,#3
1676         veor    q15,q12
1677         vshr.u64        q12,q14,#1
1678         veor    q15,q13                         @ sigma1(X[i+14])
1679         vshr.u64        q13,q14,#8
1680         vadd.i64        q6,q15
1681         vshr.u64        q15,q14,#7
1682         vsli.64 q12,q14,#63
1683         vsli.64 q13,q14,#56
1684         vext.8  q14,q2,q3,#8    @ X[i+9]
1685         veor    q15,q12
1686         vshr.u64        d24,d16,#14             @ from NEON_00_15
1687         vadd.i64        q6,q14
1688         vshr.u64        d25,d16,#18             @ from NEON_00_15
1689         veor    q15,q13                         @ sigma0(X[i+1])
1690         vshr.u64        d26,d16,#41             @ from NEON_00_15
1691         vadd.i64        q6,q15
1692         vld1.64 {d28},[r3,:64]! @ K[i++]
1693         vsli.64 d24,d16,#50
1694         vsli.64 d25,d16,#46
1695         vmov    d29,d16
1696         vsli.64 d26,d16,#23
1697 #if 28<16 && defined(__ARMEL__)
1698         vrev64.8        ,
1699 #endif
1700         veor    d25,d24
1701         vbsl    d29,d17,d18             @ Ch(e,f,g)
1702         vshr.u64        d24,d20,#28
1703         veor    d26,d25                 @ Sigma1(e)
1704         vadd.i64        d27,d29,d19
1705         vshr.u64        d25,d20,#34
1706         vsli.64 d24,d20,#36
1707         vadd.i64        d27,d26
1708         vshr.u64        d26,d20,#39
1709         vadd.i64        d28,d12
1710         vsli.64 d25,d20,#30
1711         veor    d30,d20,d21
1712         vsli.64 d26,d20,#25
1713         veor    d19,d24,d25
1714         vadd.i64        d27,d28
1715         vbsl    d30,d22,d21             @ Maj(a,b,c)
1716         veor    d19,d26                 @ Sigma0(a)
1717         vadd.i64        d23,d27
1718         vadd.i64        d30,d27
1719         @ vadd.i64      d19,d30
1720         vshr.u64        d24,d23,#14     @ 29
1721 #if 29<16
1722         vld1.64 {d13},[r1]!     @ handles unaligned
1723 #endif
1724         vshr.u64        d25,d23,#18
1725 #if 29>0
1726         vadd.i64        d19,d30                 @ h+=Maj from the past
1727 #endif
1728         vshr.u64        d26,d23,#41
1729         vld1.64 {d28},[r3,:64]! @ K[i++]
1730         vsli.64 d24,d23,#50
1731         vsli.64 d25,d23,#46
1732         vmov    d29,d23
1733         vsli.64 d26,d23,#23
1734 #if 29<16 && defined(__ARMEL__)
1735         vrev64.8        ,
1736 #endif
1737         veor    d25,d24
1738         vbsl    d29,d16,d17             @ Ch(e,f,g)
1739         vshr.u64        d24,d19,#28
1740         veor    d26,d25                 @ Sigma1(e)
1741         vadd.i64        d27,d29,d18
1742         vshr.u64        d25,d19,#34
1743         vsli.64 d24,d19,#36
1744         vadd.i64        d27,d26
1745         vshr.u64        d26,d19,#39
1746         vadd.i64        d28,d13
1747         vsli.64 d25,d19,#30
1748         veor    d30,d19,d20
1749         vsli.64 d26,d19,#25
1750         veor    d18,d24,d25
1751         vadd.i64        d27,d28
1752         vbsl    d30,d21,d20             @ Maj(a,b,c)
1753         veor    d18,d26                 @ Sigma0(a)
1754         vadd.i64        d22,d27
1755         vadd.i64        d30,d27
1756         @ vadd.i64      d18,d30
1757         vshr.u64        q12,q6,#19
1758         vshr.u64        q13,q6,#61
1759         vadd.i64        d18,d30                 @ h+=Maj from the past
1760         vshr.u64        q15,q6,#6
1761         vsli.64 q12,q6,#45
1762         vext.8  q14,q7,q0,#8    @ X[i+1]
1763         vsli.64 q13,q6,#3
1764         veor    q15,q12
1765         vshr.u64        q12,q14,#1
1766         veor    q15,q13                         @ sigma1(X[i+14])
1767         vshr.u64        q13,q14,#8
1768         vadd.i64        q7,q15
1769         vshr.u64        q15,q14,#7
1770         vsli.64 q12,q14,#63
1771         vsli.64 q13,q14,#56
1772         vext.8  q14,q3,q4,#8    @ X[i+9]
1773         veor    q15,q12
1774         vshr.u64        d24,d22,#14             @ from NEON_00_15
1775         vadd.i64        q7,q14
1776         vshr.u64        d25,d22,#18             @ from NEON_00_15
1777         veor    q15,q13                         @ sigma0(X[i+1])
1778         vshr.u64        d26,d22,#41             @ from NEON_00_15
1779         vadd.i64        q7,q15
1780         vld1.64 {d28},[r3,:64]! @ K[i++]
1781         vsli.64 d24,d22,#50
1782         vsli.64 d25,d22,#46
1783         vmov    d29,d22
1784         vsli.64 d26,d22,#23
1785 #if 30<16 && defined(__ARMEL__)
1786         vrev64.8        ,
1787 #endif
1788         veor    d25,d24
1789         vbsl    d29,d23,d16             @ Ch(e,f,g)
1790         vshr.u64        d24,d18,#28
1791         veor    d26,d25                 @ Sigma1(e)
1792         vadd.i64        d27,d29,d17
1793         vshr.u64        d25,d18,#34
1794         vsli.64 d24,d18,#36
1795         vadd.i64        d27,d26
1796         vshr.u64        d26,d18,#39
1797         vadd.i64        d28,d14
1798         vsli.64 d25,d18,#30
1799         veor    d30,d18,d19
1800         vsli.64 d26,d18,#25
1801         veor    d17,d24,d25
1802         vadd.i64        d27,d28
1803         vbsl    d30,d20,d19             @ Maj(a,b,c)
1804         veor    d17,d26                 @ Sigma0(a)
1805         vadd.i64        d21,d27
1806         vadd.i64        d30,d27
1807         @ vadd.i64      d17,d30
1808         vshr.u64        d24,d21,#14     @ 31
1809 #if 31<16
1810         vld1.64 {d15},[r1]!     @ handles unaligned
1811 #endif
1812         vshr.u64        d25,d21,#18
1813 #if 31>0
1814         vadd.i64        d17,d30                 @ h+=Maj from the past
1815 #endif
1816         vshr.u64        d26,d21,#41
1817         vld1.64 {d28},[r3,:64]! @ K[i++]
1818         vsli.64 d24,d21,#50
1819         vsli.64 d25,d21,#46
1820         vmov    d29,d21
1821         vsli.64 d26,d21,#23
1822 #if 31<16 && defined(__ARMEL__)
1823         vrev64.8        ,
1824 #endif
1825         veor    d25,d24
1826         vbsl    d29,d22,d23             @ Ch(e,f,g)
1827         vshr.u64        d24,d17,#28
1828         veor    d26,d25                 @ Sigma1(e)
1829         vadd.i64        d27,d29,d16
1830         vshr.u64        d25,d17,#34
1831         vsli.64 d24,d17,#36
1832         vadd.i64        d27,d26
1833         vshr.u64        d26,d17,#39
1834         vadd.i64        d28,d15
1835         vsli.64 d25,d17,#30
1836         veor    d30,d17,d18
1837         vsli.64 d26,d17,#25
1838         veor    d16,d24,d25
1839         vadd.i64        d27,d28
1840         vbsl    d30,d19,d18             @ Maj(a,b,c)
1841         veor    d16,d26                 @ Sigma0(a)
1842         vadd.i64        d20,d27
1843         vadd.i64        d30,d27
1844         @ vadd.i64      d16,d30
1845         bne     .L16_79_neon
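        @ Annotation: end of the 16..79 round loop. Fold in the last deferred
        @ Maj, then add the working variables (d16-d23) back into the hash
        @ state kept at [r0] and loop over the next 128-byte block while
        @ input remains (r1 != r2).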
1847         vadd.i64        d16,d30         @ h+=Maj from the past
1848         vldmia  r0,{d24,d25,d26,d27,d28,d29,d30,d31}    @ load context to temp
1849         vadd.i64        q8,q12          @ vectorized accumulate
1850         vadd.i64        q9,q13
1851         vadd.i64        q10,q14
1852         vadd.i64        q11,q15
1853         vstmia  r0,{d16,d17,d18,d19,d20,d21,d22,d23}    @ save context
1854         teq     r1,r2
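        @ Annotation: K512 holds the 80 eight-byte round constants, so
        @ subtracting 80*8 = 640 resets r3 to the start of the table for the
        @ next block.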
1855         sub     r3,#640 @ rewind K512
1856         bne     .Loop_neon
1858         VFP_ABI_POP
1859         bx      lr                              @ .word 0xe12fff1e
1860 .size   sha512_block_data_order_neon,.-sha512_block_data_order_neon
1861 #endif
1862 .byte   83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1863 .align  2
1864 .align  2
1865 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
1866 .comm   OPENSSL_armcap_P,4,4
1867 .hidden OPENSSL_armcap_P
1868 #endif
1869 #endif