#include "arm_arch.h"

.text
.code   32

.global sha1_block_data_order
.type   sha1_block_data_order,%function

.align  5
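@ sha1_block_data_order processes 64-byte input blocks with the scalar
@ ARMv4 code below.  Judging from the register use (and the usual
@ OpenSSL convention), the arguments are: r0 = SHA-1 state (five 32-bit
@ words), r1 = input data, r2 = number of 64-byte blocks.  When
@ __ARM_MAX_ARCH__>=7 the prologue consults OPENSSL_armcap_P at run
@ time and branches to the ARMv8 Crypto Extensions or NEON paths below.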
sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
        sub     r3,pc,#8                @ sha1_block_data_order
        ldr     r12,.LOPENSSL_armcap
        ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
        tst     r12,#ARMV8_SHA1
        bne     .LARMv8
        tst     r12,#ARMV7_NEON
        bne     .LNEON
#endif
        stmdb   sp!,{r4-r12,lr}
        add     r2,r1,r2,lsl#6  @ r2 to point at the end of r1
        ldmia   r0,{r3,r4,r5,r6,r7}
.Lloop:
        ldr     r8,.LK_00_19
        mov     r14,sp
        sub     sp,sp,#15*4
        mov     r5,r5,ror#30
        mov     r6,r6,ror#30
        mov     r7,r7,ror#30            @ [6]
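@ Rounds 0..14: three unrolled passes of five rounds each.  B..E are
@ kept rotated right by 2 between rounds so the per-round ROL(B,30)
@ folds into ror#2 operands; .L_done compensates when storing the state.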
.L_00_15:
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r5,r6                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        eor     r10,r5,r6                       @ F_xx_xx
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r4,r10,ror#2
        add     r7,r7,r9                        @ E+=X[i]
        eor     r10,r10,r6,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r7,r7,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r6,r8,r6,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r4,r5                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r6,r8,r6,ror#2                  @ E+=K_00_19
        eor     r10,r4,r5                       @ F_xx_xx
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r3,r10,ror#2
        add     r6,r6,r9                        @ E+=X[i]
        eor     r10,r10,r5,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r6,r6,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r5,r8,r5,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r3,r4                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r5,r8,r5,ror#2                  @ E+=K_00_19
        eor     r10,r3,r4                       @ F_xx_xx
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r7,r10,ror#2
        add     r5,r5,r9                        @ E+=X[i]
        eor     r10,r10,r4,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r5,r5,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r4,r8,r4,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r7,r3                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r4,r8,r4,ror#2                  @ E+=K_00_19
        eor     r10,r7,r3                       @ F_xx_xx
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r6,r10,ror#2
        add     r4,r4,r9                        @ E+=X[i]
        eor     r10,r10,r3,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r4,r4,r10                       @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r3,r8,r3,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r6,r7                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r3,r8,r3,ror#2                  @ E+=K_00_19
        eor     r10,r6,r7                       @ F_xx_xx
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r5,r10,ror#2
        add     r3,r3,r9                        @ E+=X[i]
        eor     r10,r10,r7,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r3,r3,r10                       @ E+=F_00_19(B,C,D)
        teq     r14,sp
        bne     .L_00_15                @ [((11+4)*5+2)*3]
        sub     sp,sp,#25*4
#if __ARM_ARCH__<7
        ldrb    r10,[r1,#2]
        ldrb    r9,[r1,#3]
        ldrb    r11,[r1,#1]
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        ldrb    r12,[r1],#4
        orr     r9,r9,r10,lsl#8
        eor     r10,r5,r6                       @ F_xx_xx
        orr     r9,r9,r11,lsl#16
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        orr     r9,r9,r12,lsl#24
#else
        ldr     r9,[r1],#4                      @ handles unaligned
        add     r7,r8,r7,ror#2                  @ E+=K_00_19
        eor     r10,r5,r6                       @ F_xx_xx
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
#ifdef __ARMEL__
        rev     r9,r9                           @ byte swap
#endif
#endif
        and     r10,r4,r10,ror#2
        add     r7,r7,r9                        @ E+=X[i]
        eor     r10,r10,r6,ror#2                @ F_00_19(B,C,D)
        str     r9,[r14,#-4]!
        add     r7,r7,r10                       @ E+=F_00_19(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r4,r5                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r3,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r6,r6,r9                        @ E+=X[i]
        eor     r10,r10,r5,ror#2                @ F_00_19(B,C,D)
        add     r6,r6,r10                       @ E+=F_00_19(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r3,r4                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r7,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r5,r5,r9                        @ E+=X[i]
        eor     r10,r10,r4,ror#2                @ F_00_19(B,C,D)
        add     r5,r5,r10                       @ E+=F_00_19(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r7,r3                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r6,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r4,r4,r9                        @ E+=X[i]
        eor     r10,r10,r3,ror#2                @ F_00_19(B,C,D)
        add     r4,r4,r10                       @ E+=F_00_19(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r6,r7                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r5,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r3,r3,r9                        @ E+=X[i]
        eor     r10,r10,r7,ror#2                @ F_00_19(B,C,D)
        add     r3,r3,r10                       @ E+=F_00_19(B,C,D)

        ldr     r8,.LK_20_39            @ [+15+16*4]
        cmn     sp,#0                   @ [+3], clear carry to denote 20_39
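@ Rounds 20..39 and 60..79 both use F(B,C,D)=B^C^D and differ only in
@ the K constant held in r8, so they share this loop.  The carry flag
@ (cleared above for 20..39, set before re-entry for 60..79) tells the
@ bcs below which pass just ended.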
.L_20_39_or_60_79:
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r7,r8,r7,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r5,r6                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r4,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r7,r7,r9                        @ E+=X[i]
        add     r7,r7,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r4,r5                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r3,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r6,r6,r9                        @ E+=X[i]
        add     r6,r6,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r3,r4                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r7,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r5,r5,r9                        @ E+=X[i]
        add     r5,r5,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r7,r3                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r6,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r4,r4,r9                        @ E+=X[i]
        add     r4,r4,r10                       @ E+=F_20_39(B,C,D)
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r6,r7                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        eor r10,r5,r10,ror#2                                    @ F_xx_xx
                                                @ F_xx_xx
        add     r3,r3,r9                        @ E+=X[i]
        add     r3,r3,r10                       @ E+=F_20_39(B,C,D)
        teq     r14,sp                  @ preserve carry
        bne     .L_20_39_or_60_79       @ [+((12+3)*5+2)*4]
        bcs     .L_done                 @ [+((12+3)*5+2)*4], spare 300 bytes

        ldr     r8,.LK_40_59
        sub     sp,sp,#20*4             @ [+2]
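@ Rounds 40..59: F(B,C,D)=Maj(B,C,D), computed as (B&(C^D))+(C&D);
@ the two terms cannot both be set, so they are added to E separately
@ (r10 and r11 below).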
.L_40_59:
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r7,r8,r7,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r5,r6                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r4,r10,ror#2                                    @ F_xx_xx
        and r11,r5,r6                                   @ F_xx_xx
        add     r7,r7,r9                        @ E+=X[i]
        add     r7,r7,r10                       @ E+=F_40_59(B,C,D)
        add     r7,r7,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r4,r5                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r3,r10,ror#2                                    @ F_xx_xx
        and r11,r4,r5                                   @ F_xx_xx
        add     r6,r6,r9                        @ E+=X[i]
        add     r6,r6,r10                       @ E+=F_40_59(B,C,D)
        add     r6,r6,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r3,r4                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r7,r10,ror#2                                    @ F_xx_xx
        and r11,r3,r4                                   @ F_xx_xx
        add     r5,r5,r9                        @ E+=X[i]
        add     r5,r5,r10                       @ E+=F_40_59(B,C,D)
        add     r5,r5,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r7,r3                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r6,r10,ror#2                                    @ F_xx_xx
        and r11,r7,r3                                   @ F_xx_xx
        add     r4,r4,r9                        @ E+=X[i]
        add     r4,r4,r10                       @ E+=F_40_59(B,C,D)
        add     r4,r4,r11,ror#2
        ldr     r9,[r14,#15*4]
        ldr     r10,[r14,#13*4]
        ldr     r11,[r14,#7*4]
        add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
        ldr     r12,[r14,#2*4]
        eor     r9,r9,r10
        eor     r11,r11,r12                     @ 1 cycle stall
        eor     r10,r6,r7                       @ F_xx_xx
        mov     r9,r9,ror#31
        add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
        eor     r9,r9,r11,ror#31
        str     r9,[r14,#-4]!
        and r10,r5,r10,ror#2                                    @ F_xx_xx
        and r11,r6,r7                                   @ F_xx_xx
        add     r3,r3,r9                        @ E+=X[i]
        add     r3,r3,r10                       @ E+=F_40_59(B,C,D)
        add     r3,r3,r11,ror#2
        teq     r14,sp
        bne     .L_40_59                @ [+((12+5)*5+2)*4]

        ldr     r8,.LK_60_79
        sub     sp,sp,#20*4
        cmp     sp,#0                   @ set carry to denote 60_79
        b       .L_20_39_or_60_79       @ [+4], spare 300 bytes
.L_done:
        add     sp,sp,#80*4             @ "deallocate" stack frame
        ldmia   r0,{r8,r9,r10,r11,r12}
        add     r3,r8,r3
        add     r4,r9,r4
        add     r5,r10,r5,ror#2
        add     r6,r11,r6,ror#2
        add     r7,r12,r7,ror#2
        stmia   r0,{r3,r4,r5,r6,r7}
        teq     r1,r2
        bne     .Lloop                  @ [+18], total 1307

#if __ARM_ARCH__>=5
        ldmia   sp!,{r4-r12,pc}
#else
        ldmia   sp!,{r4-r12,lr}
        tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
#endif
.size   sha1_block_data_order,.-sha1_block_data_order

.align  5
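@ The four SHA-1 round constants (FIPS 180-4, section 4.2.1).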
.LK_00_19:      .word   0x5a827999
.LK_20_39:      .word   0x6ed9eba1
.LK_40_59:      .word   0x8f1bbcdc
.LK_60_79:      .word   0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word   OPENSSL_armcap_P-sha1_block_data_order
#endif
.asciz  "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align  5
#if __ARM_MAX_ARCH__>=7
.arch   armv7-a
.fpu    neon

.type   sha1_block_data_order_neon,%function
.align  4
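@ NEON code path: the message schedule is expanded four words at a
@ time with vector operations (vext/veor/vsri/vshl), and X[i]+K values
@ are staged on the stack, while the 80 rounds themselves still run on
@ the scalar pipeline, interleaved with the vector work.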
sha1_block_data_order_neon:
.LNEON:
        stmdb   sp!,{r4-r12,lr}
        add     r2,r1,r2,lsl#6  @ r2 to point at the end of r1
        @ dmb                           @ errata #451034 on early Cortex A8
        @ vstmdb        sp!,{d8-d15}    @ ABI specification says so
        mov     r14,sp
        sub     sp,sp,#64               @ alloca
        adr     r8,.LK_00_19
        bic     sp,sp,#15               @ align for 128-bit stores

        ldmia   r0,{r3,r4,r5,r6,r7}     @ load context
        mov     r12,sp

        vld1.8          {q0-q1},[r1]!   @ handles unaligned
        veor            q15,q15,q15
        vld1.8          {q2-q3},[r1]!
        vld1.32         {d28[],d29[]},[r8,:32]! @ load K_00_19
        vrev32.8        q0,q0           @ yes, even on
        vrev32.8        q1,q1           @ big-endian...
        vrev32.8        q2,q2
        vadd.i32        q8,q0,q14
        vrev32.8        q3,q3
        vadd.i32        q9,q1,q14
        vst1.32         {q8},[r12,:128]!
        vadd.i32        q10,q2,q14
        vst1.32         {q9},[r12,:128]!
        vst1.32         {q10},[r12,:128]!
        ldr             r9,[sp]                 @ big RAW stall
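@ One .Loop_neon iteration runs the 80 scalar rounds for the current
@ block; interleaved vector code keeps extending the schedule, parks
@ X[i]+K at [r12] (wrapping every 64 bytes), and near the end preloads
@ the next block.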
.Loop_neon:
        vext.8  q8,q0,q1,#8
        bic     r10,r6,r4
        add     r7,r7,r9
        and     r11,r5,r4
        vadd.i32        q13,q3,q14
        ldr     r9,[sp,#4]
        add     r7,r7,r3,ror#27
        vext.8  q12,q3,q15,#4
        eor     r11,r11,r10
        mov     r4,r4,ror#2
        add     r7,r7,r11
        veor    q8,q8,q0
        bic     r10,r5,r3
        add     r6,r6,r9
        veor    q12,q12,q2
        and     r11,r4,r3
        ldr     r9,[sp,#8]
        veor    q12,q12,q8
        add     r6,r6,r7,ror#27
        eor     r11,r11,r10
        vst1.32 {q13},[r12,:128]!
        sub     r12,r12,#64
        mov     r3,r3,ror#2
        add     r6,r6,r11
        vext.8  q13,q15,q12,#4
        bic     r10,r4,r7
        add     r5,r5,r9
        vadd.i32        q8,q12,q12
        and     r11,r3,r7
        ldr     r9,[sp,#12]
        vsri.32 q8,q12,#31
        add     r5,r5,r6,ror#27
        eor     r11,r11,r10
        mov     r7,r7,ror#2
        vshr.u32        q12,q13,#30
        add     r5,r5,r11
        bic     r10,r3,r6
        vshl.u32        q13,q13,#2
        add     r4,r4,r9
        and     r11,r7,r6
        veor    q8,q8,q12
        ldr     r9,[sp,#16]
        add     r4,r4,r5,ror#27
        veor    q8,q8,q13
        eor     r11,r11,r10
        mov     r6,r6,ror#2
        add     r4,r4,r11
        vext.8  q9,q1,q2,#8
        bic     r10,r7,r5
        add     r3,r3,r9
        and     r11,r6,r5
        vadd.i32        q13,q8,q14
        ldr     r9,[sp,#20]
        vld1.32 {d28[],d29[]},[r8,:32]!
        add     r3,r3,r4,ror#27
        vext.8  q12,q8,q15,#4
        eor     r11,r11,r10
        mov     r5,r5,ror#2
        add     r3,r3,r11
        veor    q9,q9,q1
        bic     r10,r6,r4
        add     r7,r7,r9
        veor    q12,q12,q3
        and     r11,r5,r4
        ldr     r9,[sp,#24]
        veor    q12,q12,q9
        add     r7,r7,r3,ror#27
        eor     r11,r11,r10
        vst1.32 {q13},[r12,:128]!
        mov     r4,r4,ror#2
        add     r7,r7,r11
        vext.8  q13,q15,q12,#4
        bic     r10,r5,r3
        add     r6,r6,r9
        vadd.i32        q9,q12,q12
        and     r11,r4,r3
        ldr     r9,[sp,#28]
        vsri.32 q9,q12,#31
        add     r6,r6,r7,ror#27
        eor     r11,r11,r10
        mov     r3,r3,ror#2
        vshr.u32        q12,q13,#30
        add     r6,r6,r11
        bic     r10,r4,r7
        vshl.u32        q13,q13,#2
        add     r5,r5,r9
        and     r11,r3,r7
        veor    q9,q9,q12
        ldr     r9,[sp,#32]
        add     r5,r5,r6,ror#27
        veor    q9,q9,q13
        eor     r11,r11,r10
        mov     r7,r7,ror#2
        add     r5,r5,r11
        vext.8  q10,q2,q3,#8
        bic     r10,r3,r6
        add     r4,r4,r9
        and     r11,r7,r6
        vadd.i32        q13,q9,q14
        ldr     r9,[sp,#36]
        add     r4,r4,r5,ror#27
        vext.8  q12,q9,q15,#4
        eor     r11,r11,r10
        mov     r6,r6,ror#2
        add     r4,r4,r11
        veor    q10,q10,q2
        bic     r10,r7,r5
        add     r3,r3,r9
        veor    q12,q12,q8
        and     r11,r6,r5
        ldr     r9,[sp,#40]
        veor    q12,q12,q10
        add     r3,r3,r4,ror#27
        eor     r11,r11,r10
        vst1.32 {q13},[r12,:128]!
        mov     r5,r5,ror#2
        add     r3,r3,r11
        vext.8  q13,q15,q12,#4
        bic     r10,r6,r4
        add     r7,r7,r9
        vadd.i32        q10,q12,q12
        and     r11,r5,r4
        ldr     r9,[sp,#44]
        vsri.32 q10,q12,#31
        add     r7,r7,r3,ror#27
        eor     r11,r11,r10
        mov     r4,r4,ror#2
        vshr.u32        q12,q13,#30
        add     r7,r7,r11
        bic     r10,r5,r3
        vshl.u32        q13,q13,#2
        add     r6,r6,r9
        and     r11,r4,r3
        veor    q10,q10,q12
        ldr     r9,[sp,#48]
        add     r6,r6,r7,ror#27
        veor    q10,q10,q13
        eor     r11,r11,r10
        mov     r3,r3,ror#2
        add     r6,r6,r11
        vext.8  q11,q3,q8,#8
        bic     r10,r4,r7
        add     r5,r5,r9
        and     r11,r3,r7
        vadd.i32        q13,q10,q14
        ldr     r9,[sp,#52]
        add     r5,r5,r6,ror#27
        vext.8  q12,q10,q15,#4
        eor     r11,r11,r10
        mov     r7,r7,ror#2
        add     r5,r5,r11
        veor    q11,q11,q3
        bic     r10,r3,r6
        add     r4,r4,r9
        veor    q12,q12,q9
        and     r11,r7,r6
        ldr     r9,[sp,#56]
        veor    q12,q12,q11
        add     r4,r4,r5,ror#27
        eor     r11,r11,r10
        vst1.32 {q13},[r12,:128]!
        mov     r6,r6,ror#2
        add     r4,r4,r11
        vext.8  q13,q15,q12,#4
        bic     r10,r7,r5
        add     r3,r3,r9
        vadd.i32        q11,q12,q12
        and     r11,r6,r5
        ldr     r9,[sp,#60]
        vsri.32 q11,q12,#31
        add     r3,r3,r4,ror#27
        eor     r11,r11,r10
        mov     r5,r5,ror#2
        vshr.u32        q12,q13,#30
        add     r3,r3,r11
        bic     r10,r6,r4
        vshl.u32        q13,q13,#2
        add     r7,r7,r9
        and     r11,r5,r4
        veor    q11,q11,q12
        ldr     r9,[sp,#0]
        add     r7,r7,r3,ror#27
        veor    q11,q11,q13
        eor     r11,r11,r10
        mov     r4,r4,ror#2
        add     r7,r7,r11
        vext.8  q12,q10,q11,#8
        bic     r10,r5,r3
        add     r6,r6,r9
        and     r11,r4,r3
        veor    q0,q0,q8
        ldr     r9,[sp,#4]
        add     r6,r6,r7,ror#27
        veor    q0,q0,q1
        eor     r11,r11,r10
        mov     r3,r3,ror#2
        vadd.i32        q13,q11,q14
        add     r6,r6,r11
        bic     r10,r4,r7
        veor    q12,q12,q0
        add     r5,r5,r9
        and     r11,r3,r7
        vshr.u32        q0,q12,#30
        ldr     r9,[sp,#8]
        add     r5,r5,r6,ror#27
        vst1.32 {q13},[r12,:128]!
        sub     r12,r12,#64
        eor     r11,r11,r10
        mov     r7,r7,ror#2
        vsli.32 q0,q12,#2
        add     r5,r5,r11
        bic     r10,r3,r6
        add     r4,r4,r9
        and     r11,r7,r6
        ldr     r9,[sp,#12]
        add     r4,r4,r5,ror#27
        eor     r11,r11,r10
        mov     r6,r6,ror#2
        add     r4,r4,r11
        bic     r10,r7,r5
        add     r3,r3,r9
        and     r11,r6,r5
        ldr     r9,[sp,#16]
        add     r3,r3,r4,ror#27
        eor     r11,r11,r10
        mov     r5,r5,ror#2
        add     r3,r3,r11
        vext.8  q12,q11,q0,#8
        eor     r10,r4,r6
        add     r7,r7,r9
        ldr     r9,[sp,#20]
        veor    q1,q1,q9
        eor     r11,r10,r5
        add     r7,r7,r3,ror#27
        veor    q1,q1,q2
        mov     r4,r4,ror#2
        add     r7,r7,r11
        vadd.i32        q13,q0,q14
        eor     r10,r3,r5
        add     r6,r6,r9
        veor    q12,q12,q1
        ldr     r9,[sp,#24]
        eor     r11,r10,r4
        vshr.u32        q1,q12,#30
        add     r6,r6,r7,ror#27
        mov     r3,r3,ror#2
        vst1.32 {q13},[r12,:128]!
        add     r6,r6,r11
        eor     r10,r7,r4
        vsli.32 q1,q12,#2
        add     r5,r5,r9
        ldr     r9,[sp,#28]
        eor     r11,r10,r3
        add     r5,r5,r6,ror#27
        mov     r7,r7,ror#2
        add     r5,r5,r11
        eor     r10,r6,r3
        add     r4,r4,r9
        ldr     r9,[sp,#32]
        eor     r11,r10,r7
        add     r4,r4,r5,ror#27
        mov     r6,r6,ror#2
        add     r4,r4,r11
        vext.8  q12,q0,q1,#8
        eor     r10,r5,r7
        add     r3,r3,r9
        ldr     r9,[sp,#36]
        veor    q2,q2,q10
        eor     r11,r10,r6
        add     r3,r3,r4,ror#27
        veor    q2,q2,q3
        mov     r5,r5,ror#2
        add     r3,r3,r11
        vadd.i32        q13,q1,q14
        eor     r10,r4,r6
        vld1.32 {d28[],d29[]},[r8,:32]!
        add     r7,r7,r9
        veor    q12,q12,q2
        ldr     r9,[sp,#40]
        eor     r11,r10,r5
        vshr.u32        q2,q12,#30
        add     r7,r7,r3,ror#27
        mov     r4,r4,ror#2
        vst1.32 {q13},[r12,:128]!
        add     r7,r7,r11
        eor     r10,r3,r5
        vsli.32 q2,q12,#2
        add     r6,r6,r9
        ldr     r9,[sp,#44]
        eor     r11,r10,r4
        add     r6,r6,r7,ror#27
        mov     r3,r3,ror#2
        add     r6,r6,r11
        eor     r10,r7,r4
        add     r5,r5,r9
        ldr     r9,[sp,#48]
        eor     r11,r10,r3
        add     r5,r5,r6,ror#27
        mov     r7,r7,ror#2
        add     r5,r5,r11
        vext.8  q12,q1,q2,#8
        eor     r10,r6,r3
        add     r4,r4,r9
        ldr     r9,[sp,#52]
        veor    q3,q3,q11
        eor     r11,r10,r7
        add     r4,r4,r5,ror#27
        veor    q3,q3,q8
        mov     r6,r6,ror#2
        add     r4,r4,r11
        vadd.i32        q13,q2,q14
        eor     r10,r5,r7
        add     r3,r3,r9
        veor    q12,q12,q3
        ldr     r9,[sp,#56]
        eor     r11,r10,r6
        vshr.u32        q3,q12,#30
        add     r3,r3,r4,ror#27
        mov     r5,r5,ror#2
        vst1.32 {q13},[r12,:128]!
        add     r3,r3,r11
        eor     r10,r4,r6
        vsli.32 q3,q12,#2
        add     r7,r7,r9
        ldr     r9,[sp,#60]
        eor     r11,r10,r5
        add     r7,r7,r3,ror#27
        mov     r4,r4,ror#2
        add     r7,r7,r11
        eor     r10,r3,r5
        add     r6,r6,r9
        ldr     r9,[sp,#0]
        eor     r11,r10,r4
        add     r6,r6,r7,ror#27
        mov     r3,r3,ror#2
        add     r6,r6,r11
        vext.8  q12,q2,q3,#8
        eor     r10,r7,r4
        add     r5,r5,r9
        ldr     r9,[sp,#4]
        veor    q8,q8,q0
        eor     r11,r10,r3
        add     r5,r5,r6,ror#27
        veor    q8,q8,q9
        mov     r7,r7,ror#2
        add     r5,r5,r11
        vadd.i32        q13,q3,q14
        eor     r10,r6,r3
        add     r4,r4,r9
        veor    q12,q12,q8
        ldr     r9,[sp,#8]
        eor     r11,r10,r7
        vshr.u32        q8,q12,#30
        add     r4,r4,r5,ror#27
        mov     r6,r6,ror#2
        vst1.32 {q13},[r12,:128]!
        sub     r12,r12,#64
        add     r4,r4,r11
        eor     r10,r5,r7
        vsli.32 q8,q12,#2
        add     r3,r3,r9
        ldr     r9,[sp,#12]
        eor     r11,r10,r6
        add     r3,r3,r4,ror#27
        mov     r5,r5,ror#2
        add     r3,r3,r11
        eor     r10,r4,r6
        add     r7,r7,r9
        ldr     r9,[sp,#16]
        eor     r11,r10,r5
        add     r7,r7,r3,ror#27
        mov     r4,r4,ror#2
        add     r7,r7,r11
        vext.8  q12,q3,q8,#8
        eor     r10,r3,r5
        add     r6,r6,r9
        ldr     r9,[sp,#20]
        veor    q9,q9,q1
        eor     r11,r10,r4
        add     r6,r6,r7,ror#27
        veor    q9,q9,q10
        mov     r3,r3,ror#2
        add     r6,r6,r11
        vadd.i32        q13,q8,q14
        eor     r10,r7,r4
        add     r5,r5,r9
        veor    q12,q12,q9
        ldr     r9,[sp,#24]
        eor     r11,r10,r3
        vshr.u32        q9,q12,#30
        add     r5,r5,r6,ror#27
        mov     r7,r7,ror#2
        vst1.32 {q13},[r12,:128]!
        add     r5,r5,r11
        eor     r10,r6,r3
        vsli.32 q9,q12,#2
        add     r4,r4,r9
        ldr     r9,[sp,#28]
        eor     r11,r10,r7
        add     r4,r4,r5,ror#27
        mov     r6,r6,ror#2
        add     r4,r4,r11
        eor     r10,r5,r7
        add     r3,r3,r9
        ldr     r9,[sp,#32]
        eor     r11,r10,r6
        add     r3,r3,r4,ror#27
        mov     r5,r5,ror#2
        add     r3,r3,r11
        vext.8  q12,q8,q9,#8
        add     r7,r7,r9
        and     r10,r5,r6
        ldr     r9,[sp,#36]
        veor    q10,q10,q2
        add     r7,r7,r3,ror#27
        eor     r11,r5,r6
        veor    q10,q10,q11
        add     r7,r7,r10
        and     r11,r11,r4
        vadd.i32        q13,q9,q14
        mov     r4,r4,ror#2
        add     r7,r7,r11
        veor    q12,q12,q10
        add     r6,r6,r9
        and     r10,r4,r5
        vshr.u32        q10,q12,#30
        ldr     r9,[sp,#40]
        add     r6,r6,r7,ror#27
        vst1.32 {q13},[r12,:128]!
        eor     r11,r4,r5
        add     r6,r6,r10
        vsli.32 q10,q12,#2
        and     r11,r11,r3
        mov     r3,r3,ror#2
        add     r6,r6,r11
        add     r5,r5,r9
        and     r10,r3,r4
        ldr     r9,[sp,#44]
        add     r5,r5,r6,ror#27
        eor     r11,r3,r4
        add     r5,r5,r10
        and     r11,r11,r7
        mov     r7,r7,ror#2
        add     r5,r5,r11
        add     r4,r4,r9
        and     r10,r7,r3
        ldr     r9,[sp,#48]
        add     r4,r4,r5,ror#27
        eor     r11,r7,r3
        add     r4,r4,r10
        and     r11,r11,r6
        mov     r6,r6,ror#2
        add     r4,r4,r11
        vext.8  q12,q9,q10,#8
        add     r3,r3,r9
        and     r10,r6,r7
        ldr     r9,[sp,#52]
        veor    q11,q11,q3
        add     r3,r3,r4,ror#27
        eor     r11,r6,r7
        veor    q11,q11,q0
        add     r3,r3,r10
        and     r11,r11,r5
        vadd.i32        q13,q10,q14
        mov     r5,r5,ror#2
        vld1.32 {d28[],d29[]},[r8,:32]!
        add     r3,r3,r11
        veor    q12,q12,q11
        add     r7,r7,r9
        and     r10,r5,r6
        vshr.u32        q11,q12,#30
        ldr     r9,[sp,#56]
        add     r7,r7,r3,ror#27
        vst1.32 {q13},[r12,:128]!
        eor     r11,r5,r6
        add     r7,r7,r10
        vsli.32 q11,q12,#2
        and     r11,r11,r4
        mov     r4,r4,ror#2
        add     r7,r7,r11
        add     r6,r6,r9
        and     r10,r4,r5
        ldr     r9,[sp,#60]
        add     r6,r6,r7,ror#27
        eor     r11,r4,r5
        add     r6,r6,r10
        and     r11,r11,r3
        mov     r3,r3,ror#2
        add     r6,r6,r11
        add     r5,r5,r9
        and     r10,r3,r4
        ldr     r9,[sp,#0]
        add     r5,r5,r6,ror#27
        eor     r11,r3,r4
        add     r5,r5,r10
        and     r11,r11,r7
        mov     r7,r7,ror#2
        add     r5,r5,r11
        vext.8  q12,q10,q11,#8
        add     r4,r4,r9
        and     r10,r7,r3
        ldr     r9,[sp,#4]
        veor    q0,q0,q8
        add     r4,r4,r5,ror#27
        eor     r11,r7,r3
        veor    q0,q0,q1
        add     r4,r4,r10
        and     r11,r11,r6
        vadd.i32        q13,q11,q14
        mov     r6,r6,ror#2
        add     r4,r4,r11
        veor    q12,q12,q0
        add     r3,r3,r9
        and     r10,r6,r7
        vshr.u32        q0,q12,#30
        ldr     r9,[sp,#8]
        add     r3,r3,r4,ror#27
        vst1.32 {q13},[r12,:128]!
        sub     r12,r12,#64
        eor     r11,r6,r7
        add     r3,r3,r10
        vsli.32 q0,q12,#2
        and     r11,r11,r5
        mov     r5,r5,ror#2
        add     r3,r3,r11
        add     r7,r7,r9
        and     r10,r5,r6
        ldr     r9,[sp,#12]
        add     r7,r7,r3,ror#27
        eor     r11,r5,r6
        add     r7,r7,r10
        and     r11,r11,r4
        mov     r4,r4,ror#2
        add     r7,r7,r11
        add     r6,r6,r9
        and     r10,r4,r5
        ldr     r9,[sp,#16]
        add     r6,r6,r7,ror#27
        eor     r11,r4,r5
        add     r6,r6,r10
        and     r11,r11,r3
        mov     r3,r3,ror#2
        add     r6,r6,r11
        vext.8  q12,q11,q0,#8
        add     r5,r5,r9
        and     r10,r3,r4
        ldr     r9,[sp,#20]
        veor    q1,q1,q9
        add     r5,r5,r6,ror#27
        eor     r11,r3,r4
        veor    q1,q1,q2
        add     r5,r5,r10
        and     r11,r11,r7
        vadd.i32        q13,q0,q14
        mov     r7,r7,ror#2
        add     r5,r5,r11
        veor    q12,q12,q1
        add     r4,r4,r9
        and     r10,r7,r3
        vshr.u32        q1,q12,#30
        ldr     r9,[sp,#24]
        add     r4,r4,r5,ror#27
        vst1.32 {q13},[r12,:128]!
        eor     r11,r7,r3
        add     r4,r4,r10
        vsli.32 q1,q12,#2
        and     r11,r11,r6
        mov     r6,r6,ror#2
        add     r4,r4,r11
        add     r3,r3,r9
        and     r10,r6,r7
        ldr     r9,[sp,#28]
        add     r3,r3,r4,ror#27
        eor     r11,r6,r7
        add     r3,r3,r10
        and     r11,r11,r5
        mov     r5,r5,ror#2
        add     r3,r3,r11
        add     r7,r7,r9
        and     r10,r5,r6
        ldr     r9,[sp,#32]
        add     r7,r7,r3,ror#27
        eor     r11,r5,r6
        add     r7,r7,r10
        and     r11,r11,r4
        mov     r4,r4,ror#2
        add     r7,r7,r11
        vext.8  q12,q0,q1,#8
        add     r6,r6,r9
        and     r10,r4,r5
        ldr     r9,[sp,#36]
        veor    q2,q2,q10
        add     r6,r6,r7,ror#27
        eor     r11,r4,r5
        veor    q2,q2,q3
        add     r6,r6,r10
        and     r11,r11,r3
        vadd.i32        q13,q1,q14
        mov     r3,r3,ror#2
        add     r6,r6,r11
        veor    q12,q12,q2
        add     r5,r5,r9
        and     r10,r3,r4
        vshr.u32        q2,q12,#30
        ldr     r9,[sp,#40]
        add     r5,r5,r6,ror#27
        vst1.32 {q13},[r12,:128]!
        eor     r11,r3,r4
        add     r5,r5,r10
        vsli.32 q2,q12,#2
        and     r11,r11,r7
        mov     r7,r7,ror#2
        add     r5,r5,r11
        add     r4,r4,r9
        and     r10,r7,r3
        ldr     r9,[sp,#44]
        add     r4,r4,r5,ror#27
        eor     r11,r7,r3
        add     r4,r4,r10
        and     r11,r11,r6
        mov     r6,r6,ror#2
        add     r4,r4,r11
        add     r3,r3,r9
        and     r10,r6,r7
        ldr     r9,[sp,#48]
        add     r3,r3,r4,ror#27
        eor     r11,r6,r7
        add     r3,r3,r10
        and     r11,r11,r5
        mov     r5,r5,ror#2
        add     r3,r3,r11
        vext.8  q12,q1,q2,#8
        eor     r10,r4,r6
        add     r7,r7,r9
        ldr     r9,[sp,#52]
        veor    q3,q3,q11
        eor     r11,r10,r5
        add     r7,r7,r3,ror#27
        veor    q3,q3,q8
        mov     r4,r4,ror#2
        add     r7,r7,r11
        vadd.i32        q13,q2,q14
        eor     r10,r3,r5
        add     r6,r6,r9
        veor    q12,q12,q3
        ldr     r9,[sp,#56]
        eor     r11,r10,r4
        vshr.u32        q3,q12,#30
        add     r6,r6,r7,ror#27
        mov     r3,r3,ror#2
        vst1.32 {q13},[r12,:128]!
        add     r6,r6,r11
        eor     r10,r7,r4
        vsli.32 q3,q12,#2
        add     r5,r5,r9
        ldr     r9,[sp,#60]
        eor     r11,r10,r3
        add     r5,r5,r6,ror#27
        mov     r7,r7,ror#2
        add     r5,r5,r11
        eor     r10,r6,r3
        add     r4,r4,r9
        ldr     r9,[sp,#0]
        eor     r11,r10,r7
        add     r4,r4,r5,ror#27
        mov     r6,r6,ror#2
        add     r4,r4,r11
        vadd.i32        q13,q3,q14
        eor     r10,r5,r7
        add     r3,r3,r9
        vst1.32 {q13},[r12,:128]!
        sub     r12,r12,#64
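@ Tail handling: r8 rewinds to .LK_00_19 for the next block.  When the
@ end of input is reached (r1==r2), r1 is stepped back so the preload
@ below harmlessly re-reads the last block, and moveq/ldrne/addne pick
@ between exiting and setting up another iteration.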
        teq     r1,r2
        sub     r8,r8,#16
        subeq   r1,r1,#64
        vld1.8  {q0-q1},[r1]!
        ldr     r9,[sp,#4]
        eor     r11,r10,r6
        vld1.8  {q2-q3},[r1]!
        add     r3,r3,r4,ror#27
        mov     r5,r5,ror#2
        vld1.32 {d28[],d29[]},[r8,:32]!
        add     r3,r3,r11
        eor     r10,r4,r6
        vrev32.8        q0,q0
        add     r7,r7,r9
        ldr     r9,[sp,#8]
        eor     r11,r10,r5
        add     r7,r7,r3,ror#27
        mov     r4,r4,ror#2
        add     r7,r7,r11
        eor     r10,r3,r5
        add     r6,r6,r9
        ldr     r9,[sp,#12]
        eor     r11,r10,r4
        add     r6,r6,r7,ror#27
        mov     r3,r3,ror#2
        add     r6,r6,r11
        eor     r10,r7,r4
        add     r5,r5,r9
        ldr     r9,[sp,#16]
        eor     r11,r10,r3
        add     r5,r5,r6,ror#27
        mov     r7,r7,ror#2
        add     r5,r5,r11
        vrev32.8        q1,q1
        eor     r10,r6,r3
        add     r4,r4,r9
        vadd.i32        q8,q0,q14
        ldr     r9,[sp,#20]
        eor     r11,r10,r7
        vst1.32 {q8},[r12,:128]!
        add     r4,r4,r5,ror#27
        mov     r6,r6,ror#2
        add     r4,r4,r11
        eor     r10,r5,r7
        add     r3,r3,r9
        ldr     r9,[sp,#24]
        eor     r11,r10,r6
        add     r3,r3,r4,ror#27
        mov     r5,r5,ror#2
        add     r3,r3,r11
        eor     r10,r4,r6
        add     r7,r7,r9
        ldr     r9,[sp,#28]
        eor     r11,r10,r5
        add     r7,r7,r3,ror#27
        mov     r4,r4,ror#2
        add     r7,r7,r11
        eor     r10,r3,r5
        add     r6,r6,r9
        ldr     r9,[sp,#32]
        eor     r11,r10,r4
        add     r6,r6,r7,ror#27
        mov     r3,r3,ror#2
        add     r6,r6,r11
        vrev32.8        q2,q2
        eor     r10,r7,r4
        add     r5,r5,r9
        vadd.i32        q9,q1,q14
        ldr     r9,[sp,#36]
        eor     r11,r10,r3
        vst1.32 {q9},[r12,:128]!
        add     r5,r5,r6,ror#27
        mov     r7,r7,ror#2
        add     r5,r5,r11
        eor     r10,r6,r3
        add     r4,r4,r9
        ldr     r9,[sp,#40]
        eor     r11,r10,r7
        add     r4,r4,r5,ror#27
        mov     r6,r6,ror#2
        add     r4,r4,r11
        eor     r10,r5,r7
        add     r3,r3,r9
        ldr     r9,[sp,#44]
        eor     r11,r10,r6
        add     r3,r3,r4,ror#27
        mov     r5,r5,ror#2
        add     r3,r3,r11
        eor     r10,r4,r6
        add     r7,r7,r9
        ldr     r9,[sp,#48]
        eor     r11,r10,r5
        add     r7,r7,r3,ror#27
        mov     r4,r4,ror#2
        add     r7,r7,r11
        vrev32.8        q3,q3
        eor     r10,r3,r5
        add     r6,r6,r9
        vadd.i32        q10,q2,q14
        ldr     r9,[sp,#52]
        eor     r11,r10,r4
        vst1.32 {q10},[r12,:128]!
        add     r6,r6,r7,ror#27
        mov     r3,r3,ror#2
        add     r6,r6,r11
        eor     r10,r7,r4
        add     r5,r5,r9
        ldr     r9,[sp,#56]
        eor     r11,r10,r3
        add     r5,r5,r6,ror#27
        mov     r7,r7,ror#2
        add     r5,r5,r11
        eor     r10,r6,r3
        add     r4,r4,r9
        ldr     r9,[sp,#60]
        eor     r11,r10,r7
        add     r4,r4,r5,ror#27
        mov     r6,r6,ror#2
        add     r4,r4,r11
        eor     r10,r5,r7
        add     r3,r3,r9
        eor     r11,r10,r6
        add     r3,r3,r4,ror#27
        mov     r5,r5,ror#2
        add     r3,r3,r11
        ldmia   r0,{r9,r10,r11,r12}     @ accumulate context
        add     r3,r3,r9
        ldr     r9,[r0,#16]
        add     r4,r4,r10
        add     r5,r5,r11
        add     r6,r6,r12
        moveq   sp,r14
        add     r7,r7,r9
        ldrne   r9,[sp]
        stmia   r0,{r3,r4,r5,r6,r7}
        addne   r12,sp,#3*16
        bne     .Loop_neon

        @ vldmia        sp!,{d8-d15}
        ldmia   sp!,{r4-r12,pc}
.size   sha1_block_data_order_neon,.-sha1_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7
.type   sha1_block_data_order_armv8,%function
.align  5
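@ ARMv8 code path: the rounds use the Crypto Extensions sha1c/sha1p/
@ sha1m/sha1h/sha1su0/sha1su1 instructions, emitted as raw .byte
@ sequences so the file still assembles with toolchains that predate
@ these extensions; the intended mnemonics are in the comments.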
sha1_block_data_order_armv8:
.LARMv8:
        vstmdb  sp!,{d8-d15}            @ ABI specification says so

        veor    q1,q1,q1
        adr     r3,.LK_00_19
        vld1.32 {q0},[r0]!
        vld1.32 {d2[0]},[r0]
        sub     r0,r0,#16
        vld1.32 {d16[],d17[]},[r3,:32]!
        vld1.32 {d18[],d19[]},[r3,:32]!
        vld1.32 {d20[],d21[]},[r3,:32]!
        vld1.32 {d22[],d23[]},[r3,:32]

.Loop_v8:
        vld1.8          {q4-q5},[r1]!
        vld1.8          {q6-q7},[r1]!
        vrev32.8        q4,q4
        vrev32.8        q5,q5

        vadd.i32        q12,q8,q4
        vrev32.8        q6,q6
        vmov            q14,q0  @ offload
        subs            r2,r2,#1

        vadd.i32        q13,q8,q5
        vrev32.8        q7,q7
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 0
        .byte   0x68,0x0c,0x02,0xf2     @ sha1c q0,q1,q12
        vadd.i32        q12,q8,q6
        .byte   0x4c,0x8c,0x3a,0xf2     @ sha1su0 q4,q5,q6
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 1
        .byte   0x6a,0x0c,0x06,0xf2     @ sha1c q0,q3,q13
        vadd.i32        q13,q8,q7
        .byte   0x8e,0x83,0xba,0xf3     @ sha1su1 q4,q7
        .byte   0x4e,0xac,0x3c,0xf2     @ sha1su0 q5,q6,q7
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 2
        .byte   0x68,0x0c,0x04,0xf2     @ sha1c q0,q2,q12
        vadd.i32        q12,q8,q4
        .byte   0x88,0xa3,0xba,0xf3     @ sha1su1 q5,q4
        .byte   0x48,0xcc,0x3e,0xf2     @ sha1su0 q6,q7,q4
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 3
        .byte   0x6a,0x0c,0x06,0xf2     @ sha1c q0,q3,q13
        vadd.i32        q13,q9,q5
        .byte   0x8a,0xc3,0xba,0xf3     @ sha1su1 q6,q5
        .byte   0x4a,0xec,0x38,0xf2     @ sha1su0 q7,q4,q5
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 4
        .byte   0x68,0x0c,0x04,0xf2     @ sha1c q0,q2,q12
        vadd.i32        q12,q9,q6
        .byte   0x8c,0xe3,0xba,0xf3     @ sha1su1 q7,q6
        .byte   0x4c,0x8c,0x3a,0xf2     @ sha1su0 q4,q5,q6
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 5
        .byte   0x6a,0x0c,0x16,0xf2     @ sha1p q0,q3,q13
        vadd.i32        q13,q9,q7
        .byte   0x8e,0x83,0xba,0xf3     @ sha1su1 q4,q7
        .byte   0x4e,0xac,0x3c,0xf2     @ sha1su0 q5,q6,q7
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 6
        .byte   0x68,0x0c,0x14,0xf2     @ sha1p q0,q2,q12
        vadd.i32        q12,q9,q4
        .byte   0x88,0xa3,0xba,0xf3     @ sha1su1 q5,q4
        .byte   0x48,0xcc,0x3e,0xf2     @ sha1su0 q6,q7,q4
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 7
        .byte   0x6a,0x0c,0x16,0xf2     @ sha1p q0,q3,q13
        vadd.i32        q13,q9,q5
        .byte   0x8a,0xc3,0xba,0xf3     @ sha1su1 q6,q5
        .byte   0x4a,0xec,0x38,0xf2     @ sha1su0 q7,q4,q5
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 8
        .byte   0x68,0x0c,0x14,0xf2     @ sha1p q0,q2,q12
        vadd.i32        q12,q10,q6
        .byte   0x8c,0xe3,0xba,0xf3     @ sha1su1 q7,q6
        .byte   0x4c,0x8c,0x3a,0xf2     @ sha1su0 q4,q5,q6
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 9
        .byte   0x6a,0x0c,0x16,0xf2     @ sha1p q0,q3,q13
        vadd.i32        q13,q10,q7
        .byte   0x8e,0x83,0xba,0xf3     @ sha1su1 q4,q7
        .byte   0x4e,0xac,0x3c,0xf2     @ sha1su0 q5,q6,q7
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 10
        .byte   0x68,0x0c,0x24,0xf2     @ sha1m q0,q2,q12
        vadd.i32        q12,q10,q4
        .byte   0x88,0xa3,0xba,0xf3     @ sha1su1 q5,q4
        .byte   0x48,0xcc,0x3e,0xf2     @ sha1su0 q6,q7,q4
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 11
        .byte   0x6a,0x0c,0x26,0xf2     @ sha1m q0,q3,q13
        vadd.i32        q13,q10,q5
        .byte   0x8a,0xc3,0xba,0xf3     @ sha1su1 q6,q5
        .byte   0x4a,0xec,0x38,0xf2     @ sha1su0 q7,q4,q5
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 12
        .byte   0x68,0x0c,0x24,0xf2     @ sha1m q0,q2,q12
        vadd.i32        q12,q10,q6
        .byte   0x8c,0xe3,0xba,0xf3     @ sha1su1 q7,q6
        .byte   0x4c,0x8c,0x3a,0xf2     @ sha1su0 q4,q5,q6
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 13
        .byte   0x6a,0x0c,0x26,0xf2     @ sha1m q0,q3,q13
        vadd.i32        q13,q11,q7
        .byte   0x8e,0x83,0xba,0xf3     @ sha1su1 q4,q7
        .byte   0x4e,0xac,0x3c,0xf2     @ sha1su0 q5,q6,q7
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 14
        .byte   0x68,0x0c,0x24,0xf2     @ sha1m q0,q2,q12
        vadd.i32        q12,q11,q4
        .byte   0x88,0xa3,0xba,0xf3     @ sha1su1 q5,q4
        .byte   0x48,0xcc,0x3e,0xf2     @ sha1su0 q6,q7,q4
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 15
        .byte   0x6a,0x0c,0x16,0xf2     @ sha1p q0,q3,q13
        vadd.i32        q13,q11,q5
        .byte   0x8a,0xc3,0xba,0xf3     @ sha1su1 q6,q5
        .byte   0x4a,0xec,0x38,0xf2     @ sha1su0 q7,q4,q5
        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 16
        .byte   0x68,0x0c,0x14,0xf2     @ sha1p q0,q2,q12
        vadd.i32        q12,q11,q6
        .byte   0x8c,0xe3,0xba,0xf3     @ sha1su1 q7,q6
        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 17
        .byte   0x6a,0x0c,0x16,0xf2     @ sha1p q0,q3,q13
        vadd.i32        q13,q11,q7

        .byte   0xc0,0x62,0xb9,0xf3     @ sha1h q3,q0           @ 18
        .byte   0x68,0x0c,0x14,0xf2     @ sha1p q0,q2,q12

        .byte   0xc0,0x42,0xb9,0xf3     @ sha1h q2,q0           @ 19
        .byte   0x6a,0x0c,0x16,0xf2     @ sha1p q0,q3,q13

        vadd.i32        q1,q1,q2
        vadd.i32        q0,q0,q14
        bne             .Loop_v8

        vst1.32         {q0},[r0]!
        vst1.32         {d2[0]},[r0]

        vldmia  sp!,{d8-d15}
        bx      lr                                      @ bx lr
.size   sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
#endif
#if __ARM_MAX_ARCH__>=7
.comm   OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif