vfs: remove unused wrapper block_page_mkwrite()
[linux/fpc-iii.git] / arch / arm / crypto / sha1-armv4-large.S
blob99207c45ec10f56891cd40d53b8a5fe524337f0f
@ The conditional blocks further down compare this macro against 7 to pick
@ between byte-wise and word-wise input loads. It is aliased here to the
@ macro named on the right (presumably the architecture level supplied by
@ the kernel build -- confirm against the build flags).
1 #define __ARM_ARCH__ __LINUX_ARM_ARCH__
2 @ ====================================================================
3 @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4 @ project. The module is, however, dual licensed under OpenSSL and
5 @ CRYPTOGAMS licenses depending on where you obtain it. For further
6 @ details see http://www.openssl.org/~appro/cryptogams/.
7 @ ====================================================================
9 @ sha1_block procedure for ARMv4.
11 @ January 2007.
13 @ Size/performance trade-off
14 @ ====================================================================
15 @ impl          size in bytes   comp cycles[*]  measured performance
16 @ ====================================================================
17 @ thumb         304             3212            4420
18 @ armv4-small   392/+29%        1958/+64%       2250/+96%
19 @ armv4-compact 740/+89%        1552/+26%       1840/+22%
20 @ armv4-large   1420/+92%       1307/+19%       1370/+34%[***]
21 @ full unroll   ~5100/+260%     ~1260/+4%       ~1300/+5%
22 @ ====================================================================
23 @ thumb         = same as 'small' but in Thumb instructions[**] and
24 @                 with recurring code in two private functions;
25 @ small         = detached Xload/update, loops are folded;
26 @ compact       = detached Xload/update, 5x unroll;
27 @ large         = interleaved Xload/update, 5x unroll;
28 @ full unroll   = interleaved Xload/update, full unroll, estimated[!];
30 @ [*]   Manually counted instructions in "grand" loop body. Measured
31 @       performance is affected by prologue and epilogue overhead,
32 @       i-cache availability, branch penalties, etc.
33 @ [**]  While each Thumb instruction is half the size, they are not as
34 @       diverse as ARM ones: e.g., there are only two arithmetic
35 @       instructions with 3 arguments, no [fixed] rotate, addressing
36 @       modes are limited. As a result it takes more instructions to do
37 @       the same job in Thumb, therefore the code is never half the
38 @       size and is always slower.
39 @ [***] which is also ~35% better than compiler generated code. Dual-
40 @       issue Cortex A8 core was measured to process input block in
41 @       ~990 cycles.
43 @ August 2010.
45 @ Rescheduling for dual-issue pipeline resulted in 13% improvement on
46 @ Cortex A8 core and in absolute terms ~870 cycles per input block
47 @ [or 13.6 cycles per byte].
49 @ February 2011.
51 @ Profiler-assisted and platform-specific optimization resulted in 10%
52 @ improvement on Cortex A8 core and 12.2 cycles per byte.
54 #include <linux/linkage.h>
56 .text
58 .align  2
@ --------------------------------------------------------------------
@ sha1_block_data_order: SHA-1 compression of consecutive 64-byte blocks.
@
@ In:    r0 = address of five 32-bit state words (loaded once at entry,
@             re-read and written back after every block)
@        r1 = input pointer; advanced by 4 per message word consumed
@        r2 = block count; converted at entry into the end address
@             r1 + r2*64
@ Regs:  r3-r7   working variables A,B,C,D,E; the roles move by one
@                register every round (E is r7, then r6, r5, r4, r3)
@        r8      round constant K for the current group of rounds
@        r9      message word X[i]; r10-r12 are scratch
@        r14     descending pointer into the X[] schedule on the stack
@                (the return address was saved by the initial stmdb)
@        sp      lowered in stages; r14 reaching sp ends a round group
@ Stack: ten saved registers, plus 80 words of X[] below them that are
@        released again at .L_done.
@
@ C, D and E are held rotated so that a ror#2 operand yields the true
@ value. The old B register becomes C of the next round unchanged, so
@ the rotate that SHA-1 applies to B costs no instruction.
@
@ The bracketed figures in the original comments look like the running
@ instruction tallies behind the "total 1307" quoted in the size table
@ at the top of the file.
@
@ NOTE(review): a zero block count is not special-cased -- the loop body
@ runs before the r1 == r2 test at the bottom. Confirm callers never
@ pass 0.
@ --------------------------------------------------------------------
59 ENTRY(sha1_block_data_order)
60         stmdb   sp!,{r4-r12,lr}
61         add     r2,r1,r2,lsl#6  @ r2 to point at the end of r1
62         ldmia   r0,{r3,r4,r5,r6,r7}
@ Per-block setup: r14 marks the top of the X[] area, sp is dropped by
@ 15 words to mark the end of rounds 0..14, and C, D, E are put into
@ the rotated form described above.
63 .Lloop:
64         ldr     r8,.LK_00_19
65         mov     r14,sp
66         sub     sp,sp,#15*4
67         mov     r5,r5,ror#30
68         mov     r6,r6,ror#30
69         mov     r7,r7,ror#30            @ [6]
@ Rounds 0..14: five rounds per pass, three passes (15 words stored
@ until r14 meets sp). Each round reads one big-endian word from the
@ input: below architecture level 7 it is assembled from four byte
@ loads, otherwise it is a word load followed by rev when the ARMEL
@ macro is defined. The word is pushed onto X[] and
@ E += K + ROR(A,27) + X[i] + F, with F = ((C ^ D) & B) ^ D.
70 .L_00_15:
71 #if __ARM_ARCH__<7
72         ldrb    r10,[r1,#2]
73         ldrb    r9,[r1,#3]
74         ldrb    r11,[r1,#1]
75         add     r7,r8,r7,ror#2                  @ E+=K_00_19
76         ldrb    r12,[r1],#4
77         orr     r9,r9,r10,lsl#8
78         eor     r10,r5,r6                       @ F_xx_xx
79         orr     r9,r9,r11,lsl#16
80         add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
81         orr     r9,r9,r12,lsl#24
82 #else
83         ldr     r9,[r1],#4                      @ handles unaligned
84         add     r7,r8,r7,ror#2                  @ E+=K_00_19
85         eor     r10,r5,r6                       @ F_xx_xx
86         add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
87 #ifdef __ARMEL__
88         rev     r9,r9                           @ byte swap
89 #endif
90 #endif
91         and     r10,r4,r10,ror#2
92         add     r7,r7,r9                        @ E+=X[i]
93         eor     r10,r10,r6,ror#2                @ F_00_19(B,C,D)
94         str     r9,[r14,#-4]!
95         add     r7,r7,r10                       @ E+=F_00_19(B,C,D)
96 #if __ARM_ARCH__<7
97         ldrb    r10,[r1,#2]
98         ldrb    r9,[r1,#3]
99         ldrb    r11,[r1,#1]
100         add     r6,r8,r6,ror#2                  @ E+=K_00_19
101         ldrb    r12,[r1],#4
102         orr     r9,r9,r10,lsl#8
103         eor     r10,r4,r5                       @ F_xx_xx
104         orr     r9,r9,r11,lsl#16
105         add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
106         orr     r9,r9,r12,lsl#24
107 #else
108         ldr     r9,[r1],#4                      @ handles unaligned
109         add     r6,r8,r6,ror#2                  @ E+=K_00_19
110         eor     r10,r4,r5                       @ F_xx_xx
111         add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
112 #ifdef __ARMEL__
113         rev     r9,r9                           @ byte swap
114 #endif
115 #endif
116         and     r10,r3,r10,ror#2
117         add     r6,r6,r9                        @ E+=X[i]
118         eor     r10,r10,r5,ror#2                @ F_00_19(B,C,D)
119         str     r9,[r14,#-4]!
120         add     r6,r6,r10                       @ E+=F_00_19(B,C,D)
121 #if __ARM_ARCH__<7
122         ldrb    r10,[r1,#2]
123         ldrb    r9,[r1,#3]
124         ldrb    r11,[r1,#1]
125         add     r5,r8,r5,ror#2                  @ E+=K_00_19
126         ldrb    r12,[r1],#4
127         orr     r9,r9,r10,lsl#8
128         eor     r10,r3,r4                       @ F_xx_xx
129         orr     r9,r9,r11,lsl#16
130         add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
131         orr     r9,r9,r12,lsl#24
132 #else
133         ldr     r9,[r1],#4                      @ handles unaligned
134         add     r5,r8,r5,ror#2                  @ E+=K_00_19
135         eor     r10,r3,r4                       @ F_xx_xx
136         add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
137 #ifdef __ARMEL__
138         rev     r9,r9                           @ byte swap
139 #endif
140 #endif
141         and     r10,r7,r10,ror#2
142         add     r5,r5,r9                        @ E+=X[i]
143         eor     r10,r10,r4,ror#2                @ F_00_19(B,C,D)
144         str     r9,[r14,#-4]!
145         add     r5,r5,r10                       @ E+=F_00_19(B,C,D)
146 #if __ARM_ARCH__<7
147         ldrb    r10,[r1,#2]
148         ldrb    r9,[r1,#3]
149         ldrb    r11,[r1,#1]
150         add     r4,r8,r4,ror#2                  @ E+=K_00_19
151         ldrb    r12,[r1],#4
152         orr     r9,r9,r10,lsl#8
153         eor     r10,r7,r3                       @ F_xx_xx
154         orr     r9,r9,r11,lsl#16
155         add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
156         orr     r9,r9,r12,lsl#24
157 #else
158         ldr     r9,[r1],#4                      @ handles unaligned
159         add     r4,r8,r4,ror#2                  @ E+=K_00_19
160         eor     r10,r7,r3                       @ F_xx_xx
161         add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
162 #ifdef __ARMEL__
163         rev     r9,r9                           @ byte swap
164 #endif
165 #endif
166         and     r10,r6,r10,ror#2
167         add     r4,r4,r9                        @ E+=X[i]
168         eor     r10,r10,r3,ror#2                @ F_00_19(B,C,D)
169         str     r9,[r14,#-4]!
170         add     r4,r4,r10                       @ E+=F_00_19(B,C,D)
171 #if __ARM_ARCH__<7
172         ldrb    r10,[r1,#2]
173         ldrb    r9,[r1,#3]
174         ldrb    r11,[r1,#1]
175         add     r3,r8,r3,ror#2                  @ E+=K_00_19
176         ldrb    r12,[r1],#4
177         orr     r9,r9,r10,lsl#8
178         eor     r10,r6,r7                       @ F_xx_xx
179         orr     r9,r9,r11,lsl#16
180         add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
181         orr     r9,r9,r12,lsl#24
182 #else
183         ldr     r9,[r1],#4                      @ handles unaligned
184         add     r3,r8,r3,ror#2                  @ E+=K_00_19
185         eor     r10,r6,r7                       @ F_xx_xx
186         add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
187 #ifdef __ARMEL__
188         rev     r9,r9                           @ byte swap
189 #endif
190 #endif
191         and     r10,r5,r10,ror#2
192         add     r3,r3,r9                        @ E+=X[i]
193         eor     r10,r10,r7,ror#2                @ F_00_19(B,C,D)
194         str     r9,[r14,#-4]!
195         add     r3,r3,r10                       @ E+=F_00_19(B,C,D)
196         cmp     r14,sp
197         bne     .L_00_15                @ [((11+4)*5+2)*3]
@ sp moves a further 25 words down, i.e. 40 words below the top of
@ X[]: that is where r14 will stand once round 39 has stored its word.
198         sub     sp,sp,#25*4
@ Round 15: the sixteenth and last word taken from the input block.
199 #if __ARM_ARCH__<7
200         ldrb    r10,[r1,#2]
201         ldrb    r9,[r1,#3]
202         ldrb    r11,[r1,#1]
203         add     r7,r8,r7,ror#2                  @ E+=K_00_19
204         ldrb    r12,[r1],#4
205         orr     r9,r9,r10,lsl#8
206         eor     r10,r5,r6                       @ F_xx_xx
207         orr     r9,r9,r11,lsl#16
208         add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
209         orr     r9,r9,r12,lsl#24
210 #else
211         ldr     r9,[r1],#4                      @ handles unaligned
212         add     r7,r8,r7,ror#2                  @ E+=K_00_19
213         eor     r10,r5,r6                       @ F_xx_xx
214         add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
215 #ifdef __ARMEL__
216         rev     r9,r9                           @ byte swap
217 #endif
218 #endif
219         and     r10,r4,r10,ror#2
220         add     r7,r7,r9                        @ E+=X[i]
221         eor     r10,r10,r6,ror#2                @ F_00_19(B,C,D)
222         str     r9,[r14,#-4]!
223         add     r7,r7,r10                       @ E+=F_00_19(B,C,D)
@ Rounds 16..19 (unrolled): same K and F as above, but X[i] is now
@ derived from the schedule. r14 points at X[i-1] and the words were
@ stored downwards, so offsets of 2, 7, 13 and 15 words reach X[i-3],
@ X[i-8], X[i-14] and X[i-16]. Their XOR is rotated left by one bit
@ (written as ror#31) and pushed as the new X[i].
224         ldr     r9,[r14,#15*4]
225         ldr     r10,[r14,#13*4]
226         ldr     r11,[r14,#7*4]
227         add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
228         ldr     r12,[r14,#2*4]
229         eor     r9,r9,r10
230         eor     r11,r11,r12                     @ 1 cycle stall
231         eor     r10,r4,r5                       @ F_xx_xx
232         mov     r9,r9,ror#31
233         add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
234         eor     r9,r9,r11,ror#31
235         str     r9,[r14,#-4]!
236         and r10,r3,r10,ror#2                                    @ F_xx_xx
237                                                 @ F_xx_xx
238         add     r6,r6,r9                        @ E+=X[i]
239         eor     r10,r10,r5,ror#2                @ F_00_19(B,C,D)
240         add     r6,r6,r10                       @ E+=F_00_19(B,C,D)
241         ldr     r9,[r14,#15*4]
242         ldr     r10,[r14,#13*4]
243         ldr     r11,[r14,#7*4]
244         add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
245         ldr     r12,[r14,#2*4]
246         eor     r9,r9,r10
247         eor     r11,r11,r12                     @ 1 cycle stall
248         eor     r10,r3,r4                       @ F_xx_xx
249         mov     r9,r9,ror#31
250         add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
251         eor     r9,r9,r11,ror#31
252         str     r9,[r14,#-4]!
253         and r10,r7,r10,ror#2                                    @ F_xx_xx
254                                                 @ F_xx_xx
255         add     r5,r5,r9                        @ E+=X[i]
256         eor     r10,r10,r4,ror#2                @ F_00_19(B,C,D)
257         add     r5,r5,r10                       @ E+=F_00_19(B,C,D)
258         ldr     r9,[r14,#15*4]
259         ldr     r10,[r14,#13*4]
260         ldr     r11,[r14,#7*4]
261         add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
262         ldr     r12,[r14,#2*4]
263         eor     r9,r9,r10
264         eor     r11,r11,r12                     @ 1 cycle stall
265         eor     r10,r7,r3                       @ F_xx_xx
266         mov     r9,r9,ror#31
267         add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
268         eor     r9,r9,r11,ror#31
269         str     r9,[r14,#-4]!
270         and r10,r6,r10,ror#2                                    @ F_xx_xx
271                                                 @ F_xx_xx
272         add     r4,r4,r9                        @ E+=X[i]
273         eor     r10,r10,r3,ror#2                @ F_00_19(B,C,D)
274         add     r4,r4,r10                       @ E+=F_00_19(B,C,D)
275         ldr     r9,[r14,#15*4]
276         ldr     r10,[r14,#13*4]
277         ldr     r11,[r14,#7*4]
278         add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
279         ldr     r12,[r14,#2*4]
280         eor     r9,r9,r10
281         eor     r11,r11,r12                     @ 1 cycle stall
282         eor     r10,r6,r7                       @ F_xx_xx
283         mov     r9,r9,ror#31
284         add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
285         eor     r9,r9,r11,ror#31
286         str     r9,[r14,#-4]!
287         and r10,r5,r10,ror#2                                    @ F_xx_xx
288                                                 @ F_xx_xx
289         add     r3,r3,r9                        @ E+=X[i]
290         eor     r10,r10,r7,ror#2                @ F_00_19(B,C,D)
291         add     r3,r3,r10                       @ E+=F_00_19(B,C,D)
@ Switch to the second round constant. Adding zero to sp cannot carry
@ out, so cmn leaves the carry flag clear; the flag is the only record
@ of which round group the shared loop below is serving.
293         ldr     r8,.LK_20_39            @ [+15+16*4]
294         cmn     sp,#0                   @ [+3], clear carry to denote 20_39
@ Shared body for rounds 20..39 and 60..79: F = B ^ C ^ D, X[i] from
@ the schedule recurrence. Five rounds per pass, four passes per
@ entry (20 words stored until r14 meets sp). Only K in r8 and the
@ carry flag differ between the two uses.
295 .L_20_39_or_60_79:
296         ldr     r9,[r14,#15*4]
297         ldr     r10,[r14,#13*4]
298         ldr     r11,[r14,#7*4]
299         add     r7,r8,r7,ror#2                  @ E+=K_xx_xx
300         ldr     r12,[r14,#2*4]
301         eor     r9,r9,r10
302         eor     r11,r11,r12                     @ 1 cycle stall
303         eor     r10,r5,r6                       @ F_xx_xx
304         mov     r9,r9,ror#31
305         add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
306         eor     r9,r9,r11,ror#31
307         str     r9,[r14,#-4]!
308         eor r10,r4,r10,ror#2                                    @ F_xx_xx
309                                                 @ F_xx_xx
310         add     r7,r7,r9                        @ E+=X[i]
311         add     r7,r7,r10                       @ E+=F_20_39(B,C,D)
312         ldr     r9,[r14,#15*4]
313         ldr     r10,[r14,#13*4]
314         ldr     r11,[r14,#7*4]
315         add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
316         ldr     r12,[r14,#2*4]
317         eor     r9,r9,r10
318         eor     r11,r11,r12                     @ 1 cycle stall
319         eor     r10,r4,r5                       @ F_xx_xx
320         mov     r9,r9,ror#31
321         add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
322         eor     r9,r9,r11,ror#31
323         str     r9,[r14,#-4]!
324         eor r10,r3,r10,ror#2                                    @ F_xx_xx
325                                                 @ F_xx_xx
326         add     r6,r6,r9                        @ E+=X[i]
327         add     r6,r6,r10                       @ E+=F_20_39(B,C,D)
328         ldr     r9,[r14,#15*4]
329         ldr     r10,[r14,#13*4]
330         ldr     r11,[r14,#7*4]
331         add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
332         ldr     r12,[r14,#2*4]
333         eor     r9,r9,r10
334         eor     r11,r11,r12                     @ 1 cycle stall
335         eor     r10,r3,r4                       @ F_xx_xx
336         mov     r9,r9,ror#31
337         add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
338         eor     r9,r9,r11,ror#31
339         str     r9,[r14,#-4]!
340         eor r10,r7,r10,ror#2                                    @ F_xx_xx
341                                                 @ F_xx_xx
342         add     r5,r5,r9                        @ E+=X[i]
343         add     r5,r5,r10                       @ E+=F_20_39(B,C,D)
344         ldr     r9,[r14,#15*4]
345         ldr     r10,[r14,#13*4]
346         ldr     r11,[r14,#7*4]
347         add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
348         ldr     r12,[r14,#2*4]
349         eor     r9,r9,r10
350         eor     r11,r11,r12                     @ 1 cycle stall
351         eor     r10,r7,r3                       @ F_xx_xx
352         mov     r9,r9,ror#31
353         add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
354         eor     r9,r9,r11,ror#31
355         str     r9,[r14,#-4]!
356         eor r10,r6,r10,ror#2                                    @ F_xx_xx
357                                                 @ F_xx_xx
358         add     r4,r4,r9                        @ E+=X[i]
359         add     r4,r4,r10                       @ E+=F_20_39(B,C,D)
360         ldr     r9,[r14,#15*4]
361         ldr     r10,[r14,#13*4]
362         ldr     r11,[r14,#7*4]
363         add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
364         ldr     r12,[r14,#2*4]
365         eor     r9,r9,r10
366         eor     r11,r11,r12                     @ 1 cycle stall
367         eor     r10,r6,r7                       @ F_xx_xx
368         mov     r9,r9,ror#31
369         add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
370         eor     r9,r9,r11,ror#31
371         str     r9,[r14,#-4]!
372         eor r10,r5,r10,ror#2                                    @ F_xx_xx
373                                                 @ F_xx_xx
374         add     r3,r3,r9                        @ E+=X[i]
375         add     r3,r3,r10                       @ E+=F_20_39(B,C,D)
@ End-of-group test. teq is used instead of cmp so that the carry flag
@ chosen before entry survives (per the original comments); none of
@ the round instructions above set flags. The Thumb variant copies sp
@ into r11 first -- presumably because sp is not accepted as a teq
@ operand there (TODO confirm); r11 is scratch at this point.
@ After the loop: carry set means rounds 60..79 just finished, carry
@ clear means fall through to rounds 40..59.
376  ARM(   teq     r14,sp          )       @ preserve carry
377  THUMB( mov     r11,sp          )
378  THUMB( teq     r14,r11         )       @ preserve carry
379         bne     .L_20_39_or_60_79       @ [+((12+3)*5+2)*4]
380         bcs     .L_done                 @ [+((12+3)*5+2)*4], spare 300 bytes
@ Third constant; sp drops to 60 words below the top of X[].
382         ldr     r8,.LK_40_59
383         sub     sp,sp,#20*4             @ [+2]
@ Rounds 40..59: five rounds per pass, four passes. F is accumulated
@ into E as two separate terms, (B & (C ^ D)) and (C & D). The terms
@ never have a set bit in common, so their sum equals the majority
@ function of B, C and D.
384 .L_40_59:
385         ldr     r9,[r14,#15*4]
386         ldr     r10,[r14,#13*4]
387         ldr     r11,[r14,#7*4]
388         add     r7,r8,r7,ror#2                  @ E+=K_xx_xx
389         ldr     r12,[r14,#2*4]
390         eor     r9,r9,r10
391         eor     r11,r11,r12                     @ 1 cycle stall
392         eor     r10,r5,r6                       @ F_xx_xx
393         mov     r9,r9,ror#31
394         add     r7,r7,r3,ror#27                 @ E+=ROR(A,27)
395         eor     r9,r9,r11,ror#31
396         str     r9,[r14,#-4]!
397         and r10,r4,r10,ror#2                                    @ F_xx_xx
398         and r11,r5,r6                                   @ F_xx_xx
399         add     r7,r7,r9                        @ E+=X[i]
400         add     r7,r7,r10                       @ E+=F_40_59(B,C,D)
401         add     r7,r7,r11,ror#2
402         ldr     r9,[r14,#15*4]
403         ldr     r10,[r14,#13*4]
404         ldr     r11,[r14,#7*4]
405         add     r6,r8,r6,ror#2                  @ E+=K_xx_xx
406         ldr     r12,[r14,#2*4]
407         eor     r9,r9,r10
408         eor     r11,r11,r12                     @ 1 cycle stall
409         eor     r10,r4,r5                       @ F_xx_xx
410         mov     r9,r9,ror#31
411         add     r6,r6,r7,ror#27                 @ E+=ROR(A,27)
412         eor     r9,r9,r11,ror#31
413         str     r9,[r14,#-4]!
414         and r10,r3,r10,ror#2                                    @ F_xx_xx
415         and r11,r4,r5                                   @ F_xx_xx
416         add     r6,r6,r9                        @ E+=X[i]
417         add     r6,r6,r10                       @ E+=F_40_59(B,C,D)
418         add     r6,r6,r11,ror#2
419         ldr     r9,[r14,#15*4]
420         ldr     r10,[r14,#13*4]
421         ldr     r11,[r14,#7*4]
422         add     r5,r8,r5,ror#2                  @ E+=K_xx_xx
423         ldr     r12,[r14,#2*4]
424         eor     r9,r9,r10
425         eor     r11,r11,r12                     @ 1 cycle stall
426         eor     r10,r3,r4                       @ F_xx_xx
427         mov     r9,r9,ror#31
428         add     r5,r5,r6,ror#27                 @ E+=ROR(A,27)
429         eor     r9,r9,r11,ror#31
430         str     r9,[r14,#-4]!
431         and r10,r7,r10,ror#2                                    @ F_xx_xx
432         and r11,r3,r4                                   @ F_xx_xx
433         add     r5,r5,r9                        @ E+=X[i]
434         add     r5,r5,r10                       @ E+=F_40_59(B,C,D)
435         add     r5,r5,r11,ror#2
436         ldr     r9,[r14,#15*4]
437         ldr     r10,[r14,#13*4]
438         ldr     r11,[r14,#7*4]
439         add     r4,r8,r4,ror#2                  @ E+=K_xx_xx
440         ldr     r12,[r14,#2*4]
441         eor     r9,r9,r10
442         eor     r11,r11,r12                     @ 1 cycle stall
443         eor     r10,r7,r3                       @ F_xx_xx
444         mov     r9,r9,ror#31
445         add     r4,r4,r5,ror#27                 @ E+=ROR(A,27)
446         eor     r9,r9,r11,ror#31
447         str     r9,[r14,#-4]!
448         and r10,r6,r10,ror#2                                    @ F_xx_xx
449         and r11,r7,r3                                   @ F_xx_xx
450         add     r4,r4,r9                        @ E+=X[i]
451         add     r4,r4,r10                       @ E+=F_40_59(B,C,D)
452         add     r4,r4,r11,ror#2
453         ldr     r9,[r14,#15*4]
454         ldr     r10,[r14,#13*4]
455         ldr     r11,[r14,#7*4]
456         add     r3,r8,r3,ror#2                  @ E+=K_xx_xx
457         ldr     r12,[r14,#2*4]
458         eor     r9,r9,r10
459         eor     r11,r11,r12                     @ 1 cycle stall
460         eor     r10,r6,r7                       @ F_xx_xx
461         mov     r9,r9,ror#31
462         add     r3,r3,r4,ror#27                 @ E+=ROR(A,27)
463         eor     r9,r9,r11,ror#31
464         str     r9,[r14,#-4]!
465         and r10,r5,r10,ror#2                                    @ F_xx_xx
466         and r11,r6,r7                                   @ F_xx_xx
467         add     r3,r3,r9                        @ E+=X[i]
468         add     r3,r3,r10                       @ E+=F_40_59(B,C,D)
469         add     r3,r3,r11,ror#2
470         cmp     r14,sp
471         bne     .L_40_59                @ [+((12+5)*5+2)*4]
@ Fourth constant; sp drops to 80 words below the top of X[].
@ Subtracting zero from sp never borrows, so cmp leaves carry set, and
@ the shared loop is re-entered for rounds 60..79.
473         ldr     r8,.LK_60_79
474         sub     sp,sp,#20*4
475         cmp     sp,#0                   @ set carry to denote 60_79
476         b       .L_20_39_or_60_79       @ [+4], spare 300 bytes
@ Block finished: release the 80-word schedule, re-read the five state
@ words, add the working variables (C, D, E converted back with ror#2)
@ and store the result. Loop while r1 has not reached the end address.
477 .L_done:
478         add     sp,sp,#80*4             @ "deallocate" stack frame
479         ldmia   r0,{r8,r9,r10,r11,r12}
480         add     r3,r8,r3
481         add     r4,r9,r4
482         add     r5,r10,r5,ror#2
483         add     r6,r11,r6,ror#2
484         add     r7,r12,r7,ror#2
485         stmia   r0,{r3,r4,r5,r6,r7}
486         teq     r1,r2
487         bne     .Lloop                  @ [+18], total 1307
@ Restore r4-r12 and return by loading the saved lr straight into pc.
489         ldmia   sp!,{r4-r12,pc}
@ The four round constants, one per group of 20 rounds. They sit after
@ the return instruction and are read by the ldr r8,.LK_* loads above.
490 .align  2
491 .LK_00_19:      .word   0x5a827999
492 .LK_20_39:      .word   0x6ed9eba1
493 .LK_40_59:      .word   0x8f1bbcdc
494 .LK_60_79:      .word   0xca62c1d6
495 ENDPROC(sha1_block_data_order)
@ Identification string emitted into the section after the code; no
@ instruction in this file refers to it.
496 .asciz  "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
497 .align  2