.global	sha1_block_data_order
.type	sha1_block_data_order,%function

sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
	sub	r3,pc,#8		@ sha1_block_data_order
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
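@ OPENSSL_armcap_P carries the CPU feature flags probed at library
@ start-up; it is fetched PC-relative here so that the fastest code
@ path available on this core (generic ARM, NEON or the ARMv8 crypto
@ extensions below) can be chosen at run time.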
	add	r2,r1,r2,lsl#6		@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
	mov	r7,r7,ror#30		@ [6]
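@ Integer-round register assignment: the working variables A..E live
@ in r3..r7 (their roles rotate every round), the round constant K in
@ r8, the current message word X[i] in r9 and the F() scratch value in
@ r10.  C, D and E are kept rotated left by 2 (hence the mov ...,ror#30
@ above), so the ror#2 operand suffixes in the rounds recover the true
@ values and SHA-1's ROL(B,30) costs no extra instruction.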
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	ldr	r9,[r1],#4		@ handles unaligned
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	add	r7,r7,r9		@ E+=X[i]
	eor	r10,r10,r6,ror#2	@ F_00_19(B,C,D)
	add	r7,r7,r10		@ E+=F_00_19(B,C,D)
	add	r6,r8,r6,ror#2		@ E+=K_00_19
	eor	r10,r4,r5		@ F_xx_xx
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	ldr	r9,[r1],#4		@ handles unaligned
	add	r6,r8,r6,ror#2		@ E+=K_00_19
	eor	r10,r4,r5		@ F_xx_xx
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	add	r6,r6,r9		@ E+=X[i]
	eor	r10,r10,r5,ror#2	@ F_00_19(B,C,D)
	add	r6,r6,r10		@ E+=F_00_19(B,C,D)
	add	r5,r8,r5,ror#2		@ E+=K_00_19
	eor	r10,r3,r4		@ F_xx_xx
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	ldr	r9,[r1],#4		@ handles unaligned
	add	r5,r8,r5,ror#2		@ E+=K_00_19
	eor	r10,r3,r4		@ F_xx_xx
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	add	r5,r5,r9		@ E+=X[i]
	eor	r10,r10,r4,ror#2	@ F_00_19(B,C,D)
	add	r5,r5,r10		@ E+=F_00_19(B,C,D)
	add	r4,r8,r4,ror#2		@ E+=K_00_19
	eor	r10,r7,r3		@ F_xx_xx
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	ldr	r9,[r1],#4		@ handles unaligned
	add	r4,r8,r4,ror#2		@ E+=K_00_19
	eor	r10,r7,r3		@ F_xx_xx
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	rev	r9,r9			@ byte swap
	add	r4,r4,r9		@ E+=X[i]
	eor	r10,r10,r3,ror#2	@ F_00_19(B,C,D)
	add	r4,r4,r10		@ E+=F_00_19(B,C,D)
	add	r3,r8,r3,ror#2		@ E+=K_00_19
	eor	r10,r6,r7		@ F_xx_xx
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	ldr	r9,[r1],#4		@ handles unaligned
	add	r3,r8,r3,ror#2		@ E+=K_00_19
	eor	r10,r6,r7		@ F_xx_xx
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	rev	r9,r9			@ byte swap
	add	r3,r3,r9		@ E+=X[i]
	eor	r10,r10,r7,ror#2	@ F_00_19(B,C,D)
	add	r3,r3,r10		@ E+=F_00_19(B,C,D)
	bne	.L_00_15		@ [((11+4)*5+2)*3]
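@ The .L_00_15 loop above covers rounds 0..15, which take X[i] straight
@ from the input block (byte-swapped with rev on little-endian).  Each
@ unrolled round performs one SHA-1 step,
@   E += K + ROL(A,5) + X[i] + F_00_19(B,C,D),
@ with ROL(A,5) written as A,ror#27 and
@ F_00_19(B,C,D) = (B & (C ^ D)) ^ D built from the eor/and/eor sequence.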
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	ldr	r9,[r1],#4		@ handles unaligned
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	rev	r9,r9			@ byte swap
	add	r7,r7,r9		@ E+=X[i]
	eor	r10,r10,r6,ror#2	@ F_00_19(B,C,D)
	add	r7,r7,r10		@ E+=F_00_19(B,C,D)
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	and	r10,r3,r10,ror#2	@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	eor	r10,r10,r5,ror#2	@ F_00_19(B,C,D)
	add	r6,r6,r10		@ E+=F_00_19(B,C,D)
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	and	r10,r7,r10,ror#2	@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	eor	r10,r10,r4,ror#2	@ F_00_19(B,C,D)
	add	r5,r5,r10		@ E+=F_00_19(B,C,D)
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	and	r10,r6,r10,ror#2	@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	eor	r10,r10,r3,ror#2	@ F_00_19(B,C,D)
	add	r4,r4,r10		@ E+=F_00_19(B,C,D)
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	and	r10,r5,r10,ror#2	@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	eor	r10,r10,r7,ror#2	@ F_00_19(B,C,D)
	add	r3,r3,r10		@ E+=F_00_19(B,C,D)
	ldr	r8,.LK_20_39		@ [+15+16*4]
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
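@ The same unrolled loop body serves rounds 20..39 and 60..79: both use
@ F_20_39(B,C,D) = B^C^D and differ only in K.  The carry flag tells the
@ two passes apart: cmn sp,#0 above clears it for 20..39, while
@ cmp sp,#0 later sets it for 60..79 so that bcs .L_done can exit after
@ round 79.  The loop-end test uses teq because, unlike cmp, it leaves
@ the carry flag untouched.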
	add	r7,r8,r7,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	eor	r10,r4,r10,ror#2	@ F_xx_xx
	add	r7,r7,r9		@ E+=X[i]
	add	r7,r7,r10		@ E+=F_20_39(B,C,D)
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	eor	r10,r3,r10,ror#2	@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	add	r6,r6,r10		@ E+=F_20_39(B,C,D)
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	eor	r10,r7,r10,ror#2	@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	add	r5,r5,r10		@ E+=F_20_39(B,C,D)
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	eor	r10,r6,r10,ror#2	@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	add	r4,r4,r10		@ E+=F_20_39(B,C,D)
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	eor	r10,r5,r10,ror#2	@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	add	r3,r3,r10		@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
	sub	sp,sp,#20*4		@ [+2]
	add	r7,r8,r7,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	and	r10,r4,r10,ror#2	@ F_xx_xx
	and	r11,r5,r6		@ F_xx_xx
	add	r7,r7,r9		@ E+=X[i]
	add	r7,r7,r10		@ E+=F_40_59(B,C,D)
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	and	r10,r3,r10,ror#2	@ F_xx_xx
	and	r11,r4,r5		@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	add	r6,r6,r10		@ E+=F_40_59(B,C,D)
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	and	r10,r7,r10,ror#2	@ F_xx_xx
	and	r11,r3,r4		@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	add	r5,r5,r10		@ E+=F_40_59(B,C,D)
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	and	r10,r6,r10,ror#2	@ F_xx_xx
	and	r11,r7,r3		@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	add	r4,r4,r10		@ E+=F_40_59(B,C,D)
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	and	r10,r5,r10,ror#2	@ F_xx_xx
	and	r11,r6,r7		@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	add	r3,r3,r10		@ E+=F_40_59(B,C,D)
	bne	.L_40_59		@ [+((12+5)*5+2)*4]
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
	add	sp,sp,#80*4		@ "deallocate" stack frame
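@ End of the 80 rounds for this block: reload the previous chaining
@ values, fold them back into A..E and store the updated digest, then
@ loop while more 64-byte blocks remain (r1 below r2).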
	ldmia	r0,{r8,r9,r10,r11,r12}
	stmia	r0,{r3,r4,r5,r6,r7}
	bne	.Lloop			@ [+18], total 1307
	ldmia	sp!,{r4-r12,pc}
	ldmia	sp!,{r4-r12,lr}
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
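@ 0xe12fff1e is "bx lr" spelled as a raw word so the file still
@ assembles for targets whose assembler rejects bx; ARMv4 cores return
@ via the moveq above, later cores fall through to the bx encoding and
@ so return correctly to Thumb callers as well.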
.size	sha1_block_data_order,.-sha1_block_data_order

.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
#if __ARM_MAX_ARCH__>=7
	.word	OPENSSL_armcap_P-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
#if __ARM_MAX_ARCH__>=7

.type	sha1_block_data_order_neon,%function
sha1_block_data_order_neon:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6		@ r2 to point at the end of r1
	@ dmb				@ errata #451034 on early Cortex A8
	@ vstmdb	sp!,{d8-d15}	@ ABI specification says so
	sub	sp,sp,#64		@ alloca
	bic	sp,sp,#15		@ align for 128-bit stores
	ldmia	r0,{r3,r4,r5,r6,r7}	@ load context
	vld1.8	{q0-q1},[r1]!		@ handles unaligned
	vld1.32	{d28[],d29[]},[r8,:32]!	@ load K_00_19
	vrev32.8	q0,q0		@ yes, even on
	vrev32.8	q1,q1		@ big-endian...
	vst1.32	{q8},[r12,:128]!
	vst1.32	{q9},[r12,:128]!
	vst1.32	{q10},[r12,:128]!
	ldr	r9,[sp]			@ big RAW stall
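@ The NEON path is a hybrid: the vector unit expands the message
@ schedule four words at a time and adds the round constant kept
@ broadcast in q14, staging W[i]+K in the 16-byte-aligned stack area
@ via vst1.32 {..},[r12,:128]!, while the integer registers r3-r7 run
@ the actual rounds and read the staged words back with ldr (the
@ "big RAW stall" note refers to that store-to-load hand-off).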
	vst1.32	{q13},[r12,:128]!
	vext.8	q13,q15,q12,#4
	vld1.32	{d28[],d29[]},[r8,:32]!
	vst1.32	{q13},[r12,:128]!
	vext.8	q13,q15,q12,#4
	vst1.32	{q13},[r12,:128]!
	vext.8	q13,q15,q12,#4
	vext.8	q12,q10,q15,#4
	vst1.32	{q13},[r12,:128]!
	vext.8	q13,q15,q12,#4
	vext.8	q12,q10,q11,#8
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vld1.32	{d28[],d29[]},[r8,:32]!
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vld1.32	{d28[],d29[]},[r8,:32]!
	vst1.32	{q13},[r12,:128]!
	vext.8	q12,q10,q11,#8
	vadd.i32	q13,q11,q14
	vst1.32	{q13},[r12,:128]!
	vext.8	q12,q11,q0,#8
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vst1.32	{q13},[r12,:128]!
	vld1.8	{q0-q1},[r1]!
	vld1.8	{q2-q3},[r1]!
	vld1.32	{d28[],d29[]},[r8,:32]!
	vst1.32	{q8},[r12,:128]!
	vst1.32	{q9},[r12,:128]!
	vst1.32	{q10},[r12,:128]!
	ldmia	r0,{r9,r10,r11,r12}	@ accumulate context
	stmia	r0,{r3,r4,r5,r6,r7}
	@ vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r12,pc}
.size	sha1_block_data_order_neon,.-sha1_block_data_order_neon

#if __ARM_MAX_ARCH__>=7
.type	sha1_block_data_order_armv8,%function
sha1_block_data_order_armv8:
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	vld1.32	{d2[0]},[r0]
	vld1.32	{d16[],d17[]},[r3,:32]!
	vld1.32	{d18[],d19[]},[r3,:32]!
	vld1.32	{d20[],d21[]},[r3,:32]!
	vld1.32	{d22[],d23[]},[r3,:32]
	vld1.8	{q4-q5},[r1]!
	vld1.8	{q6-q7},[r1]!
	vmov	q14,q0			@ offload
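@ The .byte sequences below are the ARMv8 SHA-1 instructions (sha1h,
@ sha1c, sha1p, sha1m, sha1su0, sha1su1) emitted as raw encodings so
@ that the file still assembles with toolchains whose assemblers
@ predate the crypto extensions; the trailing comments give the
@ intended mnemonics.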
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 0
	.byte	0x68,0x0c,0x02,0xf2	@ sha1c q0,q1,q12
	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 1
	.byte	0x6a,0x0c,0x06,0xf2	@ sha1c q0,q3,q13
	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7
	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 2
	.byte	0x68,0x0c,0x04,0xf2	@ sha1c q0,q2,q12
	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4
	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 3
	.byte	0x6a,0x0c,0x06,0xf2	@ sha1c q0,q3,q13
	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5
	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 4
	.byte	0x68,0x0c,0x04,0xf2	@ sha1c q0,q2,q12
	.byte	0x8c,0xe3,0xba,0xf3	@ sha1su1 q7,q6
	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 5
	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13
	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7
	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 6
	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12
	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4
	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 7
	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13
	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5
	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 8
	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12
	.byte	0x8c,0xe3,0xba,0xf3	@ sha1su1 q7,q6
	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 9
	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13
	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7
	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 10
	.byte	0x68,0x0c,0x24,0xf2	@ sha1m q0,q2,q12
	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4
	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 11
	.byte	0x6a,0x0c,0x26,0xf2	@ sha1m q0,q3,q13
	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5
	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 12
	.byte	0x68,0x0c,0x24,0xf2	@ sha1m q0,q2,q12
	.byte	0x8c,0xe3,0xba,0xf3	@ sha1su1 q7,q6
	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 13
	.byte	0x6a,0x0c,0x26,0xf2	@ sha1m q0,q3,q13
	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7
	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 14
	.byte	0x68,0x0c,0x24,0xf2	@ sha1m q0,q2,q12
	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4
	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 15
	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13
	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5
	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 16
	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12
	.byte	0x8c,0xe3,0xba,0xf3	@ sha1su1 q7,q6
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 17
	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13
	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 18
	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12
	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 19
	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13
	vst1.32	{d2[0]},[r0]
.size	sha1_block_data_order_armv8,.-sha1_block_data_order_armv8

#if __ARM_MAX_ARCH__>=7
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P