.global sha1_block_data_order
.type sha1_block_data_order,%function
sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
sub r3,pc,#8 @ sha1_block_data_order
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
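@ Argument convention (a hedged reading of the code below): r0 points at the
@ five-word SHA-1 state, r1 at the input data, r2 holds the number of
@ 64-byte blocks. OPENSSL_armcap_P is consulted at run time so the NEON and
@ ARMv8-crypto variants further down can be selected on capable CPUs.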
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
ldmia r0,{r3,r4,r5,r6,r7}
mov r7,r7,ror#30 @ [6]
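@ Reference for the unrolled rounds below (standard SHA-1, FIPS 180-4):
@   E += ROL(A,5) + F(B,C,D) + X[i] + K;  B = ROL(B,30)
@ with ROL(A,5) written as ROR(A,27) and ROL(B,30) as ROR(B,2), and
@   F_00_19(B,C,D) = D ^ (B & (C ^ D))          "choose"
@   F_20_39(B,C,D) = B ^ C ^ D                  "parity"
@   F_40_59(B,C,D) = (B & (C ^ D)) ^ (C & D)    "majority"
@ The working variables rotate through r3-r7; r8 holds the current K
@ constant, r9 the incoming message word X[i], and r10-r12 serve as scratch.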
add r7,r8,r7,ror#2 @ E+=K_00_19
eor r10,r5,r6 @ F_xx_xx
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
ldr r9,[r1],#4 @ handles unaligned
add r7,r8,r7,ror#2 @ E+=K_00_19
eor r10,r5,r6 @ F_xx_xx
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
add r7,r7,r9 @ E+=X[i]
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
add r7,r7,r10 @ E+=F_00_19(B,C,D)
add r6,r8,r6,ror#2 @ E+=K_00_19
eor r10,r4,r5 @ F_xx_xx
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
ldr r9,[r1],#4 @ handles unaligned
add r6,r8,r6,ror#2 @ E+=K_00_19
eor r10,r4,r5 @ F_xx_xx
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
add r6,r6,r9 @ E+=X[i]
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
add r6,r6,r10 @ E+=F_00_19(B,C,D)
add r5,r8,r5,ror#2 @ E+=K_00_19
eor r10,r3,r4 @ F_xx_xx
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
ldr r9,[r1],#4 @ handles unaligned
add r5,r8,r5,ror#2 @ E+=K_00_19
eor r10,r3,r4 @ F_xx_xx
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
add r5,r5,r9 @ E+=X[i]
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
add r5,r5,r10 @ E+=F_00_19(B,C,D)
add r4,r8,r4,ror#2 @ E+=K_00_19
eor r10,r7,r3 @ F_xx_xx
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
ldr r9,[r1],#4 @ handles unaligned
add r4,r8,r4,ror#2 @ E+=K_00_19
eor r10,r7,r3 @ F_xx_xx
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
rev r9,r9 @ byte swap
add r4,r4,r9 @ E+=X[i]
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
add r4,r4,r10 @ E+=F_00_19(B,C,D)
add r3,r8,r3,ror#2 @ E+=K_00_19
eor r10,r6,r7 @ F_xx_xx
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
ldr r9,[r1],#4 @ handles unaligned
add r3,r8,r3,ror#2 @ E+=K_00_19
eor r10,r6,r7 @ F_xx_xx
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
rev r9,r9 @ byte swap
add r3,r3,r9 @ E+=X[i]
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
add r3,r3,r10 @ E+=F_00_19(B,C,D)
bne .L_00_15 @ [((11+4)*5+2)*3]
add r7,r8,r7,ror#2 @ E+=K_00_19
eor r10,r5,r6 @ F_xx_xx
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
ldr r9,[r1],#4 @ handles unaligned
add r7,r8,r7,ror#2 @ E+=K_00_19
eor r10,r5,r6 @ F_xx_xx
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
rev r9,r9 @ byte swap
add r7,r7,r9 @ E+=X[i]
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
add r7,r7,r10 @ E+=F_00_19(B,C,D)
add r6,r8,r6,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r4,r5 @ F_xx_xx
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
and r10,r3,r10,ror#2 @ F_xx_xx
add r6,r6,r9 @ E+=X[i]
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
add r6,r6,r10 @ E+=F_00_19(B,C,D)
add r5,r8,r5,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r3,r4 @ F_xx_xx
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
and r10,r7,r10,ror#2 @ F_xx_xx
add r5,r5,r9 @ E+=X[i]
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
add r5,r5,r10 @ E+=F_00_19(B,C,D)
add r4,r8,r4,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r7,r3 @ F_xx_xx
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
and r10,r6,r10,ror#2 @ F_xx_xx
add r4,r4,r9 @ E+=X[i]
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
add r4,r4,r10 @ E+=F_00_19(B,C,D)
add r3,r8,r3,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r6,r7 @ F_xx_xx
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
and r10,r5,r10,ror#2 @ F_xx_xx
add r3,r3,r9 @ E+=X[i]
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
add r3,r3,r10 @ E+=F_00_19(B,C,D)
ldr r8,.LK_20_39 @ [+15+16*4]
cmn sp,#0 @ [+3], clear carry to denote 20_39
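@ The 20_39 and 60_79 passes share the same round code below: the carry
@ flag tells them apart (cleared here for 20_39, set by "cmp sp,#0" before
@ 60_79), "teq r14,sp" at the bottom of the block is used because it
@ preserves carry, and "bcs .L_done" exits once the 60_79 pass completes.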
add r7,r8,r7,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r5,r6 @ F_xx_xx
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
eor r10,r4,r10,ror#2 @ F_xx_xx
add r7,r7,r9 @ E+=X[i]
add r7,r7,r10 @ E+=F_20_39(B,C,D)
add r6,r8,r6,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r4,r5 @ F_xx_xx
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
eor r10,r3,r10,ror#2 @ F_xx_xx
add r6,r6,r9 @ E+=X[i]
add r6,r6,r10 @ E+=F_20_39(B,C,D)
add r5,r8,r5,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r3,r4 @ F_xx_xx
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
eor r10,r7,r10,ror#2 @ F_xx_xx
add r5,r5,r9 @ E+=X[i]
add r5,r5,r10 @ E+=F_20_39(B,C,D)
add r4,r8,r4,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r7,r3 @ F_xx_xx
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
eor r10,r6,r10,ror#2 @ F_xx_xx
add r4,r4,r9 @ E+=X[i]
add r4,r4,r10 @ E+=F_20_39(B,C,D)
add r3,r8,r3,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r6,r7 @ F_xx_xx
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
eor r10,r5,r10,ror#2 @ F_xx_xx
add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_20_39(B,C,D)
teq r14,sp @ preserve carry
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
sub sp,sp,#20*4 @ [+2]
add r7,r8,r7,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r5,r6 @ F_xx_xx
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
and r10,r4,r10,ror#2 @ F_xx_xx
and r11,r5,r6 @ F_xx_xx
add r7,r7,r9 @ E+=X[i]
add r7,r7,r10 @ E+=F_40_59(B,C,D)
add r6,r8,r6,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r4,r5 @ F_xx_xx
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
and r10,r3,r10,ror#2 @ F_xx_xx
and r11,r4,r5 @ F_xx_xx
add r6,r6,r9 @ E+=X[i]
add r6,r6,r10 @ E+=F_40_59(B,C,D)
add r5,r8,r5,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r3,r4 @ F_xx_xx
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
and r10,r7,r10,ror#2 @ F_xx_xx
and r11,r3,r4 @ F_xx_xx
add r5,r5,r9 @ E+=X[i]
add r5,r5,r10 @ E+=F_40_59(B,C,D)
add r4,r8,r4,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r7,r3 @ F_xx_xx
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
and r10,r6,r10,ror#2 @ F_xx_xx
and r11,r7,r3 @ F_xx_xx
add r4,r4,r9 @ E+=X[i]
add r4,r4,r10 @ E+=F_40_59(B,C,D)
add r3,r8,r3,ror#2 @ E+=K_xx_xx
eor r11,r11,r12 @ 1 cycle stall
eor r10,r6,r7 @ F_xx_xx
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
and r10,r5,r10,ror#2 @ F_xx_xx
and r11,r6,r7 @ F_xx_xx
add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_40_59(B,C,D)
bne .L_40_59 @ [+((12+5)*5+2)*4]
cmp sp,#0 @ set carry to denote 60_79
b .L_20_39_or_60_79 @ [+4], spare 300 bytes
add sp,sp,#80*4 @ "deallocate" stack frame
ldmia r0,{r8,r9,r10,r11,r12}
stmia r0,{r3,r4,r5,r6,r7}
bne .Lloop @ [+18], total 1307
ldmia sp!,{r4-r12,pc}
ldmia sp!,{r4-r12,lr}
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
.size sha1_block_data_order,.-sha1_block_data_order
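@ The four SHA-1 round constants K_t (FIPS 180-4), one per 20-round group.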
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.word OPENSSL_armcap_P-sha1_block_data_order
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
#if __ARM_MAX_ARCH__>=7
.type sha1_block_data_order_neon,%function
sha1_block_data_order_neon:
stmdb sp!,{r4-r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
@ dmb @ errata #451034 on early Cortex A8
@ vstmdb sp!,{d8-d15} @ ABI specification says so
sub sp,sp,#64 @ alloca
bic sp,sp,#15 @ align for 128-bit stores
ldmia r0,{r3,r4,r5,r6,r7} @ load context
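@ A hedged reading of the NEON variant below: the vector unit computes the
@ message schedule four words at a time (vext/veor and friends), adds the
@ broadcast round constant held in q14 and stores W[i]+K into the 64-byte,
@ 16-byte-aligned stack scratch area, while the SHA-1 rounds themselves stay
@ on the integer pipeline and pick the prepared words back up with plain
@ ldr ("ldr r9,[sp]" below, the noted RAW stall).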
vld1.8 {q0-q1},[r1]! @ handles unaligned
vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19
vrev32.8 q0,q0 @ yes, even on
vrev32.8 q1,q1 @ big-endian...
vst1.32 {q8},[r12,:128]!
vst1.32 {q9},[r12,:128]!
vst1.32 {q10},[r12,:128]!
ldr r9,[sp] @ big RAW stall
vst1.32 {q13},[r12,:128]!
vext.8 q13,q15,q12,#4
vld1.32 {d28[],d29[]},[r8,:32]!
vst1.32 {q13},[r12,:128]!
vext.8 q13,q15,q12,#4
vst1.32 {q13},[r12,:128]!
vext.8 q13,q15,q12,#4
vext.8 q12,q10,q15,#4
vst1.32 {q13},[r12,:128]!
vext.8 q13,q15,q12,#4
vext.8 q12,q10,q11,#8
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vld1.32 {d28[],d29[]},[r8,:32]!
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vld1.32 {d28[],d29[]},[r8,:32]!
vst1.32 {q13},[r12,:128]!
vext.8 q12,q10,q11,#8
vadd.i32 q13,q11,q14
vst1.32 {q13},[r12,:128]!
vext.8 q12,q11,q0,#8
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vst1.32 {q13},[r12,:128]!
vld1.8 {q0-q1},[r1]!
vld1.8 {q2-q3},[r1]!
vld1.32 {d28[],d29[]},[r8,:32]!
vst1.32 {q8},[r12,:128]!
vst1.32 {q9},[r12,:128]!
vst1.32 {q10},[r12,:128]!
ldmia r0,{r9,r10,r11,r12} @ accumulate context
stmia r0,{r3,r4,r5,r6,r7}
@ vldmia sp!,{d8-d15}
ldmia sp!,{r4-r12,pc}
.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
#if __ARM_MAX_ARCH__>=7
.type sha1_block_data_order_armv8,%function
sha1_block_data_order_armv8:
vstmdb sp!,{d8-d15} @ ABI specification says so
vld1.32 {d2[0]},[r0]
vld1.32 {d16[],d17[]},[r3,:32]!
vld1.32 {d18[],d19[]},[r3,:32]!
vld1.32 {d20[],d21[]},[r3,:32]!
vld1.32 {d22[],d23[]},[r3,:32]
vld1.8 {q4-q5},[r1]!
vld1.8 {q6-q7},[r1]!
vmov q14,q0 @ offload
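@ The .inst words below are hand-encoded ARMv8 Crypto Extension SHA-1
@ instructions (the intended mnemonic is given in each comment), emitted as
@ raw opcodes so the file also assembles with toolchains that predate those
@ mnemonics. sha1c/sha1p/sha1m perform four rounds each with the
@ choose/parity/majority functions, sha1h supplies the fixed rotation of A
@ that becomes the next group's E, and sha1su0/sha1su1 advance the message
@ schedule; the per-group K constants are the broadcasts loaded into
@ q8-q11 above, presumably from the K table.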
.inst 0xf3b962c0 @ sha1h q3,q0 @ 0
.inst 0xf2020c68 @ sha1c q0,q1,q12
.inst 0xf23a8c4c @ sha1su0 q4,q5,q6
.inst 0xf3b942c0 @ sha1h q2,q0 @ 1
.inst 0xf2060c6a @ sha1c q0,q3,q13
.inst 0xf3ba838e @ sha1su1 q4,q7
.inst 0xf23cac4e @ sha1su0 q5,q6,q7
.inst 0xf3b962c0 @ sha1h q3,q0 @ 2
.inst 0xf2040c68 @ sha1c q0,q2,q12
.inst 0xf3baa388 @ sha1su1 q5,q4
.inst 0xf23ecc48 @ sha1su0 q6,q7,q4
.inst 0xf3b942c0 @ sha1h q2,q0 @ 3
.inst 0xf2060c6a @ sha1c q0,q3,q13
.inst 0xf3bac38a @ sha1su1 q6,q5
.inst 0xf238ec4a @ sha1su0 q7,q4,q5
.inst 0xf3b962c0 @ sha1h q3,q0 @ 4
.inst 0xf2040c68 @ sha1c q0,q2,q12
.inst 0xf3bae38c @ sha1su1 q7,q6
.inst 0xf23a8c4c @ sha1su0 q4,q5,q6
.inst 0xf3b942c0 @ sha1h q2,q0 @ 5
.inst 0xf2160c6a @ sha1p q0,q3,q13
.inst 0xf3ba838e @ sha1su1 q4,q7
.inst 0xf23cac4e @ sha1su0 q5,q6,q7
.inst 0xf3b962c0 @ sha1h q3,q0 @ 6
.inst 0xf2140c68 @ sha1p q0,q2,q12
.inst 0xf3baa388 @ sha1su1 q5,q4
.inst 0xf23ecc48 @ sha1su0 q6,q7,q4
.inst 0xf3b942c0 @ sha1h q2,q0 @ 7
.inst 0xf2160c6a @ sha1p q0,q3,q13
.inst 0xf3bac38a @ sha1su1 q6,q5
.inst 0xf238ec4a @ sha1su0 q7,q4,q5
.inst 0xf3b962c0 @ sha1h q3,q0 @ 8
.inst 0xf2140c68 @ sha1p q0,q2,q12
.inst 0xf3bae38c @ sha1su1 q7,q6
.inst 0xf23a8c4c @ sha1su0 q4,q5,q6
.inst 0xf3b942c0 @ sha1h q2,q0 @ 9
.inst 0xf2160c6a @ sha1p q0,q3,q13
.inst 0xf3ba838e @ sha1su1 q4,q7
.inst 0xf23cac4e @ sha1su0 q5,q6,q7
.inst 0xf3b962c0 @ sha1h q3,q0 @ 10
.inst 0xf2240c68 @ sha1m q0,q2,q12
.inst 0xf3baa388 @ sha1su1 q5,q4
.inst 0xf23ecc48 @ sha1su0 q6,q7,q4
.inst 0xf3b942c0 @ sha1h q2,q0 @ 11
.inst 0xf2260c6a @ sha1m q0,q3,q13
.inst 0xf3bac38a @ sha1su1 q6,q5
.inst 0xf238ec4a @ sha1su0 q7,q4,q5
.inst 0xf3b962c0 @ sha1h q3,q0 @ 12
.inst 0xf2240c68 @ sha1m q0,q2,q12
.inst 0xf3bae38c @ sha1su1 q7,q6
.inst 0xf23a8c4c @ sha1su0 q4,q5,q6
.inst 0xf3b942c0 @ sha1h q2,q0 @ 13
.inst 0xf2260c6a @ sha1m q0,q3,q13
.inst 0xf3ba838e @ sha1su1 q4,q7
.inst 0xf23cac4e @ sha1su0 q5,q6,q7
.inst 0xf3b962c0 @ sha1h q3,q0 @ 14
.inst 0xf2240c68 @ sha1m q0,q2,q12
.inst 0xf3baa388 @ sha1su1 q5,q4
.inst 0xf23ecc48 @ sha1su0 q6,q7,q4
.inst 0xf3b942c0 @ sha1h q2,q0 @ 15
.inst 0xf2160c6a @ sha1p q0,q3,q13
.inst 0xf3bac38a @ sha1su1 q6,q5
.inst 0xf238ec4a @ sha1su0 q7,q4,q5
.inst 0xf3b962c0 @ sha1h q3,q0 @ 16
.inst 0xf2140c68 @ sha1p q0,q2,q12
.inst 0xf3bae38c @ sha1su1 q7,q6
.inst 0xf3b942c0 @ sha1h q2,q0 @ 17
.inst 0xf2160c6a @ sha1p q0,q3,q13
.inst 0xf3b962c0 @ sha1h q3,q0 @ 18
.inst 0xf2140c68 @ sha1p q0,q2,q12
.inst 0xf3b942c0 @ sha1h q2,q0 @ 19
.inst 0xf2160c6a @ sha1p q0,q3,q13
vst1.32 {d2[0]},[r0]
.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4