/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */
/*
 * There are several ways to instantiate this code:
 * - no interleave, all inline
 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
 *
 * Macros imported by this code:
 * - enc_prepare - setup NEON registers for encryption
 * - dec_prepare - setup NEON registers for decryption
 * - enc_switch_key - change to new key after having prepared for encryption
 * - encrypt_block - encrypt a single block
 * - decrypt_block - decrypt a single block
 * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
 * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
 */
/*
 * Out-of-line interleave support: when INTERLEAVE is defined but
 * INTERLEAVE_INLINE is not, the 2x/4x block helpers become real functions
 * reached via bl, so callers need a minimal AAPCS64 frame to preserve
 * x29 (frame pointer) and x30 (link register).
 *
 * NOTE(review): this extract is missing lines — the AES_ENTRY labels and
 * ret instructions for the four helpers, and the #elif/#endif structure
 * around the INTERLEAVE == 2 / == 4 arms, are not visible here.  Compare
 * against the upstream aes-modes.S before assembling.
 */
36 #if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
/* push/pop an x29/x30 frame pair; ';' is the GAS statement separator */
37 #define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
38 #define FRAME_POP ldp x29, x30, [sp],#16
/* 2-way helpers: operate on v0/v1; w3 = rounds, x2 = round keys */
43 encrypt_block2x v0, v1, w3, x2, x6, w7
45 ENDPROC(aes_encrypt_block2x)
48 decrypt_block2x v0, v1, w3, x2, x6, w7
50 ENDPROC(aes_decrypt_block2x)
/* 4-way helpers: operate on v0-v3; same key/rounds registers */
55 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
57 ENDPROC(aes_encrypt_block4x)
60 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
62 ENDPROC(aes_decrypt_block4x)
/* any other INTERLEAVE value is unsupported */
65 #error INTERLEAVE should equal 2 or 4
/*
 * do_{en,de}crypt_block{2,4}x - out-of-line flavour: each expands to a bl
 * into the corresponding helper function defined above.
 * NOTE(review): the matching .endm directives are not visible in this
 * extract — confirm against the upstream file.
 */
68 .macro do_encrypt_block2x
69 bl aes_encrypt_block2x
72 .macro do_decrypt_block2x
73 bl aes_decrypt_block2x
76 .macro do_encrypt_block4x
77 bl aes_encrypt_block4x
80 .macro do_decrypt_block4x
81 bl aes_decrypt_block4x
/*
 * do_{en,de}crypt_block{2,4}x - inline flavour: expand the imported
 * block{2,4}x macros directly at the call site (no bl, no stack frame).
 * NOTE(review): the matching .endm directives are not visible in this
 * extract — confirm against the upstream file.
 */
88 .macro do_encrypt_block2x
89 encrypt_block2x v0, v1, w3, x2, x6, w7
92 .macro do_decrypt_block2x
93 decrypt_block2x v0, v1, w3, x2, x6, w7
96 .macro do_encrypt_block4x
97 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
100 .macro do_decrypt_block4x
101 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
/*
 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                 int blocks, int first)
 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                 int blocks, int first)
 */
/*
 * aes_ecb_encrypt - ECB-encrypt 'blocks' 16-byte blocks from x1 to x0.
 * x0 = out, x1 = in, x2 = round keys, w3 = rounds, w4 = blocks,
 * w5 = first (key schedule is loaded only on the first call).
 * Processes INTERLEAVE (2 or 4) blocks per iteration, then single
 * blocks for the tail.
 * NOTE(review): loop labels referenced here (.LecbencloopNx etc.) and the
 * do_encrypt_blockNx invocations between the ld1/st1 pairs are missing
 * from this extract.
 */
113 AES_ENTRY(aes_ecb_encrypt)
/* skip key setup unless this is the first call (w5 != 0) */
115 cbz w5, .LecbencloopNx
117 enc_prepare w3, x2, x5
/* Nx loop: consume INTERLEAVE blocks per pass while w4 >= INTERLEAVE */
121 subs w4, w4, #INTERLEAVE
124 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
126 st1 {v0.16b-v1.16b}, [x0], #32
128 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
130 st1 {v0.16b-v3.16b}, [x0], #64
/* restore the (negative) remainder; fall through to single blocks */
134 adds w4, w4, #INTERLEAVE
138 ld1 {v0.16b}, [x1], #16 /* get next pt block */
139 encrypt_block v0, w3, x2, x5, w6
140 st1 {v0.16b}, [x0], #16
146 AES_ENDPROC(aes_ecb_encrypt)
/*
 * aes_ecb_decrypt - ECB-decrypt 'blocks' 16-byte blocks from x1 to x0.
 * Mirror image of aes_ecb_encrypt: same register contract
 * (x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, w5 = first),
 * using dec_prepare/decrypt paths instead.
 * NOTE(review): loop labels and the do_decrypt_blockNx invocations are
 * missing from this extract.
 */
149 AES_ENTRY(aes_ecb_decrypt)
/* skip key setup unless this is the first call */
151 cbz w5, .LecbdecloopNx
153 dec_prepare w3, x2, x5
/* Nx loop: INTERLEAVE blocks per pass */
157 subs w4, w4, #INTERLEAVE
160 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
162 st1 {v0.16b-v1.16b}, [x0], #32
164 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
166 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block tail */
170 adds w4, w4, #INTERLEAVE
174 ld1 {v0.16b}, [x1], #16 /* get next ct block */
175 decrypt_block v0, w3, x2, x5, w6
176 st1 {v0.16b}, [x0], #16
182 AES_ENDPROC(aes_ecb_decrypt)
/*
 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                 int blocks, u8 iv[], int first)
 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                 int blocks, u8 iv[], int first)
 */
/*
 * aes_cbc_encrypt - CBC encryption is inherently serial (each block
 * depends on the previous ciphertext), so there is no Nx interleaved path.
 * x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv,
 * w6 = first.  v0 carries the chaining value (iv, then each ct block).
 * NOTE(review): the loop label and branch, and the 'first' check, are
 * missing from this extract.
 */
192 AES_ENTRY(aes_cbc_encrypt)
195 ld1 {v0.16b}, [x5] /* get iv */
196 enc_prepare w3, x2, x6
/* per-block loop: ct[i] = E(pt[i] ^ ct[i-1]) */
199 ld1 {v1.16b}, [x1], #16 /* get next pt block */
200 eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
201 encrypt_block v0, w3, x2, x6, w7
202 st1 {v0.16b}, [x0], #16
/* write back the final ct block as the next call's iv */
205 st1 {v0.16b}, [x5] /* return iv */
207 AES_ENDPROC(aes_cbc_encrypt)
/*
 * aes_cbc_decrypt - CBC decryption CAN be parallelized: each pt block is
 * D(ct[i]) ^ ct[i-1], and all ct blocks are available up front.
 * x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv,
 * w6 = first.  v7 holds the running iv / previous ciphertext block.
 * NOTE(review): lines are missing from this extract — the loop labels,
 * the do_decrypt_blockNx calls, and the mov instructions that stage the
 * saved ciphertext copies (v2 in the 2x path; v4-v6 in the 4x path,
 * which the visible eor lines consume) are not shown.
 */
210 AES_ENTRY(aes_cbc_decrypt)
212 cbz w6, .LcbcdecloopNx
214 ld1 {v7.16b}, [x5] /* get iv */
215 dec_prepare w3, x2, x6
/* Nx loop: INTERLEAVE blocks per pass */
219 subs w4, w4, #INTERLEAVE
222 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
/* xor decrypted blocks with iv / previous ct to recover plaintext */
226 eor v0.16b, v0.16b, v7.16b
227 eor v1.16b, v1.16b, v2.16b
229 st1 {v0.16b-v1.16b}, [x0], #32
231 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
237 eor v0.16b, v0.16b, v7.16b
238 eor v1.16b, v1.16b, v4.16b
/* re-read the last ct block of this batch to become the next iv */
239 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
240 eor v2.16b, v2.16b, v5.16b
241 eor v3.16b, v3.16b, v6.16b
242 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block tail */
246 adds w4, w4, #INTERLEAVE
250 ld1 {v1.16b}, [x1], #16 /* get next ct block */
251 mov v0.16b, v1.16b /* ...and copy to v0 */
252 decrypt_block v0, w3, x2, x6, w7
253 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
254 mov v7.16b, v1.16b /* ct is next iv */
255 st1 {v0.16b}, [x0], #16
260 st1 {v7.16b}, [x5] /* return iv */
262 AES_ENDPROC(aes_cbc_decrypt)
/*
 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                 int blocks, u8 ctr[], int first)
 */
/*
 * aes_ctr_encrypt - CTR mode keystream generation + xor; encryption and
 * decryption are the same operation.
 * x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr,
 * w6 = first.  v4 holds the big-endian counter block; x8 caches its low
 * 64 bits byte-swapped to native order so it can be incremented with
 * integer adds.
 * NOTE(review): many lines are missing from this extract — the counter
 * load and rev of v4, the loop labels, the per-lane counter materialization
 * feeding the 2x/4x paths, the do_encrypt_blockNx calls, and the
 * .Lctrtailblock / .Lctrcarry bodies' surrounding code.
 */
270 AES_ENTRY(aes_ctr_encrypt)
272 cbz w6, .Lctrnotfirst /* 1st time around? */
273 enc_prepare w3, x2, x6
277 umov x8, v4.d[1] /* keep swabbed ctr in reg */
/* detect whether the low 32 counter bits will wrap during this call */
280 cmn w8, w4 /* 32 bit overflow? */
283 subs w4, w4, #INTERLEAVE
/* 2x path: xor two keystream blocks (v0/v1) with two input blocks */
294 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
296 eor v0.16b, v0.16b, v2.16b
297 eor v1.16b, v1.16b, v3.16b
298 st1 {v0.16b-v1.16b}, [x0], #32
/* 4x path: vector of per-lane counter increments (literal pool load) */
300 ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
303 add v7.4s, v7.4s, v8.4s
/* xor four keystream blocks with four input blocks; v5-v7 are reused as
 * scratch for input data here, so the 4th block is loaded separately */
311 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
313 eor v0.16b, v5.16b, v0.16b
314 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
315 eor v1.16b, v6.16b, v1.16b
316 eor v2.16b, v7.16b, v2.16b
317 eor v3.16b, v5.16b, v3.16b
318 st1 {v0.16b-v3.16b}, [x0], #64
/* advance the cached native-order counter by the blocks just consumed */
319 add x8, x8, #INTERLEAVE
326 adds w4, w4, #INTERLEAVE
/* single-block path: encrypt current counter block to get keystream */
331 encrypt_block v0, w3, x2, x6, w7
333 adds x8, x8, #1 /* increment BE ctr */
336 bcs .Lctrcarry /* overflow? */
340 bmi .Lctrtailblock /* blocks <0 means tail block */
341 ld1 {v3.16b}, [x1], #16
342 eor v3.16b, v0.16b, v3.16b
343 st1 {v3.16b}, [x0], #16
347 st1 {v4.16b}, [x5] /* return next CTR value */
/* carry path: propagate the increment into the upper counter word */
357 umov x7, v4.d[0] /* load upper word of ctr */
358 rev x7, x7 /* ... to handle the carry */
363 AES_ENDPROC(aes_ctr_encrypt)
/*
 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *                 int blocks, u8 const rk2[], u8 iv[], int first)
 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *                 int blocks, u8 const rk2[], u8 iv[], int first)
 */
/*
 * next_tweak - advance the XTS tweak: multiply \in by x in GF(2^128).
 * Doubles both 64-bit halves (add out,in,in), then uses the arithmetic
 * shift of the sign bits (sshr #63), masked with \const and rotated by
 * 8 bytes (ext), to both carry the low half's MSB into the high half and
 * fold the reduction constant (0x87, per the data words below) back into
 * the low half when the tweak's top bit was set.
 * NOTE(review): the .endm directive and the .Lxts_mul_x label for the
 * constant are not visible in this extract.
 */
374 .macro next_tweak, out, in, const, tmp
375 sshr \tmp\().2d, \in\().2d, #63
376 and \tmp\().16b, \tmp\().16b, \const\().16b
377 add \out\().2d, \in\().2d, \in\().2d
378 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
379 eor \out\().16b, \out\().16b, \tmp\().16b
/* mask words (carry bit 1, reduction 0x87), laid out per endianness */
383 CPU_LE( .quad 1, 0x87 )
384 CPU_BE( .quad 0x87, 1 )
/*
 * aes_xts_encrypt - XTS encryption.
 * x0 = out, x1 = in, x2 = rk1 (data key), w3 = rounds, w4 = blocks,
 * x5 = rk2 (tweak key), x6 = iv, w7 = first.
 * v4 holds the current tweak; on the first call it is produced by
 * encrypting the iv with rk2, then the NEON state is switched to rk1.
 * Each block is xored with its tweak before AND after the block cipher
 * (hence the paired eor v0/v1 ... sequences around the missing
 * do_encrypt_blockNx call lines).
 * NOTE(review): loop labels, the do_encrypt_blockNx invocations, the
 * load of the .Lxts_mul_x constant into v7, and the branch structure are
 * missing from this extract.
 */
386 AES_ENTRY(aes_xts_encrypt)
388 cbz w7, .LxtsencloopNx
/* derive the initial tweak from the iv using the tweak key rk2 */
391 enc_prepare w3, x5, x6
392 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
393 enc_switch_key w3, x2, x6
/* subsequent Nx iterations: advance the tweak first */
399 next_tweak v4, v4, v7, v8
402 subs w4, w4, #INTERLEAVE
/* 2x path: tweaks v4, v5 */
405 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
406 next_tweak v5, v4, v7, v8
407 eor v0.16b, v0.16b, v4.16b
408 eor v1.16b, v1.16b, v5.16b
/* (encryption happens between these eor pairs; call line not shown) */
410 eor v0.16b, v0.16b, v4.16b
411 eor v1.16b, v1.16b, v5.16b
412 st1 {v0.16b-v1.16b}, [x0], #32
413 cbz w4, .LxtsencoutNx
414 next_tweak v4, v5, v7, v8
/* 4x path: tweaks v4-v7 (v7 temporarily repurposed for the 4th tweak) */
420 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
421 next_tweak v5, v4, v7, v8
422 eor v0.16b, v0.16b, v4.16b
423 next_tweak v6, v5, v7, v8
424 eor v1.16b, v1.16b, v5.16b
425 eor v2.16b, v2.16b, v6.16b
426 next_tweak v7, v6, v7, v8
427 eor v3.16b, v3.16b, v7.16b
429 eor v3.16b, v3.16b, v7.16b
430 eor v0.16b, v0.16b, v4.16b
431 eor v1.16b, v1.16b, v5.16b
432 eor v2.16b, v2.16b, v6.16b
433 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block tail: xor-encrypt-xor with tweak v4 */
439 adds w4, w4, #INTERLEAVE
443 ld1 {v1.16b}, [x1], #16
444 eor v0.16b, v1.16b, v4.16b
445 encrypt_block v0, w3, x2, x6, w7
446 eor v0.16b, v0.16b, v4.16b
447 st1 {v0.16b}, [x0], #16
450 next_tweak v4, v4, v7, v8
455 AES_ENDPROC(aes_xts_encrypt)
/*
 * aes_xts_decrypt - XTS decryption.  Same register contract as
 * aes_xts_encrypt (x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
 * x5 = rk2, x6 = iv, w7 = first).
 * Note the tweak is ALWAYS computed with the ENCRYPT direction of rk2
 * (enc_prepare/encrypt_block below), then the NEON state is switched to
 * the decryption key schedule for the data blocks.
 * NOTE(review): loop labels, the do_decrypt_blockNx invocations, the load
 * of the .Lxts_mul_x constant into v7, and the branch structure are
 * missing from this extract.
 */
458 AES_ENTRY(aes_xts_decrypt)
460 cbz w7, .LxtsdecloopNx
/* first tweak = E_rk2(iv); tweak generation always uses encryption */
463 enc_prepare w3, x5, x6
464 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
465 dec_prepare w3, x2, x6
/* subsequent Nx iterations: advance the tweak first */
471 next_tweak v4, v4, v7, v8
474 subs w4, w4, #INTERLEAVE
/* 2x path: tweaks v4, v5 */
477 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
478 next_tweak v5, v4, v7, v8
479 eor v0.16b, v0.16b, v4.16b
480 eor v1.16b, v1.16b, v5.16b
/* (decryption happens between these eor pairs; call line not shown) */
482 eor v0.16b, v0.16b, v4.16b
483 eor v1.16b, v1.16b, v5.16b
484 st1 {v0.16b-v1.16b}, [x0], #32
485 cbz w4, .LxtsdecoutNx
486 next_tweak v4, v5, v7, v8
/* 4x path: tweaks v4-v7 (v7 temporarily repurposed for the 4th tweak) */
492 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
493 next_tweak v5, v4, v7, v8
494 eor v0.16b, v0.16b, v4.16b
495 next_tweak v6, v5, v7, v8
496 eor v1.16b, v1.16b, v5.16b
497 eor v2.16b, v2.16b, v6.16b
498 next_tweak v7, v6, v7, v8
499 eor v3.16b, v3.16b, v7.16b
501 eor v3.16b, v3.16b, v7.16b
502 eor v0.16b, v0.16b, v4.16b
503 eor v1.16b, v1.16b, v5.16b
504 eor v2.16b, v2.16b, v6.16b
505 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block tail: xor-decrypt-xor with tweak v4 */
511 adds w4, w4, #INTERLEAVE
515 ld1 {v1.16b}, [x1], #16
516 eor v0.16b, v1.16b, v4.16b
517 decrypt_block v0, w3, x2, x6, w7
518 eor v0.16b, v0.16b, v4.16b
519 st1 {v0.16b}, [x0], #16
522 next_tweak v4, v4, v7, v8
527 AES_ENDPROC(aes_xts_decrypt)
/*
 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 *                int blocks, u8 dg[], int enc_before, int enc_after)
 */
/*
 * aes_mac_update - CBC-MAC style digest update: xor each input block into
 * the running digest and (conditionally) encrypt it.
 * x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg,
 * w5 = enc_before, w6 = enc_after (per the prototype comment above).
 * NOTE(review): the loop labels, branches, and the comparison that sets
 * the flags consumed by csinv are missing from this extract; the exact
 * enc_before/enc_after control flow cannot be confirmed from the visible
 * lines.
 */
533 AES_ENTRY(aes_mac_update)
534 ld1 {v0.16b}, [x4] /* get dg */
535 enc_prepare w2, x1, x7
/* per-block loop: fold the next input block into the digest */
540 ld1 {v1.16b}, [x0], #16 /* get next pt block */
541 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
/* x5 = x6 if condition eq holds, else ~xzr (all ones) — selects the
 * encrypt-decision for the following iteration/final block */
544 csinv x5, x6, xzr, eq
548 encrypt_block v0, w2, x1, x7, w8
552 st1 {v0.16b}, [x4] /* return dg */
554 AES_ENDPROC(aes_mac_update)