2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 /* included by aes-ce.S and aes-neon.S */
17 * There are several ways to instantiate this code:
18 * - no interleave, all inline
19 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
20 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
21 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
22 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
24 * Macros imported by this code:
25 * - enc_prepare - setup NEON registers for encryption
26 * - dec_prepare - setup NEON registers for decryption
27 * - enc_switch_key - change to new key after having prepared for encryption
28 * - encrypt_block - encrypt a single block
29 * - decrypt_block - decrypt a single block
30 * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
31 * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
32 * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
33 * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
36 #if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
/*
 * Out-of-line configuration: the 2x/4x block primitives are wrapped in
 * real functions reached via "bl", so callers need a stack frame that
 * preserves the frame pointer and link register (AAPCS64: x29/x30).
 */
37 #define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
38 #define FRAME_POP ldp x29, x30, [sp],#16
/*
 * Wrapper bodies for the imported 2-way primitives. Register roles
 * follow the mode-function prototypes below: x2 = round-key array,
 * w3 = number of rounds; x6/w7 look like scratch registers handed to
 * the underlying macro (NOTE(review): the AES_ENTRY lines and the
 * surrounding "#if INTERLEAVE == 2" guard are elided in this excerpt —
 * confirm against the full file).
 */
43 encrypt_block2x v0, v1, w3, x2, x6, w7
45 ENDPROC(aes_encrypt_block2x)
48 decrypt_block2x v0, v1, w3, x2, x6, w7
50 ENDPROC(aes_decrypt_block2x)
/* 4-way wrappers: same register assignments, state in v0-v3. */
55 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
57 ENDPROC(aes_encrypt_block4x)
60 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
62 ENDPROC(aes_decrypt_block4x)
/* Reject any interleave factor other than the two supported ones. */
65 #error INTERLEAVE should equal 2 or 4
/*
 * Out-of-line variants of the do_* helpers: each simply branches and
 * links to the corresponding wrapper function defined above, trading a
 * call per iteration for smaller code in every mode loop.
 * NOTE(review): the closing .endm lines are elided in this excerpt.
 */
68 .macro do_encrypt_block2x
69 bl aes_encrypt_block2x
72 .macro do_decrypt_block2x
73 bl aes_decrypt_block2x
76 .macro do_encrypt_block4x
77 bl aes_encrypt_block4x
80 .macro do_decrypt_block4x
81 bl aes_decrypt_block4x
/*
 * Inline variants (INTERLEAVE_INLINE): expand the imported 2x/4x
 * primitives directly at every call site — no stack frame and no "bl"
 * overhead, at the cost of code size. Register roles match the
 * out-of-line wrappers: state in v0-v1 (or v0-v3), x2 = round keys,
 * w3 = rounds, x6/w7 scratch.
 * NOTE(review): the closing .endm lines are elided in this excerpt.
 */
88 .macro do_encrypt_block2x
89 encrypt_block2x v0, v1, w3, x2, x6, w7
92 .macro do_decrypt_block2x
93 decrypt_block2x v0, v1, w3, x2, x6, w7
96 .macro do_encrypt_block4x
97 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
100 .macro do_decrypt_block4x
101 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
107 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
108 * int blocks, int first)
109 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
110 * int blocks, int first)
/*
 * ECB encryption. Args per the prototype above: x0 = out, x1 = in,
 * x2 = round keys, w3 = rounds, w4 = block count, w5 = "first" flag.
 * NOTE(review): labels and branch instructions between the visible
 * lines are elided in this excerpt; comments cover only what is shown.
 */
113 AES_ENTRY(aes_ecb_encrypt)
115 cbz w5, .LecbencloopNx /* not first call: keys already loaded */
117 enc_prepare w3, x2, x5 /* set up round keys (x5 = scratch) */
121 subs w4, w4, #INTERLEAVE /* enough blocks for an Nx pass? */
124 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
126 st1 {v0.16b-v1.16b}, [x0], #32 /* store 2 ct blocks */
128 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
130 st1 {v0.16b-v3.16b}, [x0], #64 /* store 4 ct blocks */
134 adds w4, w4, #INTERLEAVE /* undo bias; leftover single blocks? */
138 ld1 {v0.16b}, [x1], #16 /* get next pt block */
139 encrypt_block v0, w3, x2, x5, w6 /* encrypt one block in place */
140 st1 {v0.16b}, [x0], #16 /* store ct block */
146 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption — mirror image of aes_ecb_encrypt above, using the
 * decryption key schedule (dec_prepare / decrypt_block).
 * NOTE(review): interior labels/branches are elided in this excerpt.
 */
149 AES_ENTRY(aes_ecb_decrypt)
151 cbz w5, .LecbdecloopNx /* not first call: keys already loaded */
153 dec_prepare w3, x2, x5 /* set up decryption keys (x5 = scratch) */
157 subs w4, w4, #INTERLEAVE /* enough blocks for an Nx pass? */
160 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
162 st1 {v0.16b-v1.16b}, [x0], #32 /* store 2 pt blocks */
164 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
166 st1 {v0.16b-v3.16b}, [x0], #64 /* store 4 pt blocks */
170 adds w4, w4, #INTERLEAVE /* undo bias; leftover single blocks? */
174 ld1 {v0.16b}, [x1], #16 /* get next ct block */
175 decrypt_block v0, w3, x2, x5, w6 /* decrypt one block in place */
176 st1 {v0.16b}, [x0], #16 /* store pt block */
182 AES_ENDPROC(aes_ecb_decrypt)
186 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
187 * int blocks, u8 iv[], int first)
188 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
189 * int blocks, u8 iv[], int first)
/*
 * CBC encryption. Inherently serial (each block chains on the previous
 * ciphertext), so no interleaved path exists. x5 = iv pointer on entry,
 * then reused as scratch once the iv sits in v0.
 * NOTE(review): the loop label/branch lines are elided in this excerpt.
 */
192 AES_ENTRY(aes_cbc_encrypt)
195 ld1 {v0.16b}, [x5] /* get iv */
196 enc_prepare w3, x2, x5 /* set up round keys */
199 ld1 {v1.16b}, [x1], #16 /* get next pt block */
200 eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
201 encrypt_block v0, w3, x2, x5, w6 /* v0 = ct, also next chain value */
202 st1 {v0.16b}, [x0], #16 /* store ct block */
206 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption. Unlike encryption this parallelizes: each output
 * block is D(ct[i]) ^ ct[i-1], so N blocks can be decrypted at once
 * and XORed with the saved ciphertexts afterwards. v7 carries the
 * chaining value (iv, then previous ciphertext block) across
 * iterations. w6 = "first" flag per the prototype above.
 * NOTE(review): the ciphertext save/copy instructions and loop
 * labels/branches are elided in this excerpt — the eor sources below
 * (v2 in the 2x path, v4-v6 in the 4x path) are presumably copies of
 * the loaded ciphertext made on the elided lines; confirm against the
 * full file.
 */
209 AES_ENTRY(aes_cbc_decrypt)
211 cbz w6, .LcbcdecloopNx /* not first call: iv/keys already set */
213 ld1 {v7.16b}, [x5] /* get iv */
214 dec_prepare w3, x2, x5 /* set up decryption keys */
218 subs w4, w4, #INTERLEAVE /* enough blocks for an Nx pass? */
221 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
225 eor v0.16b, v0.16b, v7.16b /* pt0 = D(ct0) ^ chain */
226 eor v1.16b, v1.16b, v2.16b /* pt1 = D(ct1) ^ ct0 */
228 st1 {v0.16b-v1.16b}, [x0], #32
230 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
236 eor v0.16b, v0.16b, v7.16b /* pt0 = D(ct0) ^ chain */
237 eor v1.16b, v1.16b, v4.16b /* pt1 = D(ct1) ^ ct0 */
238 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
239 eor v2.16b, v2.16b, v5.16b /* pt2 = D(ct2) ^ ct1 */
240 eor v3.16b, v3.16b, v6.16b /* pt3 = D(ct3) ^ ct2 */
241 st1 {v0.16b-v3.16b}, [x0], #64
245 adds w4, w4, #INTERLEAVE /* undo bias; leftover single blocks? */
249 ld1 {v1.16b}, [x1], #16 /* get next ct block */
250 mov v0.16b, v1.16b /* ...and copy to v0 */
251 decrypt_block v0, w3, x2, x5, w6
252 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
253 mov v7.16b, v1.16b /* ct is next iv */
254 st1 {v0.16b}, [x0], #16
260 AES_ENDPROC(aes_cbc_decrypt)
264 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
265 * int blocks, u8 ctr[], int first)
/*
 * CTR encryption. v4 holds the big-endian counter block; the swabbed
 * (CPU-endian) low 64 bits are mirrored in x5 so the counter can be
 * incremented with plain integer arithmetic. x0 = out, x1 = in,
 * x2 = round keys, w3 = rounds, w4 = block count (negative final value
 * encodes a trailing half block — see .Lctrhalfblock), x6 = ctr
 * pointer on entry, w6/w7 later reused as scratch.
 * NOTE(review): a large fraction of this function (counter byte-swaps,
 * keystream generation, labels, branches) is elided in this excerpt;
 * comments describe only the visible instructions.
 */
268 AES_ENTRY(aes_ctr_encrypt)
270 cbnz w6, .Lctrfirst /* 1st time around? */
271 umov x5, v4.d[1] /* keep swabbed ctr in reg */
274 cmn w5, w4 /* 32 bit overflow? */
276 add x5, x5, #1 /* increment BE ctr */
282 enc_prepare w3, x2, x6 /* first call: set up round keys */
284 umov x5, v4.d[1] /* keep swabbed ctr in reg */
287 cmn w5, w4 /* 32 bit overflow? */
290 subs w4, w4, #INTERLEAVE /* enough blocks for an Nx pass? */
301 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
303 eor v0.16b, v0.16b, v2.16b /* xor keystream with pt */
304 eor v1.16b, v1.16b, v3.16b
305 st1 {v0.16b-v1.16b}, [x0], #32
307 ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
310 add v7.4s, v7.4s, v8.4s /* derive 3 successive counters */
318 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
320 eor v0.16b, v5.16b, v0.16b /* xor keystream with pt */
321 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
322 eor v1.16b, v6.16b, v1.16b
323 eor v2.16b, v7.16b, v2.16b
324 eor v3.16b, v5.16b, v3.16b
325 st1 {v0.16b-v3.16b}, [x0], #64
326 add x5, x5, #INTERLEAVE /* advance integer copy of ctr */
339 adds w4, w4, #INTERLEAVE /* undo bias; leftover (half) blocks? */
344 encrypt_block v0, w3, x2, x6, w7 /* v0 = keystream for this block */
346 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
347 ld1 {v3.16b}, [x1], #16
348 eor v3.16b, v0.16b, v3.16b /* ct = keystream ^ pt */
349 st1 {v3.16b}, [x0], #16
352 adds x5, x5, #1 /* increment BE ctr */
355 bcc .Lctrloop /* no overflow? */
356 umov x7, v4.d[0] /* load upper word of ctr */
357 rev x7, x7 /* ... to handle the carry */
364 eor v3.8b, v0.8b, v3.8b /* half block: xor low 8 bytes only */
369 AES_ENDPROC(aes_ctr_encrypt)
374 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
375 * int blocks, u8 const rk2[], u8 iv[], int first)
376 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
377 * int blocks, u8 const rk2[], u8 iv[], int first)
/*
 * Compute the next XTS tweak: multiply \in by x in GF(2^128), i.e.
 * shift the 128-bit tweak left by one and, if the top bit was set,
 * reduce by the field polynomial. \const holds the reduction constant
 * (loaded on lines elided from this excerpt), \tmp is clobbered.
 * NOTE(review): the closing .endm is elided in this excerpt.
 */
380 .macro next_tweak, out, in, const, tmp
381 sshr \tmp\().2d, \in\().2d, #63 /* replicate MSB of each 64-bit half */
382 and \tmp\().16b, \tmp\().16b, \const\().16b /* select carry terms */
383 add \out\().2d, \in\().2d, \in\().2d /* shift both halves left by 1 */
384 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 /* swap halves: route carries */
385 eor \out\().16b, \out\().16b, \tmp\().16b /* apply carry/reduction */
/*
 * XTS encryption. x0 = out, x1 = in, x2 = data key (rk1), w3 = rounds,
 * w4 = block count, x5 = tweak key (rk2), x6 = iv, w7 = "first" flag.
 * The running tweak lives in v4; the Nx paths derive follow-on tweaks
 * into v5-v7 with next_tweak, mask the plaintext before and after the
 * block cipher, and advance the chain.
 * NOTE(review): many interior lines (iv load, the reduction-constant
 * load into v7, labels, branches, the do_encrypt_block calls) are
 * elided in this excerpt. In the 4x path v7 is overwritten with the
 * fourth tweak, which implies the constant is reloaded on the elided
 * loop-continuation path — confirm against the full file.
 */
391 AES_ENTRY(aes_xts_encrypt)
393 cbz w7, .LxtsencloopNx /* not first call: tweak already set */
396 enc_prepare w3, x5, x6 /* load tweak key */
397 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
398 enc_switch_key w3, x2, x6 /* switch to the data key */
404 next_tweak v4, v4, v7, v8 /* advance tweak for next iteration */
407 subs w4, w4, #INTERLEAVE /* enough blocks for an Nx pass? */
410 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
411 next_tweak v5, v4, v7, v8 /* tweak for block 1 */
412 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
413 eor v1.16b, v1.16b, v5.16b
415 eor v0.16b, v0.16b, v4.16b /* post-whiten after encryption */
416 eor v1.16b, v1.16b, v5.16b
417 st1 {v0.16b-v1.16b}, [x0], #32
418 cbz w4, .LxtsencoutNx /* block count hit exactly zero */
419 next_tweak v4, v5, v7, v8 /* carry chain into next pass */
425 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
426 next_tweak v5, v4, v7, v8 /* tweaks for blocks 1-3 */
427 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
428 next_tweak v6, v5, v7, v8
429 eor v1.16b, v1.16b, v5.16b
430 eor v2.16b, v2.16b, v6.16b
431 next_tweak v7, v6, v7, v8 /* note: clobbers the constant in v7 */
432 eor v3.16b, v3.16b, v7.16b
434 eor v3.16b, v3.16b, v7.16b /* post-whiten after encryption */
435 eor v0.16b, v0.16b, v4.16b
436 eor v1.16b, v1.16b, v5.16b
437 eor v2.16b, v2.16b, v6.16b
438 st1 {v0.16b-v3.16b}, [x0], #64
444 adds w4, w4, #INTERLEAVE /* undo bias; leftover single blocks? */
448 ld1 {v1.16b}, [x1], #16 /* get next pt block */
449 eor v0.16b, v1.16b, v4.16b /* pre-whiten with tweak */
450 encrypt_block v0, w3, x2, x6, w7
451 eor v0.16b, v0.16b, v4.16b /* post-whiten */
452 st1 {v0.16b}, [x0], #16
455 next_tweak v4, v4, v7, v8 /* advance tweak for next block */
460 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption — same structure and tweak schedule as
 * aes_xts_encrypt above. Note the first tweak is still ENcrypted with
 * the tweak key (enc_prepare/encrypt_block on rk2) before switching to
 * the DEcryption schedule of the data key, as XTS requires.
 * NOTE(review): many interior lines (iv load, the reduction-constant
 * load into v7, labels, branches, the do_decrypt_block calls) are
 * elided in this excerpt; comments describe only the visible
 * instructions.
 */
463 AES_ENTRY(aes_xts_decrypt)
465 cbz w7, .LxtsdecloopNx /* not first call: tweak already set */
468 enc_prepare w3, x5, x6 /* load tweak key (encrypt side) */
469 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
470 dec_prepare w3, x2, x6 /* switch to data decryption keys */
476 next_tweak v4, v4, v7, v8 /* advance tweak for next iteration */
479 subs w4, w4, #INTERLEAVE /* enough blocks for an Nx pass? */
482 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
483 next_tweak v5, v4, v7, v8 /* tweak for block 1 */
484 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
485 eor v1.16b, v1.16b, v5.16b
487 eor v0.16b, v0.16b, v4.16b /* post-whiten after decryption */
488 eor v1.16b, v1.16b, v5.16b
489 st1 {v0.16b-v1.16b}, [x0], #32
490 cbz w4, .LxtsdecoutNx /* block count hit exactly zero */
491 next_tweak v4, v5, v7, v8 /* carry chain into next pass */
497 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
498 next_tweak v5, v4, v7, v8 /* tweaks for blocks 1-3 */
499 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
500 next_tweak v6, v5, v7, v8
501 eor v1.16b, v1.16b, v5.16b
502 eor v2.16b, v2.16b, v6.16b
503 next_tweak v7, v6, v7, v8 /* note: clobbers the constant in v7 */
504 eor v3.16b, v3.16b, v7.16b
506 eor v3.16b, v3.16b, v7.16b /* post-whiten after decryption */
507 eor v0.16b, v0.16b, v4.16b
508 eor v1.16b, v1.16b, v5.16b
509 eor v2.16b, v2.16b, v6.16b
510 st1 {v0.16b-v3.16b}, [x0], #64
516 adds w4, w4, #INTERLEAVE /* undo bias; leftover single blocks? */
520 ld1 {v1.16b}, [x1], #16 /* get next ct block */
521 eor v0.16b, v1.16b, v4.16b /* pre-whiten with tweak */
522 decrypt_block v0, w3, x2, x6, w7
523 eor v0.16b, v0.16b, v4.16b /* post-whiten */
524 st1 {v0.16b}, [x0], #16
527 next_tweak v4, v4, v7, v8 /* advance tweak for next block */
532 AES_ENDPROC(aes_xts_decrypt)