1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
5 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
8 /* included by aes-ce.S and aes-neon.S */
/*
 * aes_encrypt_block4x - encrypt four AES blocks held in v0..v3 in
 * parallel with a single key schedule.  Per the prototype comments in
 * this file, x2 = round key pointer and w3 = number of rounds; x8/w7
 * are scratch operands for the encrypt_block4x macro.
 * NOTE(review): the entry label and return instruction for this routine
 * are not visible in this excerpt — only the macro body and ENDPROC.
 */
14 encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
16 ENDPROC(aes_encrypt_block4x)
/*
 * aes_decrypt_block4x - decrypt four AES blocks held in v0..v3 in
 * parallel (counterpart of aes_encrypt_block4x above; same operand
 * roles: x2 = round keys, w3 = rounds, x8/w7 scratch).
 * NOTE(review): the entry label and return instruction are not visible
 * in this excerpt — only the macro body and ENDPROC.
 */
19 decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
21 ENDPROC(aes_decrypt_block4x)
24 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
26 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
/*
 * ECB encryption: every block is encrypted independently with the same
 * key schedule.  Args per the prototype comment above: x0 = out,
 * x1 = in, x2 = round keys, w3 = rounds, w4 = blocks.
 * NOTE(review): loop labels, block counting and branch instructions are
 * missing from this excerpt; what remains is the 4-blocks-at-a-time bulk
 * path followed by the single-block tail path.
 */
30 AES_ENTRY(aes_ecb_encrypt)
31 stp x29, x30, [sp, #-16]! /* save FP/LR: LR is clobbered by the bl below */
34 enc_prepare w3, x2, x5
/* bulk path: four plaintext blocks per iteration */
39 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
40 bl aes_encrypt_block4x
41 st1 {v0.16b-v3.16b}, [x0], #64
/* tail path: one block at a time */
47 ld1 {v0.16b}, [x1], #16 /* get next pt block */
48 encrypt_block v0, w3, x2, x5, w6
49 st1 {v0.16b}, [x0], #16
53 ldp x29, x30, [sp], #16 /* restore FP/LR */
55 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption — mirror of aes_ecb_encrypt above.  Args per the
 * prototype comment: x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = blocks.
 * NOTE(review): loop labels, block counting and branch instructions are
 * missing from this excerpt.
 */
58 AES_ENTRY(aes_ecb_decrypt)
59 stp x29, x30, [sp, #-16]! /* save FP/LR: LR is clobbered by the bl below */
62 dec_prepare w3, x2, x5
/* bulk path: four ciphertext blocks per iteration */
67 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
68 bl aes_decrypt_block4x
69 st1 {v0.16b-v3.16b}, [x0], #64
/* tail path: one block at a time */
75 ld1 {v0.16b}, [x1], #16 /* get next ct block */
76 decrypt_block v0, w3, x2, x5, w6
77 st1 {v0.16b}, [x0], #16
81 ldp x29, x30, [sp], #16 /* restore FP/LR */
83 AES_ENDPROC(aes_ecb_decrypt)
87 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
88 * int blocks, u8 iv[])
89 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
90 * int blocks, u8 iv[])
/*
 * CBC encryption.  Args per the prototype comment above: x0 = out,
 * x1 = in, x2 = round keys, w3 = rounds, w4 = blocks, x5 = iv.
 * CBC encryption is inherently serial (each block depends on the
 * previous ciphertext), so even the 4-block path below encrypts one
 * block at a time, chaining through v0..v3 — only the loads and stores
 * are batched.
 * NOTE(review): loop labels, block counting, branches, and the update
 * that makes the last ciphertext block the next IV (v4) are missing
 * from this excerpt.
 */
93 AES_ENTRY(aes_cbc_encrypt)
94 ld1 {v4.16b}, [x5] /* get iv */
95 enc_prepare w3, x2, x6
/* 4-block path: serial chain v4 -> v0 -> v1 -> v2 -> v3 */
100 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
101 eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
102 encrypt_block v0, w3, x2, x6, w7
103 eor v1.16b, v1.16b, v0.16b
104 encrypt_block v1, w3, x2, x6, w7
105 eor v2.16b, v2.16b, v1.16b
106 encrypt_block v2, w3, x2, x6, w7
107 eor v3.16b, v3.16b, v2.16b
108 encrypt_block v3, w3, x2, x6, w7
109 st1 {v0.16b-v3.16b}, [x0], #64 /* write 4 ct blocks */
/* single-block path: v4 carries the running IV/ciphertext */
116 ld1 {v0.16b}, [x1], #16 /* get next pt block */
117 eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
118 encrypt_block v4, w3, x2, x6, w7
119 st1 {v4.16b}, [x0], #16
123 st1 {v4.16b}, [x5] /* return iv */
125 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption.  Args per the prototype comment above: x0 = out,
 * x1 = in, x2 = round keys, w3 = rounds, w4 = blocks, x5 = iv.
 * Unlike encryption, CBC decryption parallelizes: four blocks are
 * decrypted at once, then each result is xored with the *previous*
 * ciphertext block.  v7 holds the running IV (previous ct block).
 * NOTE(review): v4-v6 are presumably copies of the just-loaded
 * ciphertext blocks, made on lines missing from this excerpt — confirm.
 * The reload of v7 at line 146 also presumably follows a rewind of x1
 * not visible here.  Loop labels/branches are likewise missing.
 */
128 AES_ENTRY(aes_cbc_decrypt)
129 stp x29, x30, [sp, #-16]! /* save FP/LR: LR is clobbered by the bl below */
132 ld1 {v7.16b}, [x5] /* get iv */
133 dec_prepare w3, x2, x6
/* 4-block path */
138 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
142 bl aes_decrypt_block4x
/* xor each decrypted block with the preceding ciphertext block */
144 eor v0.16b, v0.16b, v7.16b
145 eor v1.16b, v1.16b, v4.16b
146 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
147 eor v2.16b, v2.16b, v5.16b
148 eor v3.16b, v3.16b, v6.16b
149 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block path */
155 ld1 {v1.16b}, [x1], #16 /* get next ct block */
156 mov v0.16b, v1.16b /* ...and copy to v0 */
157 decrypt_block v0, w3, x2, x6, w7
158 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
159 mov v7.16b, v1.16b /* ct is next iv */
160 st1 {v0.16b}, [x0], #16
164 st1 {v7.16b}, [x5] /* return iv */
165 ldp x29, x30, [sp], #16 /* restore FP/LR */
167 AES_ENDPROC(aes_cbc_decrypt)
171 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
172 * int rounds, int bytes, u8 const iv[])
173 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
174 * int rounds, int bytes, u8 const iv[])
/*
 * CBC ciphertext-stealing encryption for a final partial block.
 * Args per the prototype comment above: x0 = out, x1 = in, x2 = rk,
 * w3 = rounds, w4 = bytes, x5 = iv.  x8 points at the permute table
 * in .rodata; tbl-based permutes shift/zero-pad the short block.
 * NOTE(review): the lines that compute the table offsets, load the
 * permute vectors v3/v4, load v1, and set up x4 for the overlapping
 * load/store addresses are missing from this excerpt — the comments on
 * v1/v3/v4 below should be verified against the full file.
 */
177 AES_ENTRY(aes_cbc_cts_encrypt)
178 adr_l x8, .Lcts_permute_table
186 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
189 ld1 {v5.16b}, [x5] /* get iv */
190 enc_prepare w3, x2, x6
192 eor v0.16b, v0.16b, v5.16b /* xor with iv */
193 tbl v1.16b, {v1.16b}, v4.16b /* shift/pad the partial block */
194 encrypt_block v0, w3, x2, x6, w7
196 eor v1.16b, v1.16b, v0.16b /* chain into the stolen block */
197 tbl v0.16b, {v0.16b}, v3.16b /* permute ct for the swapped output order */
198 encrypt_block v1, w3, x2, x6, w7
201 st1 {v0.16b}, [x4] /* overlapping stores */
204 AES_ENDPROC(aes_cbc_cts_encrypt)
/*
 * CBC ciphertext-stealing decryption for a final partial block —
 * mirror of aes_cbc_cts_encrypt above.  Args per the prototype comment:
 * x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = bytes, x5 = iv.
 * NOTE(review): the lines that load v1, build the permute vectors
 * v3/v4, and set up x4 are missing from this excerpt — the roles noted
 * below should be verified against the full file.
 */
206 AES_ENTRY(aes_cbc_cts_decrypt)
207 adr_l x8, .Lcts_permute_table
215 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
218 ld1 {v5.16b}, [x5] /* get iv */
219 dec_prepare w3, x2, x6
221 tbl v2.16b, {v1.16b}, v4.16b /* shift/pad the partial ct block */
222 decrypt_block v0, w3, x2, x6, w7
223 eor v2.16b, v2.16b, v0.16b /* recover the stolen bytes */
225 tbx v0.16b, {v1.16b}, v4.16b /* merge partial ct back in for 2nd pass */
226 tbl v2.16b, {v2.16b}, v3.16b
227 decrypt_block v0, w3, x2, x6, w7
228 eor v0.16b, v0.16b, v5.16b /* xor with iv */
231 st1 {v2.16b}, [x4] /* overlapping stores */
234 AES_ENDPROC(aes_cbc_cts_decrypt)
236 .section ".rodata", "a"
/*
 * Permute-table bytes used with tbl/tbx by the CTS routines above:
 * runs of 0xff (which tbl maps to zero bytes) on either side of the
 * identity permutation 0..15, so an offset into the table yields a
 * shifted and zero-padded byte permutation for the partial block.
 * NOTE(review): the .Lcts_permute_table label and any alignment
 * directive are not visible in this excerpt.
 */
239 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
240 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
241 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
242 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
243 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
244 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
249 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
250 * int blocks, u8 ctr[])
/*
 * CTR mode.  Args per the prototype comment above: x0 = out, x1 = in,
 * x2 = round keys, w3 = rounds, w4 = blocks, x5 = ctr.  v4 holds the
 * big-endian counter block; x6 caches its byte-swapped low 64 bits so
 * the counter can be incremented with plain integer adds.
 * NOTE(review): this excerpt is heavily truncated — the ld1/rev that
 * initialize v4/x6, the code that materializes four counter blocks
 * into v0-v3 for the bulk path, the loop branches, the tail-block
 * handler (.Lctrtailblock), and most of the carry path (.Lctrcarry)
 * are missing.  Comments below describe only what is visible.
 */
253 AES_ENTRY(aes_ctr_encrypt)
254 stp x29, x30, [sp, #-16]! /* save FP/LR: LR is clobbered by the bl below */
257 enc_prepare w3, x2, x6
260 umov x6, v4.d[1] /* keep swabbed ctr in reg */
262 cmn w6, w4 /* 32 bit overflow? */
/* bulk path: encrypt 4 counter blocks, xor with 4 input blocks */
280 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
281 bl aes_encrypt_block4x
282 eor v0.16b, v5.16b, v0.16b
283 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
284 eor v1.16b, v6.16b, v1.16b
285 eor v2.16b, v7.16b, v2.16b
286 eor v3.16b, v5.16b, v3.16b
287 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block path: v0 presumably holds the counter block here */
298 encrypt_block v0, w3, x2, x8, w7
300 adds x6, x6, #1 /* increment BE ctr */
303 bcs .Lctrcarry /* overflow? */
307 bmi .Lctrtailblock /* blocks <0 means tail block */
308 ld1 {v3.16b}, [x1], #16
309 eor v3.16b, v0.16b, v3.16b /* xor keystream with pt */
310 st1 {v3.16b}, [x0], #16
314 st1 {v4.16b}, [x5] /* return next CTR value */
315 ldp x29, x30, [sp], #16 /* restore FP/LR */
/* carry path: propagate the increment into the high 64 bits of ctr */
323 umov x7, v4.d[0] /* load upper word of ctr */
324 rev x7, x7 /* ... to handle the carry */
329 AES_ENDPROC(aes_ctr_encrypt)
333 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
334 * int blocks, u8 const rk2[], u8 iv[], int first)
335 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
336 * int blocks, u8 const rk2[], u8 iv[], int first)
/*
 * next_tweak - advance the 128-bit XTS tweak: multiply by x in
 * GF(2^128).  sshr fills \tmp with the sign (carry-out) bit of each
 * 64-bit lane; masking with xtsmask (built from 0x1/0x87 by
 * xts_load_mask below) and rotating the halves with ext moves the low
 * lane's carry into the high lane and folds the high lane's carry back
 * in as the reduction constant, while add doubles both lanes.
 * NOTE(review): the closing .endm and the definition of the xtsmask
 * register alias are not visible in this excerpt.
 */
339 .macro next_tweak, out, in, tmp
340 sshr \tmp\().2d, \in\().2d, #63
341 and \tmp\().16b, \tmp\().16b, xtsmask.16b
342 add \out\().2d, \in\().2d, \in\().2d
343 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
344 eor \out\().16b, \out\().16b, \tmp\().16b
/*
 * xts_load_mask - materialize the carry mask used by next_tweak from
 * the immediates 0x1 and 0x87 (0x87 being the GF(2^128) reduction
 * term), interleaving the two constants into xtsmask with uzp1.
 * NOTE(review): the closing .endm is not visible in this excerpt.
 */
347 .macro xts_load_mask, tmp
348 movi xtsmask.2s, #0x1
349 movi \tmp\().2s, #0x87
350 uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
/*
 * XTS encryption.  Args per the prototype comment above: x0 = out,
 * x1 = in, x2 = rk1 (data key), w3 = rounds, w4 = blocks, x5 = rk2
 * (tweak key), x6 = iv, w7 = first.  On the first call the IV is
 * encrypted with rk2 to form the initial tweak (v4), then the key
 * schedule is switched to rk1 for the data.
 * NOTE(review): the load of v4 from [x6], loop labels/branches, block
 * counting, and the save/restore of v8 (whose low half is callee-saved
 * under AAPCS64, yet is used as next_tweak scratch here) are missing
 * from this excerpt — confirm against the full file.
 */
353 AES_ENTRY(aes_xts_encrypt)
354 stp x29, x30, [sp, #-16]! /* save FP/LR: LR is clobbered by the bl below */
359 cbz w7, .Lxtsencnotfirst /* not first call: tweak already derived */
361 enc_prepare w3, x5, x8 /* set up tweak key (rk2) */
362 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
363 enc_switch_key w3, x2, x8 /* switch to data key (rk1) */
367 enc_prepare w3, x2, x8
369 next_tweak v4, v4, v8
/* 4-block path: compute tweaks v4-v7, xor before and after encrypting */
373 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
374 next_tweak v5, v4, v8
375 eor v0.16b, v0.16b, v4.16b
376 next_tweak v6, v5, v8
377 eor v1.16b, v1.16b, v5.16b
378 eor v2.16b, v2.16b, v6.16b
379 next_tweak v7, v6, v8
380 eor v3.16b, v3.16b, v7.16b
381 bl aes_encrypt_block4x
382 eor v3.16b, v3.16b, v7.16b
383 eor v0.16b, v0.16b, v4.16b
384 eor v1.16b, v1.16b, v5.16b
385 eor v2.16b, v2.16b, v6.16b
386 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block path */
395 ld1 {v1.16b}, [x1], #16
396 eor v0.16b, v1.16b, v4.16b /* pre-whiten with tweak */
397 encrypt_block v0, w3, x2, x8, w7
398 eor v0.16b, v0.16b, v4.16b /* post-whiten with tweak */
399 st1 {v0.16b}, [x0], #16
402 next_tweak v4, v4, v8
406 ldp x29, x30, [sp], #16 /* restore FP/LR */
408 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption — mirror of aes_xts_encrypt above.  Args per the
 * prototype comment: x0 = out, x1 = in, x2 = rk1, w3 = rounds,
 * w4 = blocks, x5 = rk2, x6 = iv, w7 = first.  Note the first tweak is
 * still ENcrypted with rk2 (encrypt_block at line 420) — only the data
 * blocks are decrypted, as XTS requires.
 * NOTE(review): the load of v4 from [x6], loop labels/branches, block
 * counting, and the save/restore of v8 (callee-saved low half under
 * AAPCS64, used as next_tweak scratch) are missing from this excerpt.
 */
411 AES_ENTRY(aes_xts_decrypt)
412 stp x29, x30, [sp, #-16]! /* save FP/LR: LR is clobbered by the bl below */
417 cbz w7, .Lxtsdecnotfirst /* not first call: tweak already derived */
419 enc_prepare w3, x5, x8 /* set up tweak key (rk2) */
420 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
421 dec_prepare w3, x2, x8 /* switch to data key (rk1), decrypt schedule */
425 dec_prepare w3, x2, x8
427 next_tweak v4, v4, v8
/* 4-block path: compute tweaks v4-v7, xor before and after decrypting */
431 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
432 next_tweak v5, v4, v8
433 eor v0.16b, v0.16b, v4.16b
434 next_tweak v6, v5, v8
435 eor v1.16b, v1.16b, v5.16b
436 eor v2.16b, v2.16b, v6.16b
437 next_tweak v7, v6, v8
438 eor v3.16b, v3.16b, v7.16b
439 bl aes_decrypt_block4x
440 eor v3.16b, v3.16b, v7.16b
441 eor v0.16b, v0.16b, v4.16b
442 eor v1.16b, v1.16b, v5.16b
443 eor v2.16b, v2.16b, v6.16b
444 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block path */
453 ld1 {v1.16b}, [x1], #16
454 eor v0.16b, v1.16b, v4.16b /* pre-whiten with tweak */
455 decrypt_block v0, w3, x2, x8, w7
456 eor v0.16b, v0.16b, v4.16b /* post-whiten with tweak */
457 st1 {v0.16b}, [x0], #16
460 next_tweak v4, v4, v8
464 ldp x29, x30, [sp], #16 /* restore FP/LR */
466 AES_ENDPROC(aes_xts_decrypt)
469 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
470 * int blocks, u8 dg[], int enc_before, int enc_after)
/*
 * CBC-MAC style digest update.  Args per the prototype comment above:
 * x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg, w5 = enc_before,
 * w6 = enc_after.  The loops below use callee-saved copies (x19 = in,
 * x20/w21 = rk/rounds, x23 = dg, x24 presumably = enc_after) —
 * NOTE(review): the frame_push-style sequence that creates those copies,
 * plus all loop labels, counters and branches, is missing from this
 * excerpt; register roles stated here should be confirmed against the
 * full file.  cond_yield_neon indicates the kernel's voluntary-NEON-
 * yield pattern, with .Lmacrestart (lines 527-528) reloading state
 * after a reschedule.
 */
472 AES_ENTRY(aes_mac_update)
482 ld1 {v0.16b}, [x23] /* get dg */
483 enc_prepare w2, x1, x7
486 encrypt_block v0, w2, x1, x7, w8 /* enc_before: pre-encrypt the digest */
/* 4-block path: fold four input blocks into the digest serially */
491 ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
492 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
493 encrypt_block v0, w21, x20, x7, w8
494 eor v0.16b, v0.16b, v2.16b
495 encrypt_block v0, w21, x20, x7, w8
496 eor v0.16b, v0.16b, v3.16b
497 encrypt_block v0, w21, x20, x7, w8
498 eor v0.16b, v0.16b, v4.16b
500 csinv x5, x24, xzr, eq /* select enc_after flag (presumably) at end of data */
502 encrypt_block v0, w21, x20, x7, w8
503 st1 {v0.16b}, [x23] /* return dg */
504 cond_yield_neon .Lmacrestart /* yield NEON/CPU if rescheduling is due */
/* single-block path */
510 ld1 {v1.16b}, [x19], #16 /* get next pt block */
511 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
514 csinv x5, x24, xzr, eq
518 encrypt_block v0, w21, x20, x7, w8
522 st1 {v0.16b}, [x23] /* return dg */
/* .Lmacrestart: re-establish state after cond_yield_neon resumed us */
527 ld1 {v0.16b}, [x23] /* get dg */
528 enc_prepare w21, x20, x0
530 AES_ENDPROC(aes_mac_update)