/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */
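
/*
 * Note on conventions: AES_ENTRY()/AES_ENDPROC() are provided by the
 * including file, which may also set MAX_STRIDE to 5 (aes-ce.S, where
 * enough spare registers exist to pipeline five blocks) before including
 * this file; otherwise it defaults to 4 below. The ST4()/ST5() macros
 * emit their argument only in the 4-way resp. 5-way configuration.
 */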
	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif
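
/*
 * The helpers above are reached via bl, so the wrappers below must save
 * x30 (hence their stp x29, x30 prologues). Each helper en/decrypts
 * v0..v3 (plus v4 for the 5x variants) in place, with the round count
 * in w3, the round keys in x2, and x8/w7 used as scratch.
 */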
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
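
	/*
	 * The bulk loop consumes MAX_STRIDE blocks per iteration; the tail
	 * loop handles the remainder one block at a time. E.g. with
	 * MAX_STRIDE == 5, eleven blocks are processed as 5 + 5 + 1.
	 */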
AES_ENTRY(aes_ecb_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lecbenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl	aes_encrypt_block4x		)
ST5(	ld1	{v4.16b}, [x1], #16		)
ST5(	bl	aes_encrypt_block5x		)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LecbencloopNx
.Lecbenc1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lecbencout
.Lecbencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbencloop
.Lecbencout:
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lecbdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl	aes_decrypt_block4x		)
ST5(	ld1	{v4.16b}, [x1], #16		)
ST5(	bl	aes_decrypt_block5x		)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LecbdecloopNx
.Lecbdec1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lecbdecout
.Lecbdecloop:
	ld1	{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbdecloop
.Lecbdecout:
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */
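
	/*
	 * ESSIV differs from plain CBC only in how the IV is derived: the
	 * caller's IV (typically a sector number) is first encrypted using
	 * the separate schedule rk2. rk2 is expanded from a 256-bit digest
	 * of the key (the kernel's essiv template uses sha256), which is
	 * why the tweak encryption below hardcodes the 14 rounds of AES-256.
	 */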
AES_ENTRY(aes_essiv_cbc_encrypt)
	ld1	{v4.16b}, [x5]			/* get iv */

	mov	w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b	.Lcbcencloop4x

AES_ENTRY(aes_cbc_encrypt)
	ld1	{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs	w4, w4, #4
	bmi	.Lcbcenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor	v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor	v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor	v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor	v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v3.16b
	b	.Lcbcencloop4x
.Lcbcenc1x:
	adds	w4, w4, #4
	beq	.Lcbcencout
.Lcbcencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	eor	v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1	{v4.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcencloop
.Lcbcencout:
	st1	{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENDPROC(aes_essiv_cbc_encrypt)
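
	/*
	 * CBC encryption above is inherently serial: each block must be
	 * XORed with the previous ciphertext before it can be encrypted, so
	 * the 4-block loop only amortizes loads and stores. Decryption has
	 * no such dependency, so the code below runs MAX_STRIDE blocks in
	 * parallel through the interleaved block4x/block5x helpers.
	 */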
AES_ENTRY(aes_essiv_cbc_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{cbciv.16b}, [x5]		/* get iv */

	mov	w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b	.Lessivcbcdecstart

AES_ENTRY(aes_cbc_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lcbcdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1	{v4.16b}, [x1], #16		/* get 1 ct block */
	mov	v5.16b, v0.16b
	mov	v6.16b, v1.16b
	mov	v7.16b, v2.16b
	bl	aes_decrypt_block5x
	sub	x1, x1, #32
	eor	v0.16b, v0.16b, cbciv.16b
	eor	v1.16b, v1.16b, v5.16b
	ld1	{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1	{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v6.16b
	eor	v3.16b, v3.16b, v7.16b
	eor	v4.16b, v4.16b, v5.16b
#else
	mov	v4.16b, v0.16b
	mov	v5.16b, v1.16b
	mov	v6.16b, v2.16b
	bl	aes_decrypt_block4x
	sub	x1, x1, #16
	eor	v0.16b, v0.16b, cbciv.16b
	eor	v1.16b, v1.16b, v4.16b
	ld1	{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
#endif
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LcbcdecloopNx
.Lcbcdec1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lcbcdecout
.Lcbcdecloop:
	ld1	{v1.16b}, [x1], #16		/* get next ct block */
	mov	v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov	cbciv.16b, v1.16b		/* ct is next iv */
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcdecloop
.Lcbcdecout:
	st1	{cbciv.16b}, [x5]		/* return iv */
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)
AES_ENDPROC(aes_essiv_cbc_decrypt)
	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */
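
	/*
	 * Ciphertext stealing (CS3): the C glue hands these routines just
	 * the final portion of the message, i.e. one full block plus a
	 * possibly empty partial block. The last full block is processed,
	 * the partial block is padded with bytes stolen from it, and the
	 * two output blocks are emitted in swapped order, so the ciphertext
	 * is exactly as long as the plaintext.
	 */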
AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)
AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
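
/*
 * A 16-byte window into the table above, taken at a byte offset n or
 * 32 - n (0 < n < 16), produces a tbl/tbx index vector that shifts byte
 * lanes by n positions: the 0xff entries make tbl write zeroes (and
 * leave the destination lanes untouched for tbx), while the 0x0..0xf
 * run selects a contiguous span of source bytes. This is what drives
 * the overlapping loads/stores of the CTS tail handling above and in
 * the XTS code below.
 */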
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
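
	/*
	 * The low 64 bits of the counter are kept byte-swapped in x6 so
	 * they can be incremented with plain integer arithmetic, while vctr
	 * holds the big-endian counter block itself. If incrementing the
	 * low 32-bit lane could overflow during this call, the code stays
	 * on the one-block-at-a-time path, which checks for a carry out of
	 * the low 64 bits on every increment and fixes up the high half
	 * out of line (.Lctrcarry).
	 */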
AES_ENTRY(aes_ctr_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x6
	ld1	{vctr.16b}, [x5]

	umov	x6, vctr.d[1]		/* keep swabbed ctr in reg */
	rev	x6, x6
	cmn	w6, w4			/* 32 bit overflow? */
	bcs	.Lctrloop
.LctrloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lctr1x
	add	w7, w6, #1
	mov	v0.16b, vctr.16b
	add	w8, w6, #2
	mov	v1.16b, vctr.16b
	add	w9, w6, #3
	mov	v2.16b, vctr.16b
	rev	w7, w7
	mov	v3.16b, vctr.16b
	rev	w8, w8
ST5(	mov	v4.16b, vctr.16b	)
	mov	v1.s[3], w7
	rev	w9, w9
ST5(	add	w10, w6, #4		)
	mov	v2.s[3], w8
ST5(	rev	w10, w10		)
	mov	v3.s[3], w9
ST5(	mov	v4.s[3], w10		)
	ld1	{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
ST4(	bl	aes_encrypt_block4x		)
ST5(	bl	aes_encrypt_block5x		)
	eor	v0.16b, v5.16b, v0.16b
ST4(	ld1	{v5.16b}, [x1], #16		)
	eor	v1.16b, v6.16b, v1.16b
ST5(	ld1	{v5.16b-v6.16b}, [x1], #32	)
	eor	v2.16b, v7.16b, v2.16b
	eor	v3.16b, v5.16b, v3.16b
ST5(	eor	v4.16b, v6.16b, v4.16b		)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	add	x6, x6, #MAX_STRIDE
	rev	x7, x6
	ins	vctr.d[1], x7
	cbz	w4, .Lctrout
	b	.LctrloopNx
.Lctr1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lctrout
.Lctrloop:
	mov	v0.16b, vctr.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds	x6, x6, #1		/* increment BE ctr */
	rev	x7, x6
	ins	vctr.d[1], x7
	bcs	.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs	w4, w4, #1
	bmi	.Lctrtailblock		/* blocks <0 means tail block */
	ld1	{v3.16b}, [x1], #16
	eor	v3.16b, v0.16b, v3.16b
	st1	{v3.16b}, [x0], #16
	bne	.Lctrloop

.Lctrout:
	st1	{vctr.16b}, [x5]	/* return next CTR value */
	ldp	x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1	{v0.16b}, [x0]
	b	.Lctrout

.Lctrcarry:
	umov	x7, vctr.d[0]		/* load upper word of ctr */
	rev	x7, x7			/* ... to handle the carry */
	add	x7, x7, #1
	rev	x7, x7
	ins	vctr.d[0], x7
	b	.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm
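
	/*
	 * next_tweak computes the tweak for the next block: doubling in
	 * GF(2^128) modulo x^128 + x^7 + x^2 + x + 1. The sshr broadcasts
	 * each 64-bit lane's top bit, the and reduces that to the per-lane
	 * constant from xtsmask, and the ext swaps the two halves so that
	 * the carry out of the low lane feeds bit 64 while the carry out
	 * of bit 127 folds back into the low byte as 0x87.
	 */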
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm
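
	/*
	 * uzp1 interleaves the even 32-bit lanes of the two movi results,
	 * leaving xtsmask = { 0x1, 0x87 } in its two 64-bit lanes: the
	 * carry bit for the high half and the reduction constant for the
	 * low half, as consumed by next_tweak above.
	 */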
AES_ENTRY(aes_xts_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{v4.16b}, [x6]
	xts_load_mask	v8
	cbz	w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b	.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs	w4, w4, #64
	bmi	.Lxtsenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_encrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsencret
	xts_reload_mask	v8
	b	.LxtsencloopNx
.Lxtsenc1x:
	adds	w4, w4, #64
	beq	.Lxtsencout
	subs	w4, w4, #16
	bmi	.LxtsencctsNx
.Lxtsencloop:
	ld1	{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor	v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	cbz	w4, .Lxtsencout
	subs	w4, w4, #16
	next_tweak	v4, v4, v8
	bmi	.Lxtsenccts
	st1	{v0.16b}, [x0], #16
	b	.Lxtsencloop
.Lxtsencout:
	st1	{v0.16b}, [x0]
.Lxtsencret:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret

.LxtsencctsNx:
	mov	v0.16b, v3.16b
	sub	x0, x0, #16
.Lxtsenccts:
	adr_l	x8, .Lcts_permute_table

	add	x1, x1, w4, sxtw	/* rewind input pointer */
	add	w4, w4, #16		/* # bytes in final block */
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	add	x4, x0, x4		/* output address of final block */

	ld1	{v1.16b}, [x1]		/* load final block */
	ld1	{v2.16b}, [x8]
	ld1	{v3.16b}, [x9]

	tbl	v2.16b, {v0.16b}, v2.16b
	tbx	v0.16b, {v1.16b}, v3.16b
	st1	{v2.16b}, [x4]		/* overlapping stores */
	mov	w4, wzr
	b	.Lxtsencctsout
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub	w8, w4, #0x10
	tst	w4, #0xf
	csel	w4, w4, w8, eq

	ld1	{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz	w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b	.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs	w4, w4, #64
	bmi	.Lxtsdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_decrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsdecout
	xts_reload_mask	v8
	b	.LxtsdecloopNx
.Lxtsdec1x:
	adds	w4, w4, #64
	beq	.Lxtsdecout
	subs	w4, w4, #16
.Lxtsdecloop:
	ld1	{v0.16b}, [x1], #16
	bmi	.Lxtsdeccts
.Lxtsdecctsout:
	eor	v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x0], #16
	cbz	w4, .Lxtsdecout
	subs	w4, w4, #16
	next_tweak	v4, v4, v8
	b	.Lxtsdecloop
.Lxtsdecout:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret

.Lxtsdeccts:
	adr_l	x8, .Lcts_permute_table

	add	x1, x1, w4, sxtw	/* rewind input pointer */
	add	w4, w4, #16		/* # bytes in final block */
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	add	x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1	{v1.16b}, [x1]		/* load final block */
	ld1	{v2.16b}, [x8]
	ld1	{v3.16b}, [x9]

	eor	v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v5.16b

	tbl	v2.16b, {v0.16b}, v2.16b
	tbx	v0.16b, {v1.16b}, v3.16b

	st1	{v2.16b}, [x4]		/* overlapping stores */
	mov	w4, wzr
	b	.Lxtsdecctsout
AES_ENDPROC(aes_xts_decrypt)
	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
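
	/*
	 * CBC-MAC style core for the CMAC/XCBC glue: dg holds the running
	 * digest, and every input block is XORed in and then encrypted.
	 * enc_before requests one extra encryption of dg on entry; the
	 * csinv in the loops below encrypts unconditionally while blocks
	 * remain and applies the caller's enc_after only to the last one,
	 * which lets the C glue defer the final encryption (e.g. until the
	 * subkey has been XORed in for CMAC finalization).
	 */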
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x6

	ld1	{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz	w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs	w22, w22, #4
	bmi	.Lmac1x
	ld1	{v1.16b-v4.16b}, [x19], #64	/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v4.16b
	cmp	w22, wzr
	csinv	x5, x24, xzr, eq
	cbz	x5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1	{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b	.Lmacloop4x
.Lmac1x:
	add	w22, w22, #4
.Lmacloop:
	cbz	w22, .Lmacout
	ld1	{v1.16b}, [x19], #16		/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs	w22, w22, #1
	csinv	x5, x24, xzr, eq
	cbz	x5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b	.Lmacloop

.Lmacout:
	st1	{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	ld1	{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b	.Lmacloop4x
AES_ENDPROC(aes_mac_update)