/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto
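	/*
	 * Load the AES key schedule: round keys 0-3 go into v10-v13, and the
	 * last eleven round keys go into v14-v21 and v3-v5, starting at an
	 * offset derived from the round count, so that v5 always ends up
	 * holding the final round key regardless of the key size.
	 */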
	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	add	\tmp, \rk, w\tmp, sxtw #4
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm
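	/*
	 * Perform one AES round (AESE + AESMC) on two blocks in parallel,
	 * using the same round key for both.
	 */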
	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm
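	/*
	 * Encrypt blocks \va and \vb using the preloaded key schedule,
	 * skipping the rounds that use v10-v13 as appropriate for 10 or 12
	 * round keys. The final AddRoundKey with v5 is deliberately omitted
	 * and is applied by the callers.
	 */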
	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm
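	/*
	 * CCM en/decryption core: each loop iteration encrypts the next
	 * counter block (v1) and the running CBC-MAC (v0) in parallel, then
	 * XORs a 16-byte input block into both. A trailing partial block is
	 * handed off to ce_aes_ccm_crypt_tail.
	 */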
	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	cbz	x2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm
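	/*
	 * Handle the final 1..15 byte block: w2 holds the (negative)
	 * shortfall relative to a full block. The input and output pointers
	 * are rewound so that whole 16-byte loads and stores can be used,
	 * and the permute vectors loaded from .Lpermute realign the data.
	 * v22 (set by the callers) selects whether the loaded input
	 * (encryption) or the XOR result in v7 (decryption) is taken as the
	 * plaintext that gets folded into the MAC.
	 */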
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f
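	/*
	 * Finalize the MAC: encrypt the running MAC and the supplied final
	 * counter block in one pass and XOR them together. Neither value has
	 * had the final AddRoundKey applied, so the final round key cancels
	 * out in the XOR.
	 */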
SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[], u8 const final_iv[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[], u8 const final_iv[]);
	 */
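	/*
	 * Per the AAPCS64 calling convention the arguments arrive as
	 * x0 = out, x1 = in, w2 = cbytes, x3 = rk, w4 = rounds, x5 = mac,
	 * x6 = ctr and x7 = final_iv; a NULL final_iv skips MAC finalization
	 * for this call.
	 */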
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255			/* loaded input is the plaintext */
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0			/* decrypted output is the plaintext */
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)
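	/*
	 * Padding the permute indexes with 0xff on either side lets the tail
	 * handling code index this table at an offset of plus or minus the
	 * number of trailing bytes: out-of-range 0xff indexes make TBL
	 * produce zero bytes and make TBX leave the destination byte
	 * unchanged, so the resulting vectors shift the data while masking
	 * the lanes that must not be touched.
	 */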
	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff