// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */
#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
23 asmlinkage
void chacha_block_xor_neon(const u32
*state
, u8
*dst
, const u8
*src
,
25 asmlinkage
void chacha_4block_xor_neon(const u32
*state
, u8
*dst
, const u8
*src
,
26 int nrounds
, unsigned int nbytes
);
27 asmlinkage
void hchacha_block_arm(const u32
*state
, u32
*out
, int nrounds
);
28 asmlinkage
void hchacha_block_neon(const u32
*state
, u32
*out
, int nrounds
);
30 asmlinkage
void chacha_doarm(u8
*dst
, const u8
*src
, unsigned int bytes
,
31 const u32
*state
, int nrounds
);
/* Enabled once at init time when NEON is present and worth using. */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
35 static inline bool neon_usable(void)
37 return static_branch_likely(&use_neon
) && crypto_simd_usable();
40 static void chacha_doneon(u32
*state
, u8
*dst
, const u8
*src
,
41 unsigned int bytes
, int nrounds
)
43 u8 buf
[CHACHA_BLOCK_SIZE
];
45 while (bytes
> CHACHA_BLOCK_SIZE
) {
46 unsigned int l
= min(bytes
, CHACHA_BLOCK_SIZE
* 4U);
48 chacha_4block_xor_neon(state
, dst
, src
, nrounds
, l
);
52 state
[12] += DIV_ROUND_UP(l
, CHACHA_BLOCK_SIZE
);
58 if (bytes
!= CHACHA_BLOCK_SIZE
)
59 s
= d
= memcpy(buf
, src
, bytes
);
60 chacha_block_xor_neon(state
, d
, s
, nrounds
);
62 memcpy(dst
, buf
, bytes
);
67 void hchacha_block_arch(const u32
*state
, u32
*stream
, int nrounds
)
69 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon_usable()) {
70 hchacha_block_arm(state
, stream
, nrounds
);
73 hchacha_block_neon(state
, stream
, nrounds
);
77 EXPORT_SYMBOL(hchacha_block_arch
);
79 void chacha_init_arch(u32
*state
, const u32
*key
, const u8
*iv
)
81 chacha_init_generic(state
, key
, iv
);
83 EXPORT_SYMBOL(chacha_init_arch
);
85 void chacha_crypt_arch(u32
*state
, u8
*dst
, const u8
*src
, unsigned int bytes
,
88 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon_usable() ||
89 bytes
<= CHACHA_BLOCK_SIZE
) {
90 chacha_doarm(dst
, src
, bytes
, state
, nrounds
);
91 state
[12] += DIV_ROUND_UP(bytes
, CHACHA_BLOCK_SIZE
);
96 unsigned int todo
= min_t(unsigned int, bytes
, SZ_4K
);
99 chacha_doneon(state
, dst
, src
, todo
, nrounds
);
107 EXPORT_SYMBOL(chacha_crypt_arch
);
109 static int chacha_stream_xor(struct skcipher_request
*req
,
110 const struct chacha_ctx
*ctx
, const u8
*iv
,
113 struct skcipher_walk walk
;
117 err
= skcipher_walk_virt(&walk
, req
, false);
119 chacha_init_generic(state
, ctx
->key
, iv
);
121 while (walk
.nbytes
> 0) {
122 unsigned int nbytes
= walk
.nbytes
;
124 if (nbytes
< walk
.total
)
125 nbytes
= round_down(nbytes
, walk
.stride
);
127 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon
) {
128 chacha_doarm(walk
.dst
.virt
.addr
, walk
.src
.virt
.addr
,
129 nbytes
, state
, ctx
->nrounds
);
130 state
[12] += DIV_ROUND_UP(nbytes
, CHACHA_BLOCK_SIZE
);
133 chacha_doneon(state
, walk
.dst
.virt
.addr
,
134 walk
.src
.virt
.addr
, nbytes
, ctx
->nrounds
);
137 err
= skcipher_walk_done(&walk
, walk
.nbytes
- nbytes
);
143 static int do_chacha(struct skcipher_request
*req
, bool neon
)
145 struct crypto_skcipher
*tfm
= crypto_skcipher_reqtfm(req
);
146 struct chacha_ctx
*ctx
= crypto_skcipher_ctx(tfm
);
148 return chacha_stream_xor(req
, ctx
, req
->iv
, neon
);
151 static int chacha_arm(struct skcipher_request
*req
)
153 return do_chacha(req
, false);
/* skcipher entry point for chacha20-neon; falls back to scalar if needed. */
static int chacha_neon(struct skcipher_request *req)
{
	return do_chacha(req, neon_usable());
}
161 static int do_xchacha(struct skcipher_request
*req
, bool neon
)
163 struct crypto_skcipher
*tfm
= crypto_skcipher_reqtfm(req
);
164 struct chacha_ctx
*ctx
= crypto_skcipher_ctx(tfm
);
165 struct chacha_ctx subctx
;
169 chacha_init_generic(state
, ctx
->key
, req
->iv
);
171 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon
) {
172 hchacha_block_arm(state
, subctx
.key
, ctx
->nrounds
);
175 hchacha_block_neon(state
, subctx
.key
, ctx
->nrounds
);
178 subctx
.nrounds
= ctx
->nrounds
;
180 memcpy(&real_iv
[0], req
->iv
+ 24, 8);
181 memcpy(&real_iv
[8], req
->iv
+ 16, 8);
182 return chacha_stream_xor(req
, &subctx
, real_iv
, neon
);
185 static int xchacha_arm(struct skcipher_request
*req
)
187 return do_xchacha(req
, false);
/* skcipher entry point for xchacha-neon; falls back to scalar if needed. */
static int xchacha_neon(struct skcipher_request *req)
{
	return do_xchacha(req, neon_usable());
}
195 static struct skcipher_alg arm_algs
[] = {
197 .base
.cra_name
= "chacha20",
198 .base
.cra_driver_name
= "chacha20-arm",
199 .base
.cra_priority
= 200,
200 .base
.cra_blocksize
= 1,
201 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
202 .base
.cra_module
= THIS_MODULE
,
204 .min_keysize
= CHACHA_KEY_SIZE
,
205 .max_keysize
= CHACHA_KEY_SIZE
,
206 .ivsize
= CHACHA_IV_SIZE
,
207 .chunksize
= CHACHA_BLOCK_SIZE
,
208 .setkey
= chacha20_setkey
,
209 .encrypt
= chacha_arm
,
210 .decrypt
= chacha_arm
,
212 .base
.cra_name
= "xchacha20",
213 .base
.cra_driver_name
= "xchacha20-arm",
214 .base
.cra_priority
= 200,
215 .base
.cra_blocksize
= 1,
216 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
217 .base
.cra_module
= THIS_MODULE
,
219 .min_keysize
= CHACHA_KEY_SIZE
,
220 .max_keysize
= CHACHA_KEY_SIZE
,
221 .ivsize
= XCHACHA_IV_SIZE
,
222 .chunksize
= CHACHA_BLOCK_SIZE
,
223 .setkey
= chacha20_setkey
,
224 .encrypt
= xchacha_arm
,
225 .decrypt
= xchacha_arm
,
227 .base
.cra_name
= "xchacha12",
228 .base
.cra_driver_name
= "xchacha12-arm",
229 .base
.cra_priority
= 200,
230 .base
.cra_blocksize
= 1,
231 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
232 .base
.cra_module
= THIS_MODULE
,
234 .min_keysize
= CHACHA_KEY_SIZE
,
235 .max_keysize
= CHACHA_KEY_SIZE
,
236 .ivsize
= XCHACHA_IV_SIZE
,
237 .chunksize
= CHACHA_BLOCK_SIZE
,
238 .setkey
= chacha12_setkey
,
239 .encrypt
= xchacha_arm
,
240 .decrypt
= xchacha_arm
,
244 static struct skcipher_alg neon_algs
[] = {
246 .base
.cra_name
= "chacha20",
247 .base
.cra_driver_name
= "chacha20-neon",
248 .base
.cra_priority
= 300,
249 .base
.cra_blocksize
= 1,
250 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
251 .base
.cra_module
= THIS_MODULE
,
253 .min_keysize
= CHACHA_KEY_SIZE
,
254 .max_keysize
= CHACHA_KEY_SIZE
,
255 .ivsize
= CHACHA_IV_SIZE
,
256 .chunksize
= CHACHA_BLOCK_SIZE
,
257 .walksize
= 4 * CHACHA_BLOCK_SIZE
,
258 .setkey
= chacha20_setkey
,
259 .encrypt
= chacha_neon
,
260 .decrypt
= chacha_neon
,
262 .base
.cra_name
= "xchacha20",
263 .base
.cra_driver_name
= "xchacha20-neon",
264 .base
.cra_priority
= 300,
265 .base
.cra_blocksize
= 1,
266 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
267 .base
.cra_module
= THIS_MODULE
,
269 .min_keysize
= CHACHA_KEY_SIZE
,
270 .max_keysize
= CHACHA_KEY_SIZE
,
271 .ivsize
= XCHACHA_IV_SIZE
,
272 .chunksize
= CHACHA_BLOCK_SIZE
,
273 .walksize
= 4 * CHACHA_BLOCK_SIZE
,
274 .setkey
= chacha20_setkey
,
275 .encrypt
= xchacha_neon
,
276 .decrypt
= xchacha_neon
,
278 .base
.cra_name
= "xchacha12",
279 .base
.cra_driver_name
= "xchacha12-neon",
280 .base
.cra_priority
= 300,
281 .base
.cra_blocksize
= 1,
282 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
283 .base
.cra_module
= THIS_MODULE
,
285 .min_keysize
= CHACHA_KEY_SIZE
,
286 .max_keysize
= CHACHA_KEY_SIZE
,
287 .ivsize
= XCHACHA_IV_SIZE
,
288 .chunksize
= CHACHA_BLOCK_SIZE
,
289 .walksize
= 4 * CHACHA_BLOCK_SIZE
,
290 .setkey
= chacha12_setkey
,
291 .encrypt
= xchacha_neon
,
292 .decrypt
= xchacha_neon
,
296 static int __init
chacha_simd_mod_init(void)
300 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER
)) {
301 err
= crypto_register_skciphers(arm_algs
, ARRAY_SIZE(arm_algs
));
306 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) && (elf_hwcap
& HWCAP_NEON
)) {
309 switch (read_cpuid_part()) {
310 case ARM_CPU_PART_CORTEX_A7
:
311 case ARM_CPU_PART_CORTEX_A5
:
313 * The Cortex-A7 and Cortex-A5 do not perform well with
314 * the NEON implementation but do incredibly with the
315 * scalar one and use less power.
317 for (i
= 0; i
< ARRAY_SIZE(neon_algs
); i
++)
318 neon_algs
[i
].base
.cra_priority
= 0;
321 static_branch_enable(&use_neon
);
324 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER
)) {
325 err
= crypto_register_skciphers(neon_algs
, ARRAY_SIZE(neon_algs
));
327 crypto_unregister_skciphers(arm_algs
, ARRAY_SIZE(arm_algs
));
333 static void __exit
chacha_simd_mod_fini(void)
335 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER
)) {
336 crypto_unregister_skciphers(arm_algs
, ARRAY_SIZE(arm_algs
));
337 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) && (elf_hwcap
& HWCAP_NEON
))
338 crypto_unregister_skciphers(neon_algs
, ARRAY_SIZE(neon_algs
));
/* Module hookup and algorithm aliases for the scalar implementations. */
module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-arm");
354 #ifdef CONFIG_KERNEL_MODE_NEON
355 MODULE_ALIAS_CRYPTO("chacha20-neon");
356 MODULE_ALIAS_CRYPTO("xchacha20-neon");
357 MODULE_ALIAS_CRYPTO("xchacha12-neon");