// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */
#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
23 asmlinkage
void chacha_block_xor_neon(const u32
*state
, u8
*dst
, const u8
*src
,
25 asmlinkage
void chacha_4block_xor_neon(const u32
*state
, u8
*dst
, const u8
*src
,
27 asmlinkage
void hchacha_block_arm(const u32
*state
, u32
*out
, int nrounds
);
28 asmlinkage
void hchacha_block_neon(const u32
*state
, u32
*out
, int nrounds
);
30 asmlinkage
void chacha_doarm(u8
*dst
, const u8
*src
, unsigned int bytes
,
31 const u32
*state
, int nrounds
);
/*
 * Flipped on (at most once) during module init when NEON is present and
 * considered beneficial on the boot CPU; read-only afterwards.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
/*
 * NEON may be used only when it was enabled at init time AND the current
 * execution context permits SIMD use (crypto_simd_usable()).
 */
static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}
40 static void chacha_doneon(u32
*state
, u8
*dst
, const u8
*src
,
41 unsigned int bytes
, int nrounds
)
43 u8 buf
[CHACHA_BLOCK_SIZE
];
45 while (bytes
>= CHACHA_BLOCK_SIZE
* 4) {
46 chacha_4block_xor_neon(state
, dst
, src
, nrounds
);
47 bytes
-= CHACHA_BLOCK_SIZE
* 4;
48 src
+= CHACHA_BLOCK_SIZE
* 4;
49 dst
+= CHACHA_BLOCK_SIZE
* 4;
52 while (bytes
>= CHACHA_BLOCK_SIZE
) {
53 chacha_block_xor_neon(state
, dst
, src
, nrounds
);
54 bytes
-= CHACHA_BLOCK_SIZE
;
55 src
+= CHACHA_BLOCK_SIZE
;
56 dst
+= CHACHA_BLOCK_SIZE
;
60 memcpy(buf
, src
, bytes
);
61 chacha_block_xor_neon(state
, buf
, buf
, nrounds
);
62 memcpy(dst
, buf
, bytes
);
66 void hchacha_block_arch(const u32
*state
, u32
*stream
, int nrounds
)
68 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon_usable()) {
69 hchacha_block_arm(state
, stream
, nrounds
);
72 hchacha_block_neon(state
, stream
, nrounds
);
76 EXPORT_SYMBOL(hchacha_block_arch
);
/*
 * Arch hook for ChaCha state setup.  No ARM-specific initialization is
 * needed, so this simply defers to the generic state initializer.
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
84 void chacha_crypt_arch(u32
*state
, u8
*dst
, const u8
*src
, unsigned int bytes
,
87 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon_usable() ||
88 bytes
<= CHACHA_BLOCK_SIZE
) {
89 chacha_doarm(dst
, src
, bytes
, state
, nrounds
);
90 state
[12] += DIV_ROUND_UP(bytes
, CHACHA_BLOCK_SIZE
);
95 chacha_doneon(state
, dst
, src
, bytes
, nrounds
);
98 EXPORT_SYMBOL(chacha_crypt_arch
);
100 static int chacha_stream_xor(struct skcipher_request
*req
,
101 const struct chacha_ctx
*ctx
, const u8
*iv
,
104 struct skcipher_walk walk
;
108 err
= skcipher_walk_virt(&walk
, req
, false);
110 chacha_init_generic(state
, ctx
->key
, iv
);
112 while (walk
.nbytes
> 0) {
113 unsigned int nbytes
= walk
.nbytes
;
115 if (nbytes
< walk
.total
)
116 nbytes
= round_down(nbytes
, walk
.stride
);
118 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon
) {
119 chacha_doarm(walk
.dst
.virt
.addr
, walk
.src
.virt
.addr
,
120 nbytes
, state
, ctx
->nrounds
);
121 state
[12] += DIV_ROUND_UP(nbytes
, CHACHA_BLOCK_SIZE
);
124 chacha_doneon(state
, walk
.dst
.virt
.addr
,
125 walk
.src
.virt
.addr
, nbytes
, ctx
->nrounds
);
128 err
= skcipher_walk_done(&walk
, walk
.nbytes
- nbytes
);
134 static int do_chacha(struct skcipher_request
*req
, bool neon
)
136 struct crypto_skcipher
*tfm
= crypto_skcipher_reqtfm(req
);
137 struct chacha_ctx
*ctx
= crypto_skcipher_ctx(tfm
);
139 return chacha_stream_xor(req
, ctx
, req
->iv
, neon
);
/* skcipher handler for the scalar-only "chacha20-arm" algorithm. */
static int chacha_arm(struct skcipher_request *req)
{
	return do_chacha(req, false);
}
/* skcipher handler for "chacha20-neon"; NEON use depends on context. */
static int chacha_neon(struct skcipher_request *req)
{
	return do_chacha(req, neon_usable());
}
152 static int do_xchacha(struct skcipher_request
*req
, bool neon
)
154 struct crypto_skcipher
*tfm
= crypto_skcipher_reqtfm(req
);
155 struct chacha_ctx
*ctx
= crypto_skcipher_ctx(tfm
);
156 struct chacha_ctx subctx
;
160 chacha_init_generic(state
, ctx
->key
, req
->iv
);
162 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) || !neon
) {
163 hchacha_block_arm(state
, subctx
.key
, ctx
->nrounds
);
166 hchacha_block_neon(state
, subctx
.key
, ctx
->nrounds
);
169 subctx
.nrounds
= ctx
->nrounds
;
171 memcpy(&real_iv
[0], req
->iv
+ 24, 8);
172 memcpy(&real_iv
[8], req
->iv
+ 16, 8);
173 return chacha_stream_xor(req
, &subctx
, real_iv
, neon
);
/* skcipher handler for the scalar-only "xchacha*-arm" algorithms. */
static int xchacha_arm(struct skcipher_request *req)
{
	return do_xchacha(req, false);
}
/* skcipher handler for "xchacha*-neon"; NEON use depends on context. */
static int xchacha_neon(struct skcipher_request *req)
{
	return do_xchacha(req, neon_usable());
}
186 static struct skcipher_alg arm_algs
[] = {
188 .base
.cra_name
= "chacha20",
189 .base
.cra_driver_name
= "chacha20-arm",
190 .base
.cra_priority
= 200,
191 .base
.cra_blocksize
= 1,
192 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
193 .base
.cra_module
= THIS_MODULE
,
195 .min_keysize
= CHACHA_KEY_SIZE
,
196 .max_keysize
= CHACHA_KEY_SIZE
,
197 .ivsize
= CHACHA_IV_SIZE
,
198 .chunksize
= CHACHA_BLOCK_SIZE
,
199 .setkey
= chacha20_setkey
,
200 .encrypt
= chacha_arm
,
201 .decrypt
= chacha_arm
,
203 .base
.cra_name
= "xchacha20",
204 .base
.cra_driver_name
= "xchacha20-arm",
205 .base
.cra_priority
= 200,
206 .base
.cra_blocksize
= 1,
207 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
208 .base
.cra_module
= THIS_MODULE
,
210 .min_keysize
= CHACHA_KEY_SIZE
,
211 .max_keysize
= CHACHA_KEY_SIZE
,
212 .ivsize
= XCHACHA_IV_SIZE
,
213 .chunksize
= CHACHA_BLOCK_SIZE
,
214 .setkey
= chacha20_setkey
,
215 .encrypt
= xchacha_arm
,
216 .decrypt
= xchacha_arm
,
218 .base
.cra_name
= "xchacha12",
219 .base
.cra_driver_name
= "xchacha12-arm",
220 .base
.cra_priority
= 200,
221 .base
.cra_blocksize
= 1,
222 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
223 .base
.cra_module
= THIS_MODULE
,
225 .min_keysize
= CHACHA_KEY_SIZE
,
226 .max_keysize
= CHACHA_KEY_SIZE
,
227 .ivsize
= XCHACHA_IV_SIZE
,
228 .chunksize
= CHACHA_BLOCK_SIZE
,
229 .setkey
= chacha12_setkey
,
230 .encrypt
= xchacha_arm
,
231 .decrypt
= xchacha_arm
,
235 static struct skcipher_alg neon_algs
[] = {
237 .base
.cra_name
= "chacha20",
238 .base
.cra_driver_name
= "chacha20-neon",
239 .base
.cra_priority
= 300,
240 .base
.cra_blocksize
= 1,
241 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
242 .base
.cra_module
= THIS_MODULE
,
244 .min_keysize
= CHACHA_KEY_SIZE
,
245 .max_keysize
= CHACHA_KEY_SIZE
,
246 .ivsize
= CHACHA_IV_SIZE
,
247 .chunksize
= CHACHA_BLOCK_SIZE
,
248 .walksize
= 4 * CHACHA_BLOCK_SIZE
,
249 .setkey
= chacha20_setkey
,
250 .encrypt
= chacha_neon
,
251 .decrypt
= chacha_neon
,
253 .base
.cra_name
= "xchacha20",
254 .base
.cra_driver_name
= "xchacha20-neon",
255 .base
.cra_priority
= 300,
256 .base
.cra_blocksize
= 1,
257 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
258 .base
.cra_module
= THIS_MODULE
,
260 .min_keysize
= CHACHA_KEY_SIZE
,
261 .max_keysize
= CHACHA_KEY_SIZE
,
262 .ivsize
= XCHACHA_IV_SIZE
,
263 .chunksize
= CHACHA_BLOCK_SIZE
,
264 .walksize
= 4 * CHACHA_BLOCK_SIZE
,
265 .setkey
= chacha20_setkey
,
266 .encrypt
= xchacha_neon
,
267 .decrypt
= xchacha_neon
,
269 .base
.cra_name
= "xchacha12",
270 .base
.cra_driver_name
= "xchacha12-neon",
271 .base
.cra_priority
= 300,
272 .base
.cra_blocksize
= 1,
273 .base
.cra_ctxsize
= sizeof(struct chacha_ctx
),
274 .base
.cra_module
= THIS_MODULE
,
276 .min_keysize
= CHACHA_KEY_SIZE
,
277 .max_keysize
= CHACHA_KEY_SIZE
,
278 .ivsize
= XCHACHA_IV_SIZE
,
279 .chunksize
= CHACHA_BLOCK_SIZE
,
280 .walksize
= 4 * CHACHA_BLOCK_SIZE
,
281 .setkey
= chacha12_setkey
,
282 .encrypt
= xchacha_neon
,
283 .decrypt
= xchacha_neon
,
287 static int __init
chacha_simd_mod_init(void)
291 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER
)) {
292 err
= crypto_register_skciphers(arm_algs
, ARRAY_SIZE(arm_algs
));
297 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON
) && (elf_hwcap
& HWCAP_NEON
)) {
300 switch (read_cpuid_part()) {
301 case ARM_CPU_PART_CORTEX_A7
:
302 case ARM_CPU_PART_CORTEX_A5
:
304 * The Cortex-A7 and Cortex-A5 do not perform well with
305 * the NEON implementation but do incredibly with the
306 * scalar one and use less power.
308 for (i
= 0; i
< ARRAY_SIZE(neon_algs
); i
++)
309 neon_algs
[i
].base
.cra_priority
= 0;
312 static_branch_enable(&use_neon
);
315 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER
)) {
316 err
= crypto_register_skciphers(neon_algs
, ARRAY_SIZE(neon_algs
));
318 crypto_unregister_skciphers(arm_algs
, ARRAY_SIZE(arm_algs
));
/*
 * Module exit: unregister the scalar algorithms, and the NEON ones too when
 * they were registered at init time (same NEON condition as init).
 */
static void __exit chacha_simd_mod_fini(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
	}
}
module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-arm");
/* NEON driver aliases exist only when kernel-mode NEON is configured. */
/* NOTE(review): matching #endif lies beyond this chunk — confirm. */
#ifdef CONFIG_KERNEL_MODE_NEON
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha12-neon");