// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
				       unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
					unsigned int len, int nrounds);
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);

asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
				       unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
				       unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
				       unsigned int len, int nrounds);

asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
					   unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
					   unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
					   unsigned int len, int nrounds);
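
/*
 * These static keys are flipped once at module init based on the CPU
 * features detected at boot; static_branch_likely() then compiles each
 * capability check in the fast path down to a patched jump.
 */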
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
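
/*
 * Number of 64-byte ChaCha blocks the counter in state[12] must advance
 * by after a SIMD call over len bytes, capped at maxblocks (the width of
 * the routine used, which rounds a partial tail block up to a whole one).
 */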
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
	len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
	return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
}
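
/*
 * Core dispatch: use the widest available SIMD routine for the bulk of the
 * data, then hand the tail to a routine sized to what remains. Each *_xor
 * helper XORs the generated keystream with src into dst.
 */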
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	if (IS_ENABLED(CONFIG_AS_AVX512) &&
	    static_branch_likely(&chacha_use_avx512vl)) {
		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
			chacha_8block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			bytes -= CHACHA_BLOCK_SIZE * 8;
			src += CHACHA_BLOCK_SIZE * 8;
			dst += CHACHA_BLOCK_SIZE * 8;
			state[12] += 8;
		}
		if (bytes > CHACHA_BLOCK_SIZE * 4) {
			chacha_8block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			state[12] += chacha_advance(bytes, 8);
			return;
		}
		if (bytes > CHACHA_BLOCK_SIZE * 2) {
			chacha_4block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			state[12] += chacha_advance(bytes, 4);
			return;
		}
		if (bytes) {
			chacha_2block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			state[12] += chacha_advance(bytes, 2);
			return;
		}
	}
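
	/*
	 * AVX2 path: 8 blocks per iteration for the bulk, then one wide
	 * call sized by how much tail remains.
	 */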
	if (static_branch_likely(&chacha_use_avx2)) {
		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
			bytes -= CHACHA_BLOCK_SIZE * 8;
			src += CHACHA_BLOCK_SIZE * 8;
			dst += CHACHA_BLOCK_SIZE * 8;
			state[12] += 8;
		}
		if (bytes > CHACHA_BLOCK_SIZE * 4) {
			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
			state[12] += chacha_advance(bytes, 8);
			return;
		}
		if (bytes > CHACHA_BLOCK_SIZE * 2) {
			chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
			state[12] += chacha_advance(bytes, 4);
			return;
		}
		if (bytes > CHACHA_BLOCK_SIZE) {
			chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
			state[12] += chacha_advance(bytes, 2);
			return;
		}
	}
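
	/* SSSE3 fallback; always available once chacha_use_simd is enabled. */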
	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
		bytes -= CHACHA_BLOCK_SIZE * 4;
		src += CHACHA_BLOCK_SIZE * 4;
		dst += CHACHA_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	if (bytes > CHACHA_BLOCK_SIZE) {
		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
		state[12] += chacha_advance(bytes, 4);
		return;
	}
	if (bytes) {
		chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
		state[12]++;
	}
}
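
/*
 * HChaCha is the keyed permutation XChaCha uses to derive a subkey from the
 * key and the first 128 bits of the extended nonce. The SIMD path must run
 * inside kernel_fpu_begin()/kernel_fpu_end() because it clobbers XMM state.
 */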
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
		hchacha_block_generic(state, stream, nrounds);
	} else {
		kernel_fpu_begin();
		hchacha_block_ssse3(state, stream, nrounds);
		kernel_fpu_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
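
/*
 * Inputs of at most one block are not worth the kernel_fpu_begin() overhead,
 * so they (and any context where the SIMD unit is unusable) go straight to
 * the portable generic implementation.
 */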
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE)
		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
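
	/*
	 * Process at most 4 KiB per kernel_fpu_begin()/end() section so the
	 * FPU is not held, and preemption potentially delayed, for the whole
	 * of a large request.
	 */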
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_fpu_begin();
		chacha_dosimd(state, dst, src, todo, nrounds);
		kernel_fpu_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
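
/*
 * skcipher glue: walk the request scatterlist and encrypt each mapped span,
 * rounding partial steps down to a whole number of blocks so the block
 * counter in state[12] stays consistent across iterations.
 */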
static int chacha_simd_stream_xor(struct skcipher_request *req,
				  const struct chacha_ctx *ctx, const u8 *iv)
{
	u32 state[CHACHA_STATE_WORDS] __aligned(8);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	chacha_init_generic(state, ctx->key, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		if (!static_branch_likely(&chacha_use_simd) ||
		    !crypto_simd_usable()) {
			chacha_crypt_generic(state, walk.dst.virt.addr,
					     walk.src.virt.addr, nbytes,
					     ctx->nrounds);
		} else {
			kernel_fpu_begin();
			chacha_dosimd(state, walk.dst.virt.addr,
				      walk.src.virt.addr, nbytes,
				      ctx->nrounds);
			kernel_fpu_end();
		}
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int chacha_simd(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);

	return chacha_simd_stream_xor(req, ctx, req->iv);
}
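
/*
 * XChaCha: run HChaCha over the key and the first 128 bits of the 192-bit
 * nonce to derive a one-off subkey, then run plain ChaCha with an IV built
 * from the remaining 64 nonce bits and the request's block counter.
 */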
static int xchacha_simd(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	u32 state[CHACHA_STATE_WORDS] __aligned(8);
	struct chacha_ctx subctx;
	u8 real_iv[16];

	chacha_init_generic(state, ctx->key, req->iv);

	if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
		kernel_fpu_begin();
		hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
		kernel_fpu_end();
	} else {
		hchacha_block_generic(state, subctx.key, ctx->nrounds);
	}
	subctx.nrounds = ctx->nrounds;

	memcpy(&real_iv[0], req->iv + 24, 8);
	memcpy(&real_iv[8], req->iv + 16, 8);
	return chacha_simd_stream_xor(req, &subctx, real_iv);
}
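
/*
 * cra_priority 300 ranks these above the lower-priority generic C
 * implementations, so the crypto API prefers the SIMD versions whenever
 * this module is loaded.
 */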
static struct skcipher_alg algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-simd",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_simd,
		.decrypt		= chacha_simd,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-simd",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_simd,
		.decrypt		= xchacha_simd,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-simd",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_simd,
		.decrypt		= xchacha_simd,
	},
};
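
/*
 * SSSE3 is the baseline requirement. AVX2 additionally needs the OS to
 * have enabled YMM state saving (cpu_has_xfeatures), and the AVX-512VL
 * path also wants AVX512BW for the kmovq mask instruction.
 */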
static int __init chacha_simd_mod_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_SSSE3))
		return 0;

	static_branch_enable(&chacha_use_simd);

	if (boot_cpu_has(X86_FEATURE_AVX) &&
	    boot_cpu_has(X86_FEATURE_AVX2) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
		static_branch_enable(&chacha_use_avx2);

		if (IS_ENABLED(CONFIG_AS_AVX512) &&
		    boot_cpu_has(X86_FEATURE_AVX512VL) &&
		    boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
			static_branch_enable(&chacha_use_avx512vl);
	}
	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
	       crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
}

static void __exit chacha_simd_mod_fini(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3))
		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}

module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-simd");