/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>

20 #define CHACHA20_STATE_ALIGN 16
22 asmlinkage
void chacha20_block_xor_ssse3(u32
*state
, u8
*dst
, const u8
*src
);
23 asmlinkage
void chacha20_4block_xor_ssse3(u32
*state
, u8
*dst
, const u8
*src
);
25 asmlinkage
void chacha20_8block_xor_avx2(u32
*state
, u8
*dst
, const u8
*src
);
26 static bool chacha20_use_avx2
;
29 static void chacha20_dosimd(u32
*state
, u8
*dst
, const u8
*src
,
32 u8 buf
[CHACHA20_BLOCK_SIZE
];
35 if (chacha20_use_avx2
) {
36 while (bytes
>= CHACHA20_BLOCK_SIZE
* 8) {
37 chacha20_8block_xor_avx2(state
, dst
, src
);
38 bytes
-= CHACHA20_BLOCK_SIZE
* 8;
39 src
+= CHACHA20_BLOCK_SIZE
* 8;
40 dst
+= CHACHA20_BLOCK_SIZE
* 8;
45 while (bytes
>= CHACHA20_BLOCK_SIZE
* 4) {
46 chacha20_4block_xor_ssse3(state
, dst
, src
);
47 bytes
-= CHACHA20_BLOCK_SIZE
* 4;
48 src
+= CHACHA20_BLOCK_SIZE
* 4;
49 dst
+= CHACHA20_BLOCK_SIZE
* 4;
52 while (bytes
>= CHACHA20_BLOCK_SIZE
) {
53 chacha20_block_xor_ssse3(state
, dst
, src
);
54 bytes
-= CHACHA20_BLOCK_SIZE
;
55 src
+= CHACHA20_BLOCK_SIZE
;
56 dst
+= CHACHA20_BLOCK_SIZE
;
60 memcpy(buf
, src
, bytes
);
61 chacha20_block_xor_ssse3(state
, buf
, buf
);
62 memcpy(dst
, buf
, bytes
);
66 static int chacha20_simd(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
67 struct scatterlist
*src
, unsigned int nbytes
)
69 u32
*state
, state_buf
[16 + (CHACHA20_STATE_ALIGN
/ sizeof(u32
)) - 1];
70 struct blkcipher_walk walk
;
73 if (nbytes
<= CHACHA20_BLOCK_SIZE
|| !may_use_simd())
74 return crypto_chacha20_crypt(desc
, dst
, src
, nbytes
);
76 state
= (u32
*)roundup((uintptr_t)state_buf
, CHACHA20_STATE_ALIGN
);
78 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
79 err
= blkcipher_walk_virt_block(desc
, &walk
, CHACHA20_BLOCK_SIZE
);
81 crypto_chacha20_init(state
, crypto_blkcipher_ctx(desc
->tfm
), walk
.iv
);
85 while (walk
.nbytes
>= CHACHA20_BLOCK_SIZE
) {
86 chacha20_dosimd(state
, walk
.dst
.virt
.addr
, walk
.src
.virt
.addr
,
87 rounddown(walk
.nbytes
, CHACHA20_BLOCK_SIZE
));
88 err
= blkcipher_walk_done(desc
, &walk
,
89 walk
.nbytes
% CHACHA20_BLOCK_SIZE
);
93 chacha20_dosimd(state
, walk
.dst
.virt
.addr
, walk
.src
.virt
.addr
,
95 err
= blkcipher_walk_done(desc
, &walk
, 0);
103 static struct crypto_alg alg
= {
104 .cra_name
= "chacha20",
105 .cra_driver_name
= "chacha20-simd",
107 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
109 .cra_type
= &crypto_blkcipher_type
,
110 .cra_ctxsize
= sizeof(struct chacha20_ctx
),
111 .cra_alignmask
= sizeof(u32
) - 1,
112 .cra_module
= THIS_MODULE
,
115 .min_keysize
= CHACHA20_KEY_SIZE
,
116 .max_keysize
= CHACHA20_KEY_SIZE
,
117 .ivsize
= CHACHA20_IV_SIZE
,
119 .setkey
= crypto_chacha20_setkey
,
120 .encrypt
= chacha20_simd
,
121 .decrypt
= chacha20_simd
,
126 static int __init
chacha20_simd_mod_init(void)
128 if (!boot_cpu_has(X86_FEATURE_SSSE3
))
131 #ifdef CONFIG_AS_AVX2
132 chacha20_use_avx2
= boot_cpu_has(X86_FEATURE_AVX
) &&
133 boot_cpu_has(X86_FEATURE_AVX2
) &&
134 cpu_has_xfeatures(XFEATURE_MASK_SSE
| XFEATURE_MASK_YMM
, NULL
);
136 return crypto_register_alg(&alg
);
139 static void __exit
chacha20_simd_mod_fini(void)
141 crypto_unregister_alg(&alg
);
144 module_init(chacha20_simd_mod_init
);
145 module_exit(chacha20_simd_mod_fini
);
147 MODULE_LICENSE("GPL");
148 MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
149 MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
150 MODULE_ALIAS_CRYPTO("chacha20");
151 MODULE_ALIAS_CRYPTO("chacha20-simd");