/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>
20 #define CHACHA20_STATE_ALIGN 16
22 asmlinkage
void chacha20_block_xor_ssse3(u32
*state
, u8
*dst
, const u8
*src
);
23 asmlinkage
void chacha20_4block_xor_ssse3(u32
*state
, u8
*dst
, const u8
*src
);
25 asmlinkage
void chacha20_8block_xor_avx2(u32
*state
, u8
*dst
, const u8
*src
);
26 static bool chacha20_use_avx2
;
29 static void chacha20_dosimd(u32
*state
, u8
*dst
, const u8
*src
,
32 u8 buf
[CHACHA20_BLOCK_SIZE
];
35 if (chacha20_use_avx2
) {
36 while (bytes
>= CHACHA20_BLOCK_SIZE
* 8) {
37 chacha20_8block_xor_avx2(state
, dst
, src
);
38 bytes
-= CHACHA20_BLOCK_SIZE
* 8;
39 src
+= CHACHA20_BLOCK_SIZE
* 8;
40 dst
+= CHACHA20_BLOCK_SIZE
* 8;
45 while (bytes
>= CHACHA20_BLOCK_SIZE
* 4) {
46 chacha20_4block_xor_ssse3(state
, dst
, src
);
47 bytes
-= CHACHA20_BLOCK_SIZE
* 4;
48 src
+= CHACHA20_BLOCK_SIZE
* 4;
49 dst
+= CHACHA20_BLOCK_SIZE
* 4;
52 while (bytes
>= CHACHA20_BLOCK_SIZE
) {
53 chacha20_block_xor_ssse3(state
, dst
, src
);
54 bytes
-= CHACHA20_BLOCK_SIZE
;
55 src
+= CHACHA20_BLOCK_SIZE
;
56 dst
+= CHACHA20_BLOCK_SIZE
;
60 memcpy(buf
, src
, bytes
);
61 chacha20_block_xor_ssse3(state
, buf
, buf
);
62 memcpy(dst
, buf
, bytes
);
66 static int chacha20_simd(struct skcipher_request
*req
)
68 struct crypto_skcipher
*tfm
= crypto_skcipher_reqtfm(req
);
69 struct chacha20_ctx
*ctx
= crypto_skcipher_ctx(tfm
);
70 u32
*state
, state_buf
[16 + 2] __aligned(8);
71 struct skcipher_walk walk
;
74 BUILD_BUG_ON(CHACHA20_STATE_ALIGN
!= 16);
75 state
= PTR_ALIGN(state_buf
+ 0, CHACHA20_STATE_ALIGN
);
77 if (req
->cryptlen
<= CHACHA20_BLOCK_SIZE
|| !may_use_simd())
78 return crypto_chacha20_crypt(req
);
80 err
= skcipher_walk_virt(&walk
, req
, true);
82 crypto_chacha20_init(state
, ctx
, walk
.iv
);
86 while (walk
.nbytes
>= CHACHA20_BLOCK_SIZE
) {
87 chacha20_dosimd(state
, walk
.dst
.virt
.addr
, walk
.src
.virt
.addr
,
88 rounddown(walk
.nbytes
, CHACHA20_BLOCK_SIZE
));
89 err
= skcipher_walk_done(&walk
,
90 walk
.nbytes
% CHACHA20_BLOCK_SIZE
);
94 chacha20_dosimd(state
, walk
.dst
.virt
.addr
, walk
.src
.virt
.addr
,
96 err
= skcipher_walk_done(&walk
, 0);
104 static struct skcipher_alg alg
= {
105 .base
.cra_name
= "chacha20",
106 .base
.cra_driver_name
= "chacha20-simd",
107 .base
.cra_priority
= 300,
108 .base
.cra_blocksize
= 1,
109 .base
.cra_ctxsize
= sizeof(struct chacha20_ctx
),
110 .base
.cra_alignmask
= sizeof(u32
) - 1,
111 .base
.cra_module
= THIS_MODULE
,
113 .min_keysize
= CHACHA20_KEY_SIZE
,
114 .max_keysize
= CHACHA20_KEY_SIZE
,
115 .ivsize
= CHACHA20_IV_SIZE
,
116 .chunksize
= CHACHA20_BLOCK_SIZE
,
117 .setkey
= crypto_chacha20_setkey
,
118 .encrypt
= chacha20_simd
,
119 .decrypt
= chacha20_simd
,
122 static int __init
chacha20_simd_mod_init(void)
124 if (!boot_cpu_has(X86_FEATURE_SSSE3
))
127 #ifdef CONFIG_AS_AVX2
128 chacha20_use_avx2
= boot_cpu_has(X86_FEATURE_AVX
) &&
129 boot_cpu_has(X86_FEATURE_AVX2
) &&
130 cpu_has_xfeatures(XFEATURE_MASK_SSE
| XFEATURE_MASK_YMM
, NULL
);
132 return crypto_register_skcipher(&alg
);
135 static void __exit
chacha20_simd_mod_fini(void)
137 crypto_unregister_skcipher(&alg
);
140 module_init(chacha20_simd_mod_init
);
141 module_exit(chacha20_simd_mod_fini
);
143 MODULE_LICENSE("GPL");
144 MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
145 MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
146 MODULE_ALIAS_CRYPTO("chacha20");
147 MODULE_ALIAS_CRYPTO("chacha20-simd");