/*
 * Glue Code for 3-way parallel assembler optimized version of Twofish
 *
 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */

#include <asm/crypto/glue_helper.h>
#include <asm/crypto/twofish.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/internal/skcipher.h>
#include <crypto/twofish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>

/*
 * Export the two 3-way block routines to other GPL modules. Neither symbol
 * is defined in this file.
 */
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);

37 static int twofish_setkey_skcipher(struct crypto_skcipher
*tfm
,
38 const u8
*key
, unsigned int keylen
)
40 return twofish_setkey(&tfm
->base
, key
, keylen
);
43 static inline void twofish_enc_blk_3way(struct twofish_ctx
*ctx
, u8
*dst
,
46 __twofish_enc_blk_3way(ctx
, dst
, src
, false);
49 static inline void twofish_enc_blk_xor_3way(struct twofish_ctx
*ctx
, u8
*dst
,
52 __twofish_enc_blk_3way(ctx
, dst
, src
, true);
55 void twofish_dec_blk_cbc_3way(void *ctx
, u128
*dst
, const u128
*src
)
62 twofish_dec_blk_3way(ctx
, (u8
*)dst
, (u8
*)src
);
64 u128_xor(&dst
[1], &dst
[1], &ivs
[0]);
65 u128_xor(&dst
[2], &dst
[2], &ivs
[1]);
67 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way
);
69 void twofish_enc_blk_ctr(void *ctx
, u128
*dst
, const u128
*src
, le128
*iv
)
76 le128_to_be128(&ctrblk
, iv
);
79 twofish_enc_blk(ctx
, (u8
*)&ctrblk
, (u8
*)&ctrblk
);
80 u128_xor(dst
, dst
, (u128
*)&ctrblk
);
82 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr
);
84 void twofish_enc_blk_ctr_3way(void *ctx
, u128
*dst
, const u128
*src
,
95 le128_to_be128(&ctrblks
[0], iv
);
97 le128_to_be128(&ctrblks
[1], iv
);
99 le128_to_be128(&ctrblks
[2], iv
);
102 twofish_enc_blk_xor_3way(ctx
, (u8
*)dst
, (u8
*)ctrblks
);
104 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way
);
106 static const struct common_glue_ctx twofish_enc
= {
108 .fpu_blocks_limit
= -1,
112 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_enc_blk_3way
) }
115 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_enc_blk
) }
119 static const struct common_glue_ctx twofish_ctr
= {
121 .fpu_blocks_limit
= -1,
125 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way
) }
128 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_enc_blk_ctr
) }
132 static const struct common_glue_ctx twofish_dec
= {
134 .fpu_blocks_limit
= -1,
138 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_dec_blk_3way
) }
141 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_dec_blk
) }
145 static const struct common_glue_ctx twofish_dec_cbc
= {
147 .fpu_blocks_limit
= -1,
151 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way
) }
154 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(twofish_dec_blk
) }
158 static int ecb_encrypt(struct skcipher_request
*req
)
160 return glue_ecb_req_128bit(&twofish_enc
, req
);
163 static int ecb_decrypt(struct skcipher_request
*req
)
165 return glue_ecb_req_128bit(&twofish_dec
, req
);
168 static int cbc_encrypt(struct skcipher_request
*req
)
170 return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk
),
174 static int cbc_decrypt(struct skcipher_request
*req
)
176 return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc
, req
);
179 static int ctr_crypt(struct skcipher_request
*req
)
181 return glue_ctr_req_128bit(&twofish_ctr
, req
);
184 static struct skcipher_alg tf_skciphers
[] = {
186 .base
.cra_name
= "ecb(twofish)",
187 .base
.cra_driver_name
= "ecb-twofish-3way",
188 .base
.cra_priority
= 300,
189 .base
.cra_blocksize
= TF_BLOCK_SIZE
,
190 .base
.cra_ctxsize
= sizeof(struct twofish_ctx
),
191 .base
.cra_module
= THIS_MODULE
,
192 .min_keysize
= TF_MIN_KEY_SIZE
,
193 .max_keysize
= TF_MAX_KEY_SIZE
,
194 .setkey
= twofish_setkey_skcipher
,
195 .encrypt
= ecb_encrypt
,
196 .decrypt
= ecb_decrypt
,
198 .base
.cra_name
= "cbc(twofish)",
199 .base
.cra_driver_name
= "cbc-twofish-3way",
200 .base
.cra_priority
= 300,
201 .base
.cra_blocksize
= TF_BLOCK_SIZE
,
202 .base
.cra_ctxsize
= sizeof(struct twofish_ctx
),
203 .base
.cra_module
= THIS_MODULE
,
204 .min_keysize
= TF_MIN_KEY_SIZE
,
205 .max_keysize
= TF_MAX_KEY_SIZE
,
206 .ivsize
= TF_BLOCK_SIZE
,
207 .setkey
= twofish_setkey_skcipher
,
208 .encrypt
= cbc_encrypt
,
209 .decrypt
= cbc_decrypt
,
211 .base
.cra_name
= "ctr(twofish)",
212 .base
.cra_driver_name
= "ctr-twofish-3way",
213 .base
.cra_priority
= 300,
214 .base
.cra_blocksize
= 1,
215 .base
.cra_ctxsize
= sizeof(struct twofish_ctx
),
216 .base
.cra_module
= THIS_MODULE
,
217 .min_keysize
= TF_MIN_KEY_SIZE
,
218 .max_keysize
= TF_MAX_KEY_SIZE
,
219 .ivsize
= TF_BLOCK_SIZE
,
220 .chunksize
= TF_BLOCK_SIZE
,
221 .setkey
= twofish_setkey_skcipher
,
222 .encrypt
= ctr_crypt
,
223 .decrypt
= ctr_crypt
,
227 static bool is_blacklisted_cpu(void)
229 if (boot_cpu_data
.x86_vendor
!= X86_VENDOR_INTEL
)
232 if (boot_cpu_data
.x86
== 0x06 &&
233 (boot_cpu_data
.x86_model
== 0x1c ||
234 boot_cpu_data
.x86_model
== 0x26 ||
235 boot_cpu_data
.x86_model
== 0x36)) {
237 * On Atom, twofish-3way is slower than original assembler
238 * implementation. Twofish-3way trades off some performance in
239 * storing blocks in 64bit registers to allow three blocks to
240 * be processed parallel. Parallel operation then allows gaining
241 * more performance than was trade off, on out-of-order CPUs.
242 * However Atom does not benefit from this parallellism and
243 * should be blacklisted.
248 if (boot_cpu_data
.x86
== 0x0f) {
250 * On Pentium 4, twofish-3way is slower than original assembler
251 * implementation because excessive uses of 64bit rotate and
252 * left-shifts (which are really slow on P4) needed to store and
253 * handle 128bit block in two 64bit registers.
262 module_param(force
, int, 0);
263 MODULE_PARM_DESC(force
, "Force module load, ignore CPU blacklist");
265 static int __init
init(void)
267 if (!force
&& is_blacklisted_cpu()) {
269 "twofish-x86_64-3way: performance on this CPU "
270 "would be suboptimal: disabling "
271 "twofish-x86_64-3way.\n");
275 return crypto_register_skciphers(tf_skciphers
,
276 ARRAY_SIZE(tf_skciphers
));
279 static void __exit
fini(void)
281 crypto_unregister_skciphers(tf_skciphers
, ARRAY_SIZE(tf_skciphers
));
287 MODULE_LICENSE("GPL");
288 MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
289 MODULE_ALIAS_CRYPTO("twofish");
290 MODULE_ALIAS_CRYPTO("twofish-asm");