/*
 * Glue Code for x86_64/AVX2 assembler optimized version of Twofish
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/crypto.h>
16 #include <linux/err.h>
17 #include <crypto/algapi.h>
18 #include <crypto/ctr.h>
19 #include <crypto/twofish.h>
20 #include <crypto/lrw.h>
21 #include <crypto/xts.h>
23 #include <asm/xsave.h>
24 #include <asm/crypto/twofish.h>
25 #include <asm/crypto/ablk_helper.h>
26 #include <asm/crypto/glue_helper.h>
27 #include <crypto/scatterwalk.h>
/*
 * Block count behind the 16-way paths in this file: it sizes the tweak
 * buffers in lrw_encrypt()/lrw_decrypt() and bounds the first loop of
 * encrypt_callback()/decrypt_callback().
 */
29 #define TF_AVX2_PARALLEL_BLOCKS 16
/*
 * Prototypes for the 16-way routines that the dispatch tables and the LRW
 * callbacks further down refer to.
 * NOTE(review): the ecb_enc, ecb_dec and ctr declarations stop at a trailing
 * comma in this copy; their last parameter lines are not present here and
 * must be restored from the original file before this can build.
 */
31 /* 16-way AVX2 parallel cipher functions */
32 asmlinkage
void twofish_ecb_enc_16way(struct twofish_ctx
*ctx
, u8
*dst
,
34 asmlinkage
void twofish_ecb_dec_16way(struct twofish_ctx
*ctx
, u8
*dst
,
36 asmlinkage
void twofish_cbc_dec_16way(void *ctx
, u128
*dst
, const u128
*src
);
38 asmlinkage
void twofish_ctr_16way(void *ctx
, u128
*dst
, const u128
*src
,
41 asmlinkage
void twofish_xts_enc_16way(struct twofish_ctx
*ctx
, u8
*dst
,
42 const u8
*src
, le128
*iv
);
43 asmlinkage
void twofish_xts_dec_16way(struct twofish_ctx
*ctx
, u8
*dst
,
44 const u8
*src
, le128
*iv
);
/*
 * Inline wrapper that forwards ctx, dst and src to __twofish_enc_blk_3way()
 * with a fixed fourth argument of false.  It is the 3-way entry of the
 * twofish_enc table and is also called from encrypt_callback().
 * NOTE(review): the src parameter line and the function braces are absent
 * from this copy.  The meaning of the false flag is defined by
 * __twofish_enc_blk_3way() and should be confirmed there.
 */
46 static inline void twofish_enc_blk_3way(struct twofish_ctx
*ctx
, u8
*dst
,
49 __twofish_enc_blk_3way(ctx
, dst
, src
, false);
/*
 * ECB encryption dispatch table; ecb_encrypt() hands it to
 * glue_ecb_crypt_128bit().  The .ecb entries appear in this order:
 * twofish_ecb_enc_16way, twofish_ecb_enc_8way, twofish_enc_blk_3way,
 * twofish_enc_blk.
 * NOTE(review): only fpu_blocks_limit and the fn_u lines survive in this
 * copy; any other per-entry fields and the enclosing braces are not visible.
 * fpu_blocks_limit equals the literal 8 passed to glue_fpu_begin() in
 * twofish_fpu_begin() -- presumably the FPU threshold in blocks; confirm
 * against the glue helper.
 */
52 static const struct common_glue_ctx twofish_enc
= {
54 .fpu_blocks_limit
= 8,
58 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_ecb_enc_16way
) }
61 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_ecb_enc_8way
) }
64 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_enc_blk_3way
) }
67 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_enc_blk
) }
/*
 * CTR dispatch table; ctr_crypt() hands it to glue_ctr_crypt_128bit().
 * The .ctr entries appear in this order: twofish_ctr_16way,
 * twofish_ctr_8way, twofish_enc_blk_ctr_3way, twofish_enc_blk_ctr.
 * NOTE(review): only fpu_blocks_limit and the fn_u lines survive in this
 * copy; any other per-entry fields and the enclosing braces are not visible.
 */
71 static const struct common_glue_ctx twofish_ctr
= {
73 .fpu_blocks_limit
= 8,
77 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(twofish_ctr_16way
) }
80 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(twofish_ctr_8way
) }
83 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way
) }
86 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr
) }
/*
 * XTS encryption dispatch table; xts_encrypt() hands it to
 * glue_xts_crypt_128bit().  The .xts entries appear in this order:
 * twofish_xts_enc_16way, twofish_xts_enc_8way, twofish_xts_enc.
 * NOTE(review): only fpu_blocks_limit and the fn_u lines survive in this
 * copy; any other per-entry fields and the enclosing braces are not visible.
 */
90 static const struct common_glue_ctx twofish_enc_xts
= {
92 .fpu_blocks_limit
= 8,
96 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way
) }
99 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way
) }
102 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(twofish_xts_enc
) }
/*
 * ECB decryption dispatch table; ecb_decrypt() hands it to
 * glue_ecb_crypt_128bit().  The .ecb entries appear in this order:
 * twofish_ecb_dec_16way, twofish_ecb_dec_8way, twofish_dec_blk_3way,
 * twofish_dec_blk.
 * NOTE(review): only fpu_blocks_limit and the fn_u lines survive in this
 * copy; any other per-entry fields and the enclosing braces are not visible.
 */
106 static const struct common_glue_ctx twofish_dec
= {
108 .fpu_blocks_limit
= 8,
112 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_ecb_dec_16way
) }
115 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_ecb_dec_8way
) }
118 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_dec_blk_3way
) }
121 .fn_u
= { .ecb
= GLUE_FUNC_CAST(twofish_dec_blk
) }
/*
 * CBC decryption dispatch table; cbc_decrypt() hands it to
 * glue_cbc_decrypt_128bit().  The .cbc entries appear in this order:
 * twofish_cbc_dec_16way, twofish_cbc_dec_8way, twofish_dec_blk_cbc_3way,
 * twofish_dec_blk (the same routine the ECB table ends with, here cast
 * through GLUE_CBC_FUNC_CAST).
 * NOTE(review): only fpu_blocks_limit and the fn_u lines survive in this
 * copy; any other per-entry fields and the enclosing braces are not visible.
 */
125 static const struct common_glue_ctx twofish_dec_cbc
= {
127 .fpu_blocks_limit
= 8,
131 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way
) }
134 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way
) }
137 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way
) }
140 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(twofish_dec_blk
) }
/*
 * XTS decryption dispatch table; xts_decrypt() hands it to
 * glue_xts_crypt_128bit().  The .xts entries appear in this order:
 * twofish_xts_dec_16way, twofish_xts_dec_8way, twofish_xts_dec.
 * NOTE(review): only fpu_blocks_limit and the fn_u lines survive in this
 * copy; any other per-entry fields and the enclosing braces are not visible.
 */
144 static const struct common_glue_ctx twofish_dec_xts
= {
146 .fpu_blocks_limit
= 8,
150 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way
) }
153 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way
) }
156 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(twofish_xts_dec
) }
/*
 * ECB encrypt handler of the __ecb-twofish-avx2 entry in tf_algs: passes
 * desc, dst, src and nbytes straight to glue_ecb_crypt_128bit() together
 * with the twofish_enc dispatch table and returns its result.
 * NOTE(review): the function braces are absent from this copy.
 */
160 static int ecb_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
161 struct scatterlist
*src
, unsigned int nbytes
)
163 return glue_ecb_crypt_128bit(&twofish_enc
, desc
, dst
, src
, nbytes
);
/*
 * ECB decrypt handler of the __ecb-twofish-avx2 entry in tf_algs: same call
 * as ecb_encrypt() but with the twofish_dec dispatch table; returns the
 * result of glue_ecb_crypt_128bit().
 * NOTE(review): the function braces are absent from this copy.
 */
166 static int ecb_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
167 struct scatterlist
*src
, unsigned int nbytes
)
169 return glue_ecb_crypt_128bit(&twofish_dec
, desc
, dst
, src
, nbytes
);
/*
 * CBC encrypt handler of the __cbc-twofish-avx2 entry in tf_algs.  Unlike
 * the other modes it does not use a dispatch table: it gives
 * glue_cbc_encrypt_128bit() the twofish_enc_blk routine directly (cast with
 * GLUE_FUNC_CAST).
 * NOTE(review): the call is cut off after desc in this copy; the remaining
 * arguments and the function braces are not visible.
 */
172 static int cbc_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
173 struct scatterlist
*src
, unsigned int nbytes
)
175 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk
), desc
,
/*
 * CBC decrypt handler of the __cbc-twofish-avx2 entry in tf_algs: calls
 * glue_cbc_decrypt_128bit() with the twofish_dec_cbc dispatch table, desc,
 * dst and src.
 * NOTE(review): the call is cut off after src in this copy; the final
 * argument(s) and the function braces are not visible.
 */
179 static int cbc_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
180 struct scatterlist
*src
, unsigned int nbytes
)
182 return glue_cbc_decrypt_128bit(&twofish_dec_cbc
, desc
, dst
, src
,
/*
 * CTR handler: passes desc, dst, src and nbytes to glue_ctr_crypt_128bit()
 * with the twofish_ctr dispatch table and returns its result.  The
 * __ctr-twofish-avx2 entry in tf_algs installs this one function as both
 * .encrypt and .decrypt.
 * NOTE(review): the function braces are absent from this copy.
 */
186 static int ctr_crypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
187 struct scatterlist
*src
, unsigned int nbytes
)
189 return glue_ctr_crypt_128bit(&twofish_ctr
, desc
, dst
, src
, nbytes
);
/*
 * Wrapper around glue_fpu_begin() with the block size fixed to
 * TF_BLOCK_SIZE, a block limit of 8 and a NULL third argument.  Returns
 * whatever glue_fpu_begin() returns; the LRW callbacks store that value back
 * as their new fpu_enabled state.
 * NOTE(review): the function braces are absent from this copy.
 */
192 static inline bool twofish_fpu_begin(bool fpu_enabled
, unsigned int nbytes
)
194 /* since reusing AVX functions, starts using FPU at 8 parallel blocks */
195 return glue_fpu_begin(TF_BLOCK_SIZE
, 8, NULL
, fpu_enabled
, nbytes
);
/*
 * Counterpart of twofish_fpu_begin(): hands the tracked fpu_enabled flag to
 * glue_fpu_end().  lrw_encrypt()/lrw_decrypt() call it once after
 * lrw_crypt() returns.
 * NOTE(review): the function braces are absent from this copy.
 */
198 static inline void twofish_fpu_end(bool fpu_enabled
)
200 glue_fpu_end(fpu_enabled
);
/*
 * Member declaration: pointer to a struct twofish_ctx.
 * NOTE(review): presumably a field of struct crypt_priv, since the LRW
 * callbacks read ->ctx and ->fpu_enabled through a struct crypt_priv
 * pointer; the struct header, the fpu_enabled field and the closing brace
 * are absent from this copy -- restore them from the original file.
 */
204 struct twofish_ctx
*ctx
;
/*
 * In-place encryption callback installed as .crypt_fn by lrw_encrypt().
 * priv is treated as a struct crypt_priv; srcdst is both input and output
 * for every cipher call; nbytes is the amount of data at srcdst.
 *
 * It first updates ctx->fpu_enabled through twofish_fpu_begin(), then works
 * through the data with progressively narrower routines: 16 blocks per call,
 * 8 blocks per call, 3 blocks per call, and finally one block per call.
 * NOTE(review): in this copy the 8-way and 3-way loops show only the cipher
 * call (the srcdst/nbytes updates seen in the 16-way loop are not visible),
 * and the declaration of i plus all closing braces are absent.
 */
208 static void encrypt_callback(void *priv
, u8
*srcdst
, unsigned int nbytes
)
210 const unsigned int bsize
= TF_BLOCK_SIZE
;
211 struct crypt_priv
*ctx
= priv
;
214 ctx
->fpu_enabled
= twofish_fpu_begin(ctx
->fpu_enabled
, nbytes
);
/* 16 blocks per call while at least that many remain */
216 while (nbytes
>= TF_AVX2_PARALLEL_BLOCKS
* bsize
) {
217 twofish_ecb_enc_16way(ctx
->ctx
, srcdst
, srcdst
);
218 srcdst
+= bsize
* TF_AVX2_PARALLEL_BLOCKS
;
219 nbytes
-= bsize
* TF_AVX2_PARALLEL_BLOCKS
;
/* then 8 blocks per call */
222 while (nbytes
>= 8 * bsize
) {
223 twofish_ecb_enc_8way(ctx
->ctx
, srcdst
, srcdst
);
/* then 3 blocks per call */
228 while (nbytes
>= 3 * bsize
) {
229 twofish_enc_blk_3way(ctx
->ctx
, srcdst
, srcdst
);
/* remaining whole blocks, one at a time */
234 for (i
= 0; i
< nbytes
/ bsize
; i
++, srcdst
+= bsize
)
235 twofish_enc_blk(ctx
->ctx
, srcdst
, srcdst
);
/*
 * In-place decryption callback installed as .crypt_fn by lrw_decrypt().
 * Mirror image of encrypt_callback(): priv is treated as a struct
 * crypt_priv and srcdst is both input and output for every cipher call.
 *
 * It first updates ctx->fpu_enabled through twofish_fpu_begin(), then works
 * through the data with progressively narrower routines: 16 blocks per call,
 * 8 blocks per call, 3 blocks per call, and finally one block per call.
 * NOTE(review): in this copy the 8-way and 3-way loops show only the cipher
 * call (the srcdst/nbytes updates seen in the 16-way loop are not visible),
 * and the declaration of i plus all closing braces are absent.
 */
238 static void decrypt_callback(void *priv
, u8
*srcdst
, unsigned int nbytes
)
240 const unsigned int bsize
= TF_BLOCK_SIZE
;
241 struct crypt_priv
*ctx
= priv
;
244 ctx
->fpu_enabled
= twofish_fpu_begin(ctx
->fpu_enabled
, nbytes
);
/* 16 blocks per call while at least that many remain */
246 while (nbytes
>= TF_AVX2_PARALLEL_BLOCKS
* bsize
) {
247 twofish_ecb_dec_16way(ctx
->ctx
, srcdst
, srcdst
);
248 srcdst
+= bsize
* TF_AVX2_PARALLEL_BLOCKS
;
249 nbytes
-= bsize
* TF_AVX2_PARALLEL_BLOCKS
;
/* then 8 blocks per call */
252 while (nbytes
>= 8 * bsize
) {
253 twofish_ecb_dec_8way(ctx
->ctx
, srcdst
, srcdst
);
/* then 3 blocks per call */
258 while (nbytes
>= 3 * bsize
) {
259 twofish_dec_blk_3way(ctx
->ctx
, srcdst
, srcdst
);
/* remaining whole blocks, one at a time */
264 for (i
= 0; i
< nbytes
/ bsize
; i
++, srcdst
+= bsize
)
265 twofish_dec_blk(ctx
->ctx
, srcdst
, srcdst
);
/*
 * LRW encrypt handler of the __lrw-twofish-avx2 entry in tf_algs.
 *
 * Takes the struct twofish_lrw_ctx from desc->tfm, builds a crypt_priv
 * (pointer to its twofish_ctx, fpu_enabled starting as false) and an
 * lrw_crypt_req (buffer length of a TF_AVX2_PARALLEL_BLOCKS-element be128
 * array, the LRW table, the crypt_priv and encrypt_callback), then runs
 * lrw_crypt() and finally calls twofish_fpu_end() with the fpu_enabled
 * state the callback left behind.
 * NOTE(review): the declaration of ret, the remaining lrw_crypt_req fields,
 * the return statement and all braces are absent from this copy.
 */
268 static int lrw_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
269 struct scatterlist
*src
, unsigned int nbytes
)
271 struct twofish_lrw_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
272 be128 buf
[TF_AVX2_PARALLEL_BLOCKS
];
273 struct crypt_priv crypt_ctx
= {
274 .ctx
= &ctx
->twofish_ctx
,
275 .fpu_enabled
= false,
277 struct lrw_crypt_req req
= {
279 .tbuflen
= sizeof(buf
),
281 .table_ctx
= &ctx
->lrw_table
,
282 .crypt_ctx
= &crypt_ctx
,
283 .crypt_fn
= encrypt_callback
,
/*
 * The may-sleep request flag is cleared before lrw_crypt() runs.
 * NOTE(review): presumably because the callback can leave the FPU section
 * open until twofish_fpu_end() below -- confirm.
 */
287 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
288 ret
= lrw_crypt(desc
, dst
, src
, nbytes
, &req
);
289 twofish_fpu_end(crypt_ctx
.fpu_enabled
);
/*
 * LRW decrypt handler of the __lrw-twofish-avx2 entry in tf_algs.
 *
 * Same sequence as lrw_encrypt() but with decrypt_callback as the crypt_fn:
 * builds a crypt_priv (pointer to the twofish_ctx inside the
 * twofish_lrw_ctx, fpu_enabled starting as false) and an lrw_crypt_req,
 * runs lrw_crypt(), then calls twofish_fpu_end() with the fpu_enabled state
 * the callback left behind.
 * NOTE(review): the declaration of ret, the remaining lrw_crypt_req fields,
 * the return statement and all braces are absent from this copy.
 */
294 static int lrw_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
295 struct scatterlist
*src
, unsigned int nbytes
)
297 struct twofish_lrw_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
298 be128 buf
[TF_AVX2_PARALLEL_BLOCKS
];
299 struct crypt_priv crypt_ctx
= {
300 .ctx
= &ctx
->twofish_ctx
,
301 .fpu_enabled
= false,
303 struct lrw_crypt_req req
= {
305 .tbuflen
= sizeof(buf
),
307 .table_ctx
= &ctx
->lrw_table
,
308 .crypt_ctx
= &crypt_ctx
,
309 .crypt_fn
= decrypt_callback
,
/*
 * The may-sleep request flag is cleared before lrw_crypt() runs.
 * NOTE(review): presumably because the callback can leave the FPU section
 * open until twofish_fpu_end() below -- confirm.
 */
313 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
314 ret
= lrw_crypt(desc
, dst
, src
, nbytes
, &req
);
315 twofish_fpu_end(crypt_ctx
.fpu_enabled
);
/*
 * XTS encrypt handler of the __xts-twofish-avx2 entry in tf_algs.  Fetches
 * the struct twofish_xts_ctx from desc->tfm and calls
 * glue_xts_crypt_128bit() with the twofish_enc_xts table, twofish_enc_blk
 * (via XTS_TWEAK_CAST) as the tweak function, and the tweak_ctx and
 * crypt_ctx members of that context.  Returns the glue helper result.
 * NOTE(review): the function braces are absent from this copy.
 */
320 static int xts_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
321 struct scatterlist
*src
, unsigned int nbytes
)
323 struct twofish_xts_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
325 return glue_xts_crypt_128bit(&twofish_enc_xts
, desc
, dst
, src
, nbytes
,
326 XTS_TWEAK_CAST(twofish_enc_blk
),
327 &ctx
->tweak_ctx
, &ctx
->crypt_ctx
);
/*
 * XTS decrypt handler of the __xts-twofish-avx2 entry in tf_algs.  Same call
 * as xts_encrypt() except that the dispatch table is twofish_dec_xts.  The
 * tweak function is still twofish_enc_blk, exactly as in xts_encrypt(): only
 * the data path switches to the decrypt routines.
 * NOTE(review): the function braces are absent from this copy.
 */
330 static int xts_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
331 struct scatterlist
*src
, unsigned int nbytes
)
333 struct twofish_xts_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
335 return glue_xts_crypt_128bit(&twofish_dec_xts
, desc
, dst
, src
, nbytes
,
336 XTS_TWEAK_CAST(twofish_enc_blk
),
337 &ctx
->tweak_ctx
, &ctx
->crypt_ctx
);
/*
 * Table of the ten algorithms that init() registers and fini() unregisters.
 *
 * The first five entries (names starting with two underscores) are
 * CRYPTO_ALG_TYPE_BLKCIPHER entries wired directly to the handlers defined
 * in this file.  The last five carry the public mode(twofish) names, are
 * CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, use struct async_helper_ctx
 * as context and route everything through the ablk_* helpers.
 * NOTE(review): this copy has lost lines inside every entry (braces between
 * entries, some fields, and the second operand of the LRW key-size sums);
 * restore them from the original file before building.
 */
340 static struct crypto_alg tf_algs
[10] = { {
/* internal ECB: ecb_encrypt / ecb_decrypt */
341 .cra_name
= "__ecb-twofish-avx2",
342 .cra_driver_name
= "__driver-ecb-twofish-avx2",
344 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
345 .cra_blocksize
= TF_BLOCK_SIZE
,
346 .cra_ctxsize
= sizeof(struct twofish_ctx
),
348 .cra_type
= &crypto_blkcipher_type
,
349 .cra_module
= THIS_MODULE
,
352 .min_keysize
= TF_MIN_KEY_SIZE
,
353 .max_keysize
= TF_MAX_KEY_SIZE
,
354 .setkey
= twofish_setkey
,
355 .encrypt
= ecb_encrypt
,
356 .decrypt
= ecb_decrypt
,
/* internal CBC: cbc_encrypt / cbc_decrypt */
360 .cra_name
= "__cbc-twofish-avx2",
361 .cra_driver_name
= "__driver-cbc-twofish-avx2",
363 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
364 .cra_blocksize
= TF_BLOCK_SIZE
,
365 .cra_ctxsize
= sizeof(struct twofish_ctx
),
367 .cra_type
= &crypto_blkcipher_type
,
368 .cra_module
= THIS_MODULE
,
371 .min_keysize
= TF_MIN_KEY_SIZE
,
372 .max_keysize
= TF_MAX_KEY_SIZE
,
373 .setkey
= twofish_setkey
,
374 .encrypt
= cbc_encrypt
,
375 .decrypt
= cbc_decrypt
,
/* internal CTR: ctr_crypt serves both directions */
379 .cra_name
= "__ctr-twofish-avx2",
380 .cra_driver_name
= "__driver-ctr-twofish-avx2",
382 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
384 .cra_ctxsize
= sizeof(struct twofish_ctx
),
386 .cra_type
= &crypto_blkcipher_type
,
387 .cra_module
= THIS_MODULE
,
390 .min_keysize
= TF_MIN_KEY_SIZE
,
391 .max_keysize
= TF_MAX_KEY_SIZE
,
392 .ivsize
= TF_BLOCK_SIZE
,
393 .setkey
= twofish_setkey
,
394 .encrypt
= ctr_crypt
,
395 .decrypt
= ctr_crypt
,
/*
 * internal LRW: own context type, setkey and exit hook.
 * NOTE(review): both key-size expressions end on a plus sign in this copy.
 */
399 .cra_name
= "__lrw-twofish-avx2",
400 .cra_driver_name
= "__driver-lrw-twofish-avx2",
402 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
403 .cra_blocksize
= TF_BLOCK_SIZE
,
404 .cra_ctxsize
= sizeof(struct twofish_lrw_ctx
),
406 .cra_type
= &crypto_blkcipher_type
,
407 .cra_module
= THIS_MODULE
,
408 .cra_exit
= lrw_twofish_exit_tfm
,
411 .min_keysize
= TF_MIN_KEY_SIZE
+
413 .max_keysize
= TF_MAX_KEY_SIZE
+
415 .ivsize
= TF_BLOCK_SIZE
,
416 .setkey
= lrw_twofish_setkey
,
417 .encrypt
= lrw_encrypt
,
418 .decrypt
= lrw_decrypt
,
/* internal XTS: key sizes are twice the plain Twofish limits */
422 .cra_name
= "__xts-twofish-avx2",
423 .cra_driver_name
= "__driver-xts-twofish-avx2",
425 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
426 .cra_blocksize
= TF_BLOCK_SIZE
,
427 .cra_ctxsize
= sizeof(struct twofish_xts_ctx
),
429 .cra_type
= &crypto_blkcipher_type
,
430 .cra_module
= THIS_MODULE
,
433 .min_keysize
= TF_MIN_KEY_SIZE
* 2,
434 .max_keysize
= TF_MAX_KEY_SIZE
* 2,
435 .ivsize
= TF_BLOCK_SIZE
,
436 .setkey
= xts_twofish_setkey
,
437 .encrypt
= xts_encrypt
,
438 .decrypt
= xts_decrypt
,
/* public async ECB, served by the ablk helpers */
442 .cra_name
= "ecb(twofish)",
443 .cra_driver_name
= "ecb-twofish-avx2",
445 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
446 .cra_blocksize
= TF_BLOCK_SIZE
,
447 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
449 .cra_type
= &crypto_ablkcipher_type
,
450 .cra_module
= THIS_MODULE
,
451 .cra_init
= ablk_init
,
452 .cra_exit
= ablk_exit
,
455 .min_keysize
= TF_MIN_KEY_SIZE
,
456 .max_keysize
= TF_MAX_KEY_SIZE
,
457 .setkey
= ablk_set_key
,
458 .encrypt
= ablk_encrypt
,
459 .decrypt
= ablk_decrypt
,
/*
 * public async CBC.  Note .encrypt is __ablk_encrypt here, unlike the other
 * async entries which use ablk_encrypt.
 * NOTE(review): presumably because cbc_encrypt() only uses the one-block
 * routine; confirm against the ablk helper.
 */
463 .cra_name
= "cbc(twofish)",
464 .cra_driver_name
= "cbc-twofish-avx2",
466 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
467 .cra_blocksize
= TF_BLOCK_SIZE
,
468 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
470 .cra_type
= &crypto_ablkcipher_type
,
471 .cra_module
= THIS_MODULE
,
472 .cra_init
= ablk_init
,
473 .cra_exit
= ablk_exit
,
476 .min_keysize
= TF_MIN_KEY_SIZE
,
477 .max_keysize
= TF_MAX_KEY_SIZE
,
478 .ivsize
= TF_BLOCK_SIZE
,
479 .setkey
= ablk_set_key
,
480 .encrypt
= __ablk_encrypt
,
481 .decrypt
= ablk_decrypt
,
/*
 * public async CTR.  .decrypt is ablk_encrypt as well, matching the internal
 * CTR entry where one handler serves both directions.
 */
485 .cra_name
= "ctr(twofish)",
486 .cra_driver_name
= "ctr-twofish-avx2",
488 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
490 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
492 .cra_type
= &crypto_ablkcipher_type
,
493 .cra_module
= THIS_MODULE
,
494 .cra_init
= ablk_init
,
495 .cra_exit
= ablk_exit
,
498 .min_keysize
= TF_MIN_KEY_SIZE
,
499 .max_keysize
= TF_MAX_KEY_SIZE
,
500 .ivsize
= TF_BLOCK_SIZE
,
501 .setkey
= ablk_set_key
,
502 .encrypt
= ablk_encrypt
,
503 .decrypt
= ablk_encrypt
,
/*
 * public async LRW.
 * NOTE(review): both key-size expressions end on a plus sign in this copy.
 */
508 .cra_name
= "lrw(twofish)",
509 .cra_driver_name
= "lrw-twofish-avx2",
511 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
512 .cra_blocksize
= TF_BLOCK_SIZE
,
513 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
515 .cra_type
= &crypto_ablkcipher_type
,
516 .cra_module
= THIS_MODULE
,
517 .cra_init
= ablk_init
,
518 .cra_exit
= ablk_exit
,
521 .min_keysize
= TF_MIN_KEY_SIZE
+
523 .max_keysize
= TF_MAX_KEY_SIZE
+
525 .ivsize
= TF_BLOCK_SIZE
,
526 .setkey
= ablk_set_key
,
527 .encrypt
= ablk_encrypt
,
528 .decrypt
= ablk_decrypt
,
/* public async XTS: key sizes are twice the plain Twofish limits */
532 .cra_name
= "xts(twofish)",
533 .cra_driver_name
= "xts-twofish-avx2",
535 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
536 .cra_blocksize
= TF_BLOCK_SIZE
,
537 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
539 .cra_type
= &crypto_ablkcipher_type
,
540 .cra_module
= THIS_MODULE
,
541 .cra_init
= ablk_init
,
542 .cra_exit
= ablk_exit
,
545 .min_keysize
= TF_MIN_KEY_SIZE
* 2,
546 .max_keysize
= TF_MAX_KEY_SIZE
* 2,
547 .ivsize
= TF_BLOCK_SIZE
,
548 .setkey
= ablk_set_key
,
549 .encrypt
= ablk_encrypt
,
550 .decrypt
= ablk_decrypt
,
/*
 * Module initialisation.  Two gates precede registration:
 *   1. cpu_has_avx2 and cpu_has_osxsave must both be set, otherwise an
 *      informational message is logged;
 *   2. the value read with xgetbv(XCR_XFEATURE_ENABLED_MASK) must have both
 *      XSTATE_SSE and XSTATE_YMM set, otherwise a second message is logged.
 * When both pass, the result of crypto_register_algs() on tf_algs is
 * returned.
 * NOTE(review): the declaration of xcr0, the statements that leave the
 * function after each pr_info(), and all braces are absent from this copy.
 */
555 static int __init
init(void)
559 if (!cpu_has_avx2
|| !cpu_has_osxsave
) {
560 pr_info("AVX2 instructions are not detected.\n");
564 xcr0
= xgetbv(XCR_XFEATURE_ENABLED_MASK
);
565 if ((xcr0
& (XSTATE_SSE
| XSTATE_YMM
)) != (XSTATE_SSE
| XSTATE_YMM
)) {
566 pr_info("AVX2 detected but unusable.\n");
570 return crypto_register_algs(tf_algs
, ARRAY_SIZE(tf_algs
));
/*
 * Module teardown: unregisters every entry of tf_algs, the same array and
 * count that init() registers.
 * NOTE(review): the function braces are absent from this copy.
 */
573 static void __exit
fini(void)
575 crypto_unregister_algs(tf_algs
, ARRAY_SIZE(tf_algs
));
/*
 * Module metadata: GPL licence, a one-line description, and two aliases
 * (twofish and twofish-asm).
 * NOTE(review): nothing visible in this copy hooks init() and fini() up as
 * the module entry and exit points; those lines may be among the dropped
 * ones -- check the original file.
 */
581 MODULE_LICENSE("GPL");
582 MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
583 MODULE_ALIAS("twofish");
584 MODULE_ALIAS("twofish-asm");