/*
 * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
 *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 * CTR part based on code (crypto/ctr.c) by:
 *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
23 #include <linux/module.h>
24 #include <linux/types.h>
25 #include <linux/crypto.h>
26 #include <linux/err.h>
27 #include <crypto/algapi.h>
28 #include <crypto/blowfish.h>
29 #include <crypto/cryptd.h>
30 #include <crypto/ctr.h>
33 #include <asm/xsave.h>
34 #include <asm/crypto/blowfish.h>
35 #include <asm/crypto/ablk_helper.h>
36 #include <crypto/scatterwalk.h>
38 #define BF_AVX2_PARALLEL_BLOCKS 32
40 /* 32-way AVX2 parallel cipher functions */
/*
 * Declarations of the 32-way routines that the ECB, CBC-decrypt and CTR
 * paths further down in this file call.
 * NOTE(review): every declaration below stops at a trailing comma; the
 * remaining parameter(s) and the closing ");" are absent from this copy of
 * the file and must be restored from the original source.
 */
41 asmlinkage
void blowfish_ecb_enc_32way(struct bf_ctx
*ctx
, u8
*dst
,
43 asmlinkage
void blowfish_ecb_dec_32way(struct bf_ctx
*ctx
, u8
*dst
,
45 asmlinkage
void blowfish_cbc_dec_32way(struct bf_ctx
*ctx
, u8
*dst
,
47 asmlinkage
void blowfish_ctr_32way(struct bf_ctx
*ctx
, u8
*dst
, const u8
*src
,
/*
 * bf_fpu_begin - gate FPU use on the size of the chunk being processed.
 * @fpu_enabled: FPU state flag the caller carries from one call to the next
 * @nbytes: number of bytes in the current chunk
 *
 * The only test visible here compares @nbytes against one full AVX2 batch
 * (BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS). Callers store the returned bool
 * back into their own fpu_enabled flag.
 * NOTE(review): the braces, the return statements, the statement that
 * presumably enables the FPU, and the terminator of the inner comment are
 * all absent from this copy of the file; restore them from the original
 * source before building.
 */
50 static inline bool bf_fpu_begin(bool fpu_enabled
, unsigned int nbytes
)
55 /* FPU is only used when chunk to be processed is large enough, so
56 * do not enable FPU until it is necessary.
58 if (nbytes
< BF_BLOCK_SIZE
* BF_AVX2_PARALLEL_BLOCKS
)
/*
 * bf_fpu_end - counterpart of bf_fpu_begin().
 * @fpu_enabled: flag last returned by bf_fpu_begin()
 *
 * ecb_crypt(), cbc_decrypt() and ctr_crypt() each call this once after
 * their walk loop.
 * NOTE(review): the function body is absent from this copy of the file.
 */
65 static inline void bf_fpu_end(bool fpu_enabled
)
/*
 * ecb_crypt - ECB walk loop shared by ecb_encrypt() and ecb_decrypt().
 * @desc: blkcipher descriptor; its tfm supplies the struct bf_ctx
 * @walk: walk that the caller has already initialised
 *
 * Each chunk delivered by the walk is handled in three tiers, largest
 * first: batches of BF_AVX2_PARALLEL_BLOCKS blocks, batches of
 * BF_PARALLEL_BLOCKS blocks, then single blocks. The bytes left over from a
 * chunk are handed back through blkcipher_walk_done().
 * NOTE(review): this copy of the file has lost lines throughout the
 * function: the final parameter (ecb_encrypt() passes true, ecb_decrypt()
 * passes false), the nbytes/err declarations, the braces and loop openers,
 * the selection between each enc/dec call pair, the single-block pointer
 * updates and the return statement. Restore them from the original source.
 */
71 static int ecb_crypt(struct blkcipher_desc
*desc
, struct blkcipher_walk
*walk
,
74 bool fpu_enabled
= false;
75 struct bf_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
76 const unsigned int bsize
= BF_BLOCK_SIZE
;
80 err
= blkcipher_walk_virt(desc
, walk
);
/*
 * Clear the may-sleep request flag for the remainder of the walk
 * (presumably because the FPU section must not sleep -- confirm).
 */
81 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
83 while ((nbytes
= walk
->nbytes
)) {
84 u8
*wsrc
= walk
->src
.virt
.addr
;
85 u8
*wdst
= walk
->dst
.virt
.addr
;
/* The previous flag value is passed back in on every iteration. */
87 fpu_enabled
= bf_fpu_begin(fpu_enabled
, nbytes
);
89 /* Process multi-block AVX2 batch */
90 if (nbytes
>= bsize
* BF_AVX2_PARALLEL_BLOCKS
) {
/* NOTE(review): the enc/dec selection around these two calls is missing. */
93 blowfish_ecb_enc_32way(ctx
, wdst
, wsrc
);
95 blowfish_ecb_dec_32way(ctx
, wdst
, wsrc
);
97 wsrc
+= bsize
* BF_AVX2_PARALLEL_BLOCKS
;
98 wdst
+= bsize
* BF_AVX2_PARALLEL_BLOCKS
;
99 nbytes
-= bsize
* BF_AVX2_PARALLEL_BLOCKS
;
100 } while (nbytes
>= bsize
* BF_AVX2_PARALLEL_BLOCKS
);
106 /* Process multi-block batch */
107 if (nbytes
>= bsize
* BF_PARALLEL_BLOCKS
) {
/* NOTE(review): the enc/dec selection around these two calls is missing. */
110 blowfish_enc_blk_4way(ctx
, wdst
, wsrc
);
112 blowfish_dec_blk_4way(ctx
, wdst
, wsrc
);
114 wsrc
+= bsize
* BF_PARALLEL_BLOCKS
;
115 wdst
+= bsize
* BF_PARALLEL_BLOCKS
;
116 nbytes
-= bsize
* BF_PARALLEL_BLOCKS
;
117 } while (nbytes
>= bsize
* BF_PARALLEL_BLOCKS
);
123 /* Handle leftovers */
/* NOTE(review): the enc/dec selection and the pointer/nbytes updates are missing. */
126 blowfish_enc_blk(ctx
, wdst
, wsrc
);
128 blowfish_dec_blk(ctx
, wdst
, wsrc
);
133 } while (nbytes
>= bsize
);
/* Hand the unprocessed remainder of this chunk back to the walk. */
136 err
= blkcipher_walk_done(desc
, walk
, nbytes
);
139 bf_fpu_end(fpu_enabled
);
/*
 * ecb_encrypt - .encrypt handler of the "__ecb-blowfish-avx2" entry in
 * bf_algs.
 * @desc: blkcipher descriptor
 * @dst: destination scatterlist
 * @src: source scatterlist
 * @nbytes: number of bytes to process
 *
 * Initialises a walk over @dst/@src and returns the result of ecb_crypt()
 * called with true as its last argument.
 * NOTE(review): the function's braces are absent from this copy of the file.
 */
143 static int ecb_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
144 struct scatterlist
*src
, unsigned int nbytes
)
146 struct blkcipher_walk walk
;
148 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
149 return ecb_crypt(desc
, &walk
, true);
/*
 * ecb_decrypt - .decrypt handler of the "__ecb-blowfish-avx2" entry in
 * bf_algs.
 * @desc: blkcipher descriptor
 * @dst: destination scatterlist
 * @src: source scatterlist
 * @nbytes: number of bytes to process
 *
 * Initialises a walk over @dst/@src and returns the result of ecb_crypt()
 * called with false as its last argument.
 * NOTE(review): the function's braces are absent from this copy of the file.
 */
152 static int ecb_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
153 struct scatterlist
*src
, unsigned int nbytes
)
155 struct blkcipher_walk walk
;
157 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
158 return ecb_crypt(desc
, &walk
, false);
/*
 * __cbc_encrypt - CBC-encrypt the chunk currently mapped by @walk.
 * @desc: blkcipher descriptor; its tfm supplies the struct bf_ctx
 * @walk: walk whose src/dst virtual addresses and iv are used
 *
 * Blocks are accessed as u64 words. The visible call encrypts the
 * destination block in place with blowfish_enc_blk(), the loop runs while at
 * least one block remains, and the word that iv points to at the end is
 * stored back into walk->iv. cbc_encrypt() passes the return value to
 * blkcipher_walk_done().
 * NOTE(review): the braces, the "do {" opener, the step that fills *dst
 * before encryption (presumably the XOR with the previous block), the
 * pointer/nbytes updates and the return statement are absent from this copy
 * of the file.
 */
161 static unsigned int __cbc_encrypt(struct blkcipher_desc
*desc
,
162 struct blkcipher_walk
*walk
)
164 struct bf_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
165 unsigned int bsize
= BF_BLOCK_SIZE
;
166 unsigned int nbytes
= walk
->nbytes
;
167 u64
*src
= (u64
*)walk
->src
.virt
.addr
;
168 u64
*dst
= (u64
*)walk
->dst
.virt
.addr
;
169 u64
*iv
= (u64
*)walk
->iv
;
/* Source and destination of the cipher call are both the dst block. */
173 blowfish_enc_blk(ctx
, (u8
*)dst
, (u8
*)dst
);
179 } while (nbytes
>= bsize
);
/* Store the word iv now points at as the walk's IV. */
181 *(u64
*)walk
->iv
= *iv
;
/*
 * cbc_encrypt - .encrypt handler of the "__cbc-blowfish-avx2" entry in
 * bf_algs.
 * @desc: blkcipher descriptor
 * @dst: destination scatterlist
 * @src: source scatterlist
 * @nbytes: number of bytes to process
 *
 * Walks the request and, for every chunk, calls __cbc_encrypt() and hands
 * its return value to blkcipher_walk_done(). Unlike cbc_decrypt(), the
 * visible code calls neither bf_fpu_begin() nor bf_fpu_end().
 * NOTE(review): the braces, the err declaration and the return statement
 * are absent from this copy of the file.
 */
185 static int cbc_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
186 struct scatterlist
*src
, unsigned int nbytes
)
188 struct blkcipher_walk walk
;
191 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
192 err
= blkcipher_walk_virt(desc
, &walk
);
194 while ((nbytes
= walk
.nbytes
)) {
195 nbytes
= __cbc_encrypt(desc
, &walk
);
196 err
= blkcipher_walk_done(desc
, &walk
, nbytes
);
/*
 * __cbc_decrypt - CBC-decrypt the chunk currently mapped by @walk.
 * @desc: blkcipher descriptor; its tfm supplies the struct bf_ctx
 * @walk: walk whose src/dst virtual addresses and iv are used
 *
 * Blocks are accessed as u64 words. src and dst are first moved to the last
 * block of the chunk and the tiers below step the pointers backwards from
 * there: BF_AVX2_PARALLEL_BLOCKS-block batches via blowfish_cbc_dec_32way(),
 * BF_PARALLEL_BLOCKS-block batches via blowfish_dec_blk_4way(), then single
 * blocks via blowfish_dec_blk(). The final visible statements XOR walk->iv
 * into *dst and then store last_iv in walk->iv. cbc_decrypt() passes the
 * return value to blkcipher_walk_done().
 * NOTE(review): many lines are absent from this copy of the file: the
 * braces and "do {" openers, the declarations and assignments of last_iv and
 * i, the body of the loop that fills ivs[], the chaining XORs and exit
 * checks between batches, and the return statement. The statement order in
 * this function matters; restore it from the original source rather than by
 * hand.
 */
202 static unsigned int __cbc_decrypt(struct blkcipher_desc
*desc
,
203 struct blkcipher_walk
*walk
)
205 struct bf_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
206 const unsigned int bsize
= BF_BLOCK_SIZE
;
207 unsigned int nbytes
= walk
->nbytes
;
208 u64
*src
= (u64
*)walk
->src
.virt
.addr
;
209 u64
*dst
= (u64
*)walk
->dst
.virt
.addr
;
213 /* Start of the last block. */
214 src
+= nbytes
/ bsize
- 1;
215 dst
+= nbytes
/ bsize
- 1;
219 /* Process multi-block AVX2 batch */
220 if (nbytes
>= bsize
* BF_AVX2_PARALLEL_BLOCKS
) {
/* Step back so src/dst point at the first block of this batch. */
222 nbytes
-= bsize
* (BF_AVX2_PARALLEL_BLOCKS
- 1);
223 src
-= BF_AVX2_PARALLEL_BLOCKS
- 1;
224 dst
-= BF_AVX2_PARALLEL_BLOCKS
- 1;
226 blowfish_cbc_dec_32way(ctx
, (u8
*)dst
, (u8
*)src
);
235 } while (nbytes
>= bsize
* BF_AVX2_PARALLEL_BLOCKS
);
241 /* Process multi-block batch */
242 if (nbytes
>= bsize
* BF_PARALLEL_BLOCKS
) {
243 u64 ivs
[BF_PARALLEL_BLOCKS
- 1];
/* Step back so src/dst point at the first block of this batch. */
246 nbytes
-= bsize
* (BF_PARALLEL_BLOCKS
- 1);
247 src
-= BF_PARALLEL_BLOCKS
- 1;
248 dst
-= BF_PARALLEL_BLOCKS
- 1;
/* NOTE(review): the body of this loop (presumably filling ivs[]) is missing. */
250 for (i
= 0; i
< BF_PARALLEL_BLOCKS
- 1; i
++)
253 blowfish_dec_blk_4way(ctx
, (u8
*)dst
, (u8
*)src
);
/* XOR ivs[] into the second and later output blocks of the batch. */
255 for (i
= 0; i
< BF_PARALLEL_BLOCKS
- 1; i
++)
256 dst
[i
+ 1] ^= ivs
[i
];
265 } while (nbytes
>= bsize
* BF_PARALLEL_BLOCKS
);
271 /* Handle leftovers */
273 blowfish_dec_blk(ctx
, (u8
*)dst
, (u8
*)src
);
/* XOR the walk's IV into *dst, then replace the IV with last_iv. */
285 *dst
^= *(u64
*)walk
->iv
;
286 *(u64
*)walk
->iv
= last_iv
;
/*
 * cbc_decrypt - .decrypt handler of the "__cbc-blowfish-avx2" entry in
 * bf_algs.
 * @desc: blkcipher descriptor
 * @dst: destination scatterlist
 * @src: source scatterlist
 * @nbytes: number of bytes to process
 *
 * Walks the request with the may-sleep flag cleared. For every chunk it
 * updates the FPU flag through bf_fpu_begin(), calls __cbc_decrypt() and
 * hands its return value to blkcipher_walk_done(). bf_fpu_end() is called
 * after the loop.
 * NOTE(review): the braces, the err declaration and the return statement
 * are absent from this copy of the file.
 */
291 static int cbc_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
292 struct scatterlist
*src
, unsigned int nbytes
)
294 bool fpu_enabled
= false;
295 struct blkcipher_walk walk
;
298 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
299 err
= blkcipher_walk_virt(desc
, &walk
);
300 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
302 while ((nbytes
= walk
.nbytes
)) {
303 fpu_enabled
= bf_fpu_begin(fpu_enabled
, nbytes
);
304 nbytes
= __cbc_decrypt(desc
, &walk
);
305 err
= blkcipher_walk_done(desc
, &walk
, nbytes
);
308 bf_fpu_end(fpu_enabled
);
/*
 * ctr_crypt_final - CTR step that ctr_crypt() runs after its main loop.
 * @desc: blkcipher descriptor; its tfm supplies the struct bf_ctx
 * @walk: walk whose iv is the counter block and whose nbytes is the length
 *
 * Encrypts the counter block into a local keystream buffer of
 * BF_BLOCK_SIZE bytes, XORs walk->nbytes bytes of source into it, copies
 * that many bytes to the destination and then increments the counter block.
 * ctr_crypt() only leaves its main loop once walk.nbytes is below
 * BF_BLOCK_SIZE.
 * NOTE(review): the function's braces are absent from this copy of the file.
 */
312 static void ctr_crypt_final(struct blkcipher_desc
*desc
,
313 struct blkcipher_walk
*walk
)
315 struct bf_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
316 u8
*ctrblk
= walk
->iv
;
317 u8 keystream
[BF_BLOCK_SIZE
];
318 u8
*src
= walk
->src
.virt
.addr
;
319 u8
*dst
= walk
->dst
.virt
.addr
;
320 unsigned int nbytes
= walk
->nbytes
;
/* keystream = E(counter); then keystream ^= src for nbytes bytes. */
322 blowfish_enc_blk(ctx
, keystream
, ctrblk
);
323 crypto_xor(keystream
, src
, nbytes
);
324 memcpy(dst
, keystream
, nbytes
);
326 crypto_inc(ctrblk
, BF_BLOCK_SIZE
);
/*
 * __ctr_crypt - CTR-process the whole blocks of the chunk mapped by @walk.
 * @desc: blkcipher descriptor; its tfm supplies the struct bf_ctx
 * @walk: walk whose src/dst virtual addresses and iv (counter) are used
 *
 * Blocks are accessed as u64 words, in three tiers, largest first:
 * BF_AVX2_PARALLEL_BLOCKS-block batches via blowfish_ctr_32way(),
 * BF_PARALLEL_BLOCKS-block batches via blowfish_enc_blk_xor_4way(), then
 * single blocks via blowfish_enc_blk_xor(). The four-block tier builds
 * big-endian counter blocks from the walk's IV and writes the advanced
 * counter back to walk->iv once its loop has finished. ctr_crypt() passes
 * the return value to blkcipher_walk_done().
 * NOTE(review): lines are absent from this copy of the file: the braces and
 * "do {" openers, the declaration of i, the trailing arguments of the
 * blowfish_ctr_32way() and blowfish_enc_blk_xor_4way() calls, part of the
 * counter-building loop body, the start of the single-block loop and the
 * return statement. Restore them from the original source.
 */
329 static unsigned int __ctr_crypt(struct blkcipher_desc
*desc
,
330 struct blkcipher_walk
*walk
)
332 struct bf_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
333 unsigned int bsize
= BF_BLOCK_SIZE
;
334 unsigned int nbytes
= walk
->nbytes
;
335 u64
*src
= (u64
*)walk
->src
.virt
.addr
;
336 u64
*dst
= (u64
*)walk
->dst
.virt
.addr
;
339 /* Process multi-block AVX2 batch */
340 if (nbytes
>= bsize
* BF_AVX2_PARALLEL_BLOCKS
) {
/* NOTE(review): the last argument of this call is missing. */
342 blowfish_ctr_32way(ctx
, (u8
*)dst
, (u8
*)src
,
345 src
+= BF_AVX2_PARALLEL_BLOCKS
;
346 dst
+= BF_AVX2_PARALLEL_BLOCKS
;
347 nbytes
-= bsize
* BF_AVX2_PARALLEL_BLOCKS
;
348 } while (nbytes
>= bsize
* BF_AVX2_PARALLEL_BLOCKS
);
354 /* Process four block batch */
355 if (nbytes
>= bsize
* BF_PARALLEL_BLOCKS
) {
356 __be64 ctrblocks
[BF_PARALLEL_BLOCKS
];
/* Work on the counter in CPU byte order; the IV is stored big-endian. */
357 u64 ctrblk
= be64_to_cpu(*(__be64
*)walk
->iv
);
360 /* create ctrblks for parallel encrypt */
361 for (i
= 0; i
< BF_PARALLEL_BLOCKS
; i
++) {
365 ctrblocks
[i
] = cpu_to_be64(ctrblk
++);
/* NOTE(review): the last argument of this call is missing. */
368 blowfish_enc_blk_xor_4way(ctx
, (u8
*)dst
,
371 src
+= BF_PARALLEL_BLOCKS
;
372 dst
+= BF_PARALLEL_BLOCKS
;
373 nbytes
-= bsize
* BF_PARALLEL_BLOCKS
;
374 } while (nbytes
>= bsize
* BF_PARALLEL_BLOCKS
);
/* Write the advanced counter back to the walk's IV. */
376 *(__be64
*)walk
->iv
= cpu_to_be64(ctrblk
);
382 /* Handle leftovers */
/* Take a copy of the current counter block, then advance the IV by one. */
389 ctrblk
= *(u64
*)walk
->iv
;
390 be64_add_cpu((__be64
*)walk
->iv
, 1);
392 blowfish_enc_blk_xor(ctx
, (u8
*)dst
, (u8
*)&ctrblk
);
396 } while ((nbytes
-= bsize
) >= bsize
);
/*
 * ctr_crypt - .encrypt and .decrypt handler of the "__ctr-blowfish-avx2"
 * entry in bf_algs.
 * @desc: blkcipher descriptor
 * @dst: destination scatterlist
 * @src: source scatterlist
 * @nbytes: number of bytes to process
 *
 * Starts the walk with blkcipher_walk_virt_block() and BF_BLOCK_SIZE,
 * clears the may-sleep flag, and runs __ctr_crypt() on every chunk that
 * holds at least one whole block. After the loop it calls bf_fpu_end(),
 * then ctr_crypt_final() and blkcipher_walk_done() with 0.
 * NOTE(review): the braces, the err declaration, the condition that
 * presumably guards the final step (such as a non-zero walk.nbytes) and the
 * return statement are absent from this copy of the file.
 */
402 static int ctr_crypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
403 struct scatterlist
*src
, unsigned int nbytes
)
405 bool fpu_enabled
= false;
406 struct blkcipher_walk walk
;
409 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
410 err
= blkcipher_walk_virt_block(desc
, &walk
, BF_BLOCK_SIZE
);
411 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
/* Main loop: only chunks containing at least one whole block. */
413 while ((nbytes
= walk
.nbytes
) >= BF_BLOCK_SIZE
) {
414 fpu_enabled
= bf_fpu_begin(fpu_enabled
, nbytes
);
415 nbytes
= __ctr_crypt(desc
, &walk
);
416 err
= blkcipher_walk_done(desc
, &walk
, nbytes
);
419 bf_fpu_end(fpu_enabled
);
/* Final step, run after the FPU section has been closed. */
422 ctr_crypt_final(desc
, &walk
);
423 err
= blkcipher_walk_done(desc
, &walk
, 0);
/*
 * bf_algs - the six algorithm descriptors that init() registers and fini()
 * unregisters.
 *
 * The first three are CRYPTO_ALG_TYPE_BLKCIPHER entries named
 * "__{ecb,cbc,ctr}-blowfish-avx2" whose handlers are the functions defined
 * in this file. The last three are CRYPTO_ALG_TYPE_ABLKCIPHER entries with
 * CRYPTO_ALG_ASYNC set, named "ecb(blowfish)", "cbc(blowfish)" and
 * "ctr(blowfish)", whose handlers are the ablk_* helpers.
 * NOTE(review): lines are absent from this copy of the file: the separators
 * between entries, the nested initialiser wrappers, the closing braces, and
 * at least one field per entry (the embedded numbering skips a line after
 * each cra_driver_name and, for the two CTR entries, a line where the other
 * entries set cra_blocksize). Restore them from the original source.
 */
429 static struct crypto_alg bf_algs
[6] = { {
/* ECB, blkcipher entry backed by ecb_encrypt()/ecb_decrypt(). */
430 .cra_name
= "__ecb-blowfish-avx2",
431 .cra_driver_name
= "__driver-ecb-blowfish-avx2",
433 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
434 .cra_blocksize
= BF_BLOCK_SIZE
,
435 .cra_ctxsize
= sizeof(struct bf_ctx
),
437 .cra_type
= &crypto_blkcipher_type
,
438 .cra_module
= THIS_MODULE
,
441 .min_keysize
= BF_MIN_KEY_SIZE
,
442 .max_keysize
= BF_MAX_KEY_SIZE
,
443 .setkey
= blowfish_setkey
,
444 .encrypt
= ecb_encrypt
,
445 .decrypt
= ecb_decrypt
,
/* CBC, blkcipher entry backed by cbc_encrypt()/cbc_decrypt(). */
449 .cra_name
= "__cbc-blowfish-avx2",
450 .cra_driver_name
= "__driver-cbc-blowfish-avx2",
452 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
453 .cra_blocksize
= BF_BLOCK_SIZE
,
454 .cra_ctxsize
= sizeof(struct bf_ctx
),
456 .cra_type
= &crypto_blkcipher_type
,
457 .cra_module
= THIS_MODULE
,
460 .min_keysize
= BF_MIN_KEY_SIZE
,
461 .max_keysize
= BF_MAX_KEY_SIZE
,
462 .setkey
= blowfish_setkey
,
463 .encrypt
= cbc_encrypt
,
464 .decrypt
= cbc_decrypt
,
/* CTR, blkcipher entry; ctr_crypt() serves both directions. */
468 .cra_name
= "__ctr-blowfish-avx2",
469 .cra_driver_name
= "__driver-ctr-blowfish-avx2",
471 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
473 .cra_ctxsize
= sizeof(struct bf_ctx
),
475 .cra_type
= &crypto_blkcipher_type
,
476 .cra_module
= THIS_MODULE
,
479 .min_keysize
= BF_MIN_KEY_SIZE
,
480 .max_keysize
= BF_MAX_KEY_SIZE
,
481 .ivsize
= BF_BLOCK_SIZE
,
482 .setkey
= blowfish_setkey
,
483 .encrypt
= ctr_crypt
,
484 .decrypt
= ctr_crypt
,
/* ECB, async ablkcipher entry routed through the ablk_* helpers. */
488 .cra_name
= "ecb(blowfish)",
489 .cra_driver_name
= "ecb-blowfish-avx2",
491 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
492 .cra_blocksize
= BF_BLOCK_SIZE
,
493 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
495 .cra_type
= &crypto_ablkcipher_type
,
496 .cra_module
= THIS_MODULE
,
497 .cra_init
= ablk_init
,
498 .cra_exit
= ablk_exit
,
501 .min_keysize
= BF_MIN_KEY_SIZE
,
502 .max_keysize
= BF_MAX_KEY_SIZE
,
503 .setkey
= ablk_set_key
,
504 .encrypt
= ablk_encrypt
,
505 .decrypt
= ablk_decrypt
,
/*
 * CBC, async ablkcipher entry. Encryption uses __ablk_encrypt where the ECB
 * and CTR entries use ablk_encrypt.
 */
509 .cra_name
= "cbc(blowfish)",
510 .cra_driver_name
= "cbc-blowfish-avx2",
512 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
513 .cra_blocksize
= BF_BLOCK_SIZE
,
514 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
516 .cra_type
= &crypto_ablkcipher_type
,
517 .cra_module
= THIS_MODULE
,
518 .cra_init
= ablk_init
,
519 .cra_exit
= ablk_exit
,
522 .min_keysize
= BF_MIN_KEY_SIZE
,
523 .max_keysize
= BF_MAX_KEY_SIZE
,
524 .ivsize
= BF_BLOCK_SIZE
,
525 .setkey
= ablk_set_key
,
526 .encrypt
= __ablk_encrypt
,
527 .decrypt
= ablk_decrypt
,
/* CTR, async ablkcipher entry; ablk_encrypt serves both directions. */
531 .cra_name
= "ctr(blowfish)",
532 .cra_driver_name
= "ctr-blowfish-avx2",
534 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
536 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
538 .cra_type
= &crypto_ablkcipher_type
,
539 .cra_module
= THIS_MODULE
,
540 .cra_init
= ablk_init
,
541 .cra_exit
= ablk_exit
,
544 .min_keysize
= BF_MIN_KEY_SIZE
,
545 .max_keysize
= BF_MAX_KEY_SIZE
,
546 .ivsize
= BF_BLOCK_SIZE
,
547 .setkey
= ablk_set_key
,
548 .encrypt
= ablk_encrypt
,
549 .decrypt
= ablk_encrypt
,
/*
 * init - module entry point: check CPU support, then register bf_algs.
 *
 * Two checks are visible before registration: the CPU must report both AVX2
 * and OSXSAVE, and the value read with
 * xgetbv(XCR_XFEATURE_ENABLED_MASK) must have both XSTATE_SSE and
 * XSTATE_YMM set. A pr_info() message is printed when either check fails.
 * On success the return value is that of crypto_register_algs().
 * NOTE(review): the braces, the xcr0 declaration and the statements that
 * follow each pr_info() (presumably an error return) are absent from this
 * copy of the file.
 */
556 static int __init
init(void)
560 if (!cpu_has_avx2
|| !cpu_has_osxsave
) {
561 pr_info("AVX2 instructions are not detected.\n");
565 xcr0
= xgetbv(XCR_XFEATURE_ENABLED_MASK
);
/* Both the SSE and the YMM state bits must be set. */
566 if ((xcr0
& (XSTATE_SSE
| XSTATE_YMM
)) != (XSTATE_SSE
| XSTATE_YMM
)) {
567 pr_info("AVX detected but unusable.\n");
571 return crypto_register_algs(bf_algs
, ARRAY_SIZE(bf_algs
));
/*
 * fini - module exit point: unregister every entry of bf_algs.
 * NOTE(review): the function's braces are absent from this copy of the file.
 */
574 static void __exit
fini(void)
576 crypto_unregister_algs(bf_algs
, ARRAY_SIZE(bf_algs
));
/*
 * Module metadata: licence, description, and the two aliases
 * "blowfish" and "blowfish-asm".
 */
582 MODULE_LICENSE("GPL");
583 MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
584 MODULE_ALIAS("blowfish");
585 MODULE_ALIAS("blowfish-asm");