/*
 * Glue Code for x86_64/AVX2 assembler optimized version of Serpent
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/crypto.h>
16 #include <linux/err.h>
17 #include <crypto/ablk_helper.h>
18 #include <crypto/algapi.h>
19 #include <crypto/ctr.h>
20 #include <crypto/lrw.h>
21 #include <crypto/xts.h>
22 #include <crypto/serpent.h>
24 #include <asm/xsave.h>
25 #include <asm/crypto/serpent-avx.h>
26 #include <asm/crypto/glue_helper.h>
/* Number of Serpent blocks handled per call by the 16-way AVX2 routines. */
#define SERPENT_AVX2_PARALLEL_BLOCKS 16
/* 16-way AVX2 parallel cipher functions */
/*
 * Prototype of the 16-way Serpent ECB encryption routine; no body is visible
 * in this file (presumably implemented in assembly, given asmlinkage).
 * NOTE(review): the embedded source line numbers jump from 31 to 33, so the
 * line carrying the remaining parameter(s) and the closing ");" is missing
 * from this listing. Restore it before building.
 */
31 asmlinkage
void serpent_ecb_enc_16way(struct serpent_ctx
*ctx
, u8
*dst
,
/*
 * Prototype of the 16-way Serpent ECB decryption routine; no body is visible
 * in this file (presumably implemented in assembly, given asmlinkage).
 * NOTE(review): the embedded source line numbers jump from 33 to 35, so the
 * line carrying the remaining parameter(s) and the closing ");" is missing
 * from this listing. Restore it before building.
 */
33 asmlinkage
void serpent_ecb_dec_16way(struct serpent_ctx
*ctx
, u8
*dst
,
35 asmlinkage
void serpent_cbc_dec_16way(void *ctx
, u128
*dst
, const u128
*src
);
/*
 * Prototype of the 16-way Serpent CTR routine; no body is visible in this
 * file (presumably implemented in assembly, given asmlinkage).
 * NOTE(review): the embedded source line numbers jump from 37 to 39, so the
 * line carrying the remaining parameter(s) and the closing ");" is missing
 * from this listing. Restore it before building.
 */
37 asmlinkage
void serpent_ctr_16way(void *ctx
, u128
*dst
, const u128
*src
,
39 asmlinkage
void serpent_xts_enc_16way(struct serpent_ctx
*ctx
, u8
*dst
,
40 const u8
*src
, le128
*iv
);
41 asmlinkage
void serpent_xts_dec_16way(struct serpent_ctx
*ctx
, u8
*dst
,
42 const u8
*src
, le128
*iv
);
/*
 * ECB encryption dispatch table; ecb_encrypt() hands it to
 * glue_ecb_crypt_128bit().  The visible entries are, in order, the 16-way
 * AVX2 routine, the 8-way AVX routine and the per-block __serpent_encrypt.
 * NOTE(review): the embedded source line numbers are not consecutive (45,
 * 47-49, 51-52, 54-55 and 57-58 are absent), so the initializer lines between
 * the entries and the closing braces are missing from this listing.
 */
44 static const struct common_glue_ctx serpent_enc
= {
46 .fpu_blocks_limit
= 8,
50 .fn_u
= { .ecb
= GLUE_FUNC_CAST(serpent_ecb_enc_16way
) }
53 .fn_u
= { .ecb
= GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx
) }
56 .fn_u
= { .ecb
= GLUE_FUNC_CAST(__serpent_encrypt
) }
/*
 * CTR dispatch table; ctr_crypt() hands it to glue_ctr_crypt_128bit().  The
 * visible entries are, in order, the 16-way AVX2 routine, the 8-way AVX
 * routine and __serpent_crypt_ctr.
 * NOTE(review): the embedded source line numbers are not consecutive (61,
 * 63-65, 67-68, 70-71 and 73-74 are absent), so the initializer lines between
 * the entries and the closing braces are missing from this listing.
 */
60 static const struct common_glue_ctx serpent_ctr
= {
62 .fpu_blocks_limit
= 8,
66 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(serpent_ctr_16way
) }
69 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx
) }
72 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr
) }
/*
 * XTS encryption dispatch table; xts_encrypt() hands it to
 * glue_xts_crypt_128bit().  The visible entries are, in order, the 16-way
 * AVX2 routine, the 8-way AVX routine and serpent_xts_enc.
 * NOTE(review): the embedded source line numbers are not consecutive (77,
 * 79-81, 83-84, 86-87 and 89-90 are absent), so the initializer lines between
 * the entries and the closing braces are missing from this listing.
 */
76 static const struct common_glue_ctx serpent_enc_xts
= {
78 .fpu_blocks_limit
= 8,
82 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way
) }
85 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx
) }
88 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(serpent_xts_enc
) }
/*
 * ECB decryption dispatch table; ecb_decrypt() hands it to
 * glue_ecb_crypt_128bit().  The visible entries are, in order, the 16-way
 * AVX2 routine, the 8-way AVX routine and the per-block __serpent_decrypt.
 * NOTE(review): the embedded source line numbers are not consecutive (93,
 * 95-97, 99-100, 102-103 and 105-106 are absent), so the initializer lines
 * between the entries and the closing braces are missing from this listing.
 */
92 static const struct common_glue_ctx serpent_dec
= {
94 .fpu_blocks_limit
= 8,
98 .fn_u
= { .ecb
= GLUE_FUNC_CAST(serpent_ecb_dec_16way
) }
101 .fn_u
= { .ecb
= GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx
) }
104 .fn_u
= { .ecb
= GLUE_FUNC_CAST(__serpent_decrypt
) }
/*
 * CBC decryption dispatch table; cbc_decrypt() hands it to
 * glue_cbc_decrypt_128bit().  The visible entries are, in order, the 16-way
 * AVX2 routine, the 8-way AVX routine and the per-block __serpent_decrypt.
 * NOTE(review): the embedded source line numbers are not consecutive (109,
 * 111-113, 115-116, 118-119 and 121-122 are absent), so the initializer lines
 * between the entries and the closing braces are missing from this listing.
 */
108 static const struct common_glue_ctx serpent_dec_cbc
= {
110 .fpu_blocks_limit
= 8,
114 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way
) }
117 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx
) }
120 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(__serpent_decrypt
) }
/*
 * XTS decryption dispatch table; xts_decrypt() hands it to
 * glue_xts_crypt_128bit().  The visible entries are, in order, the 16-way
 * AVX2 routine, the 8-way AVX routine and serpent_xts_dec.
 * NOTE(review): the embedded source line numbers are not consecutive (125,
 * 127-129, 131-132, 134-135 and 137-138 are absent), so the initializer lines
 * between the entries and the closing braces are missing from this listing.
 */
124 static const struct common_glue_ctx serpent_dec_xts
= {
126 .fpu_blocks_limit
= 8,
130 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way
) }
133 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx
) }
136 .fn_u
= { .xts
= GLUE_XTS_FUNC_CAST(serpent_xts_dec
) }
140 static int ecb_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
141 struct scatterlist
*src
, unsigned int nbytes
)
143 return glue_ecb_crypt_128bit(&serpent_enc
, desc
, dst
, src
, nbytes
);
146 static int ecb_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
147 struct scatterlist
*src
, unsigned int nbytes
)
149 return glue_ecb_crypt_128bit(&serpent_dec
, desc
, dst
, src
, nbytes
);
/*
 * CBC encryption entry point: calls glue_cbc_encrypt_128bit() with
 * __serpent_encrypt (cast through GLUE_FUNC_CAST) as the block function.
 * NOTE(review): the embedded source line numbers skip 154 and 156-157, so the
 * function braces and the tail of the argument list (after "desc,") are
 * missing from this listing.
 */
152 static int cbc_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
153 struct scatterlist
*src
, unsigned int nbytes
)
155 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt
), desc
,
/*
 * CBC decryption entry point: calls glue_cbc_decrypt_128bit() with the
 * serpent_dec_cbc dispatch table.
 * NOTE(review): the embedded source line numbers skip 161 and 163-164, so the
 * function braces and the tail of the argument list (after "src,") are
 * missing from this listing.
 */
159 static int cbc_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
160 struct scatterlist
*src
, unsigned int nbytes
)
162 return glue_cbc_decrypt_128bit(&serpent_dec_cbc
, desc
, dst
, src
,
166 static int ctr_crypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
167 struct scatterlist
*src
, unsigned int nbytes
)
169 return glue_ctr_crypt_128bit(&serpent_ctr
, desc
, dst
, src
, nbytes
);
172 static inline bool serpent_fpu_begin(bool fpu_enabled
, unsigned int nbytes
)
174 /* since reusing AVX functions, starts using FPU at 8 parallel blocks */
175 return glue_fpu_begin(SERPENT_BLOCK_SIZE
, 8, NULL
, fpu_enabled
, nbytes
);
/*
 * Counterpart of serpent_fpu_begin(): passes the caller's FPU state flag to
 * glue_fpu_end().
 */
static inline void serpent_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
/*
 * Private state handed to encrypt_callback()/decrypt_callback() through the
 * LRW request's crypt_ctx pointer.
 */
struct crypt_priv {
	struct serpent_ctx *ctx;	/* Serpent key context used by the callbacks */
	bool fpu_enabled;		/* FPU state flag, updated by serpent_fpu_begin() */
};
188 static void encrypt_callback(void *priv
, u8
*srcdst
, unsigned int nbytes
)
190 const unsigned int bsize
= SERPENT_BLOCK_SIZE
;
191 struct crypt_priv
*ctx
= priv
;
194 ctx
->fpu_enabled
= serpent_fpu_begin(ctx
->fpu_enabled
, nbytes
);
196 if (nbytes
>= SERPENT_AVX2_PARALLEL_BLOCKS
* bsize
) {
197 serpent_ecb_enc_16way(ctx
->ctx
, srcdst
, srcdst
);
198 srcdst
+= bsize
* SERPENT_AVX2_PARALLEL_BLOCKS
;
199 nbytes
-= bsize
* SERPENT_AVX2_PARALLEL_BLOCKS
;
202 while (nbytes
>= SERPENT_PARALLEL_BLOCKS
* bsize
) {
203 serpent_ecb_enc_8way_avx(ctx
->ctx
, srcdst
, srcdst
);
204 srcdst
+= bsize
* SERPENT_PARALLEL_BLOCKS
;
205 nbytes
-= bsize
* SERPENT_PARALLEL_BLOCKS
;
208 for (i
= 0; i
< nbytes
/ bsize
; i
++, srcdst
+= bsize
)
209 __serpent_encrypt(ctx
->ctx
, srcdst
, srcdst
);
212 static void decrypt_callback(void *priv
, u8
*srcdst
, unsigned int nbytes
)
214 const unsigned int bsize
= SERPENT_BLOCK_SIZE
;
215 struct crypt_priv
*ctx
= priv
;
218 ctx
->fpu_enabled
= serpent_fpu_begin(ctx
->fpu_enabled
, nbytes
);
220 if (nbytes
>= SERPENT_AVX2_PARALLEL_BLOCKS
* bsize
) {
221 serpent_ecb_dec_16way(ctx
->ctx
, srcdst
, srcdst
);
222 srcdst
+= bsize
* SERPENT_AVX2_PARALLEL_BLOCKS
;
223 nbytes
-= bsize
* SERPENT_AVX2_PARALLEL_BLOCKS
;
226 while (nbytes
>= SERPENT_PARALLEL_BLOCKS
* bsize
) {
227 serpent_ecb_dec_8way_avx(ctx
->ctx
, srcdst
, srcdst
);
228 srcdst
+= bsize
* SERPENT_PARALLEL_BLOCKS
;
229 nbytes
-= bsize
* SERPENT_PARALLEL_BLOCKS
;
232 for (i
= 0; i
< nbytes
/ bsize
; i
++, srcdst
+= bsize
)
233 __serpent_decrypt(ctx
->ctx
, srcdst
, srcdst
);
/*
 * LRW encryption entry point: builds a struct lrw_crypt_req around the
 * transform's serpent_lrw_ctx, with encrypt_callback as the block function,
 * runs lrw_crypt() and then ends any FPU section the callback left open.
 * NOTE(review): the embedded source line numbers skip 238, 244, 246, 248,
 * 252-254 and 258-260, so the braces, at least one request member, the
 * declaration of "ret" and the return statement are missing from this listing.
 */
236 static int lrw_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
237 struct scatterlist
*src
, unsigned int nbytes
)
239 struct serpent_lrw_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
/* Scratch buffer sized for one 16-way batch; its size is passed as tbuflen. */
240 be128 buf
[SERPENT_AVX2_PARALLEL_BLOCKS
];
241 struct crypt_priv crypt_ctx
= {
242 .ctx
= &ctx
->serpent_ctx
,
243 .fpu_enabled
= false,
245 struct lrw_crypt_req req
= {
247 .tbuflen
= sizeof(buf
),
249 .table_ctx
= &ctx
->lrw_table
,
250 .crypt_ctx
= &crypt_ctx
,
251 .crypt_fn
= encrypt_callback
,
/*
 * MAY_SLEEP is cleared before the walk; presumably because the callback can
 * leave the FPU enabled between calls -- TODO confirm.
 */
255 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
256 ret
= lrw_crypt(desc
, dst
, src
, nbytes
, &req
);
/* Hand the flag last set by the callback to serpent_fpu_end(). */
257 serpent_fpu_end(crypt_ctx
.fpu_enabled
);
/*
 * LRW decryption entry point: builds a struct lrw_crypt_req around the
 * transform's serpent_lrw_ctx, with decrypt_callback as the block function,
 * runs lrw_crypt() and then ends any FPU section the callback left open.
 * NOTE(review): the embedded source line numbers skip 264, 270, 272, 274,
 * 278-280 and 284-286, so the braces, at least one request member, the
 * declaration of "ret" and the return statement are missing from this listing.
 */
262 static int lrw_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
263 struct scatterlist
*src
, unsigned int nbytes
)
265 struct serpent_lrw_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
/* Scratch buffer sized for one 16-way batch; its size is passed as tbuflen. */
266 be128 buf
[SERPENT_AVX2_PARALLEL_BLOCKS
];
267 struct crypt_priv crypt_ctx
= {
268 .ctx
= &ctx
->serpent_ctx
,
269 .fpu_enabled
= false,
271 struct lrw_crypt_req req
= {
273 .tbuflen
= sizeof(buf
),
275 .table_ctx
= &ctx
->lrw_table
,
276 .crypt_ctx
= &crypt_ctx
,
277 .crypt_fn
= decrypt_callback
,
/*
 * MAY_SLEEP is cleared before the walk; presumably because the callback can
 * leave the FPU enabled between calls -- TODO confirm.
 */
281 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
282 ret
= lrw_crypt(desc
, dst
, src
, nbytes
, &req
);
/* Hand the flag last set by the callback to serpent_fpu_end(). */
283 serpent_fpu_end(crypt_ctx
.fpu_enabled
);
288 static int xts_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
289 struct scatterlist
*src
, unsigned int nbytes
)
291 struct serpent_xts_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
293 return glue_xts_crypt_128bit(&serpent_enc_xts
, desc
, dst
, src
, nbytes
,
294 XTS_TWEAK_CAST(__serpent_encrypt
),
295 &ctx
->tweak_ctx
, &ctx
->crypt_ctx
);
298 static int xts_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
299 struct scatterlist
*src
, unsigned int nbytes
)
301 struct serpent_xts_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
303 return glue_xts_crypt_128bit(&serpent_dec_xts
, desc
, dst
, src
, nbytes
,
304 XTS_TWEAK_CAST(__serpent_encrypt
),
305 &ctx
->tweak_ctx
, &ctx
->crypt_ctx
);
/*
 * Algorithm table registered by init() and unregistered by fini().  The first
 * five entries ("__"-prefixed names) are blkcipher algorithms wired to the
 * functions in this file; the last five are ablkcipher algorithms with the
 * public names "ecb(serpent)" ... "xts(serpent)" that use the ablk_* helpers.
 * NOTE(review): the embedded source line numbers are not consecutive
 * throughout this initializer (e.g. 311, 313, 316, 320-321 and 327-329 are
 * absent from the first entry), so several members, the nested-initializer
 * openers and all closing braces are missing from this listing.
 */
308 static struct crypto_alg srp_algs
[10] = { {
/* [0] internal ECB */
309 .cra_name
= "__ecb-serpent-avx2",
310 .cra_driver_name
= "__driver-ecb-serpent-avx2",
312 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
|
314 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
315 .cra_ctxsize
= sizeof(struct serpent_ctx
),
317 .cra_type
= &crypto_blkcipher_type
,
318 .cra_module
= THIS_MODULE
,
319 .cra_list
= LIST_HEAD_INIT(srp_algs
[0].cra_list
),
322 .min_keysize
= SERPENT_MIN_KEY_SIZE
,
323 .max_keysize
= SERPENT_MAX_KEY_SIZE
,
324 .setkey
= serpent_setkey
,
325 .encrypt
= ecb_encrypt
,
326 .decrypt
= ecb_decrypt
,
/* [1] internal CBC */
330 .cra_name
= "__cbc-serpent-avx2",
331 .cra_driver_name
= "__driver-cbc-serpent-avx2",
333 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
|
335 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
336 .cra_ctxsize
= sizeof(struct serpent_ctx
),
338 .cra_type
= &crypto_blkcipher_type
,
339 .cra_module
= THIS_MODULE
,
340 .cra_list
= LIST_HEAD_INIT(srp_algs
[1].cra_list
),
343 .min_keysize
= SERPENT_MIN_KEY_SIZE
,
344 .max_keysize
= SERPENT_MAX_KEY_SIZE
,
345 .setkey
= serpent_setkey
,
346 .encrypt
= cbc_encrypt
,
347 .decrypt
= cbc_decrypt
,
/* [2] internal CTR: the same function serves both directions */
351 .cra_name
= "__ctr-serpent-avx2",
352 .cra_driver_name
= "__driver-ctr-serpent-avx2",
354 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
|
357 .cra_ctxsize
= sizeof(struct serpent_ctx
),
359 .cra_type
= &crypto_blkcipher_type
,
360 .cra_module
= THIS_MODULE
,
361 .cra_list
= LIST_HEAD_INIT(srp_algs
[2].cra_list
),
364 .min_keysize
= SERPENT_MIN_KEY_SIZE
,
365 .max_keysize
= SERPENT_MAX_KEY_SIZE
,
366 .ivsize
= SERPENT_BLOCK_SIZE
,
367 .setkey
= serpent_setkey
,
368 .encrypt
= ctr_crypt
,
369 .decrypt
= ctr_crypt
,
/* [3] internal LRW: context is struct serpent_lrw_ctx, with its own setkey/exit */
373 .cra_name
= "__lrw-serpent-avx2",
374 .cra_driver_name
= "__driver-lrw-serpent-avx2",
376 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
|
378 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
379 .cra_ctxsize
= sizeof(struct serpent_lrw_ctx
),
381 .cra_type
= &crypto_blkcipher_type
,
382 .cra_module
= THIS_MODULE
,
383 .cra_list
= LIST_HEAD_INIT(srp_algs
[3].cra_list
),
384 .cra_exit
= lrw_serpent_exit_tfm
,
387 .min_keysize
= SERPENT_MIN_KEY_SIZE
+
389 .max_keysize
= SERPENT_MAX_KEY_SIZE
+
391 .ivsize
= SERPENT_BLOCK_SIZE
,
392 .setkey
= lrw_serpent_setkey
,
393 .encrypt
= lrw_encrypt
,
394 .decrypt
= lrw_decrypt
,
/* [4] internal XTS: key sizes are twice the Serpent key size */
398 .cra_name
= "__xts-serpent-avx2",
399 .cra_driver_name
= "__driver-xts-serpent-avx2",
401 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
|
403 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
404 .cra_ctxsize
= sizeof(struct serpent_xts_ctx
),
406 .cra_type
= &crypto_blkcipher_type
,
407 .cra_module
= THIS_MODULE
,
408 .cra_list
= LIST_HEAD_INIT(srp_algs
[4].cra_list
),
411 .min_keysize
= SERPENT_MIN_KEY_SIZE
* 2,
412 .max_keysize
= SERPENT_MAX_KEY_SIZE
* 2,
413 .ivsize
= SERPENT_BLOCK_SIZE
,
414 .setkey
= xts_serpent_setkey
,
415 .encrypt
= xts_encrypt
,
416 .decrypt
= xts_decrypt
,
/* [5] async ECB wrapper */
420 .cra_name
= "ecb(serpent)",
421 .cra_driver_name
= "ecb-serpent-avx2",
423 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
424 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
425 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
427 .cra_type
= &crypto_ablkcipher_type
,
428 .cra_module
= THIS_MODULE
,
429 .cra_list
= LIST_HEAD_INIT(srp_algs
[5].cra_list
),
430 .cra_init
= ablk_init
,
431 .cra_exit
= ablk_exit
,
434 .min_keysize
= SERPENT_MIN_KEY_SIZE
,
435 .max_keysize
= SERPENT_MAX_KEY_SIZE
,
436 .setkey
= ablk_set_key
,
437 .encrypt
= ablk_encrypt
,
438 .decrypt
= ablk_decrypt
,
/*
 * [6] async CBC wrapper.  NOTE(review): .encrypt is __ablk_encrypt here while
 * the other async entries use ablk_encrypt -- presumably deliberate for CBC
 * encryption; confirm against the ablk helper.
 */
442 .cra_name
= "cbc(serpent)",
443 .cra_driver_name
= "cbc-serpent-avx2",
445 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
446 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
447 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
449 .cra_type
= &crypto_ablkcipher_type
,
450 .cra_module
= THIS_MODULE
,
451 .cra_list
= LIST_HEAD_INIT(srp_algs
[6].cra_list
),
452 .cra_init
= ablk_init
,
453 .cra_exit
= ablk_exit
,
456 .min_keysize
= SERPENT_MIN_KEY_SIZE
,
457 .max_keysize
= SERPENT_MAX_KEY_SIZE
,
458 .ivsize
= SERPENT_BLOCK_SIZE
,
459 .setkey
= ablk_set_key
,
460 .encrypt
= __ablk_encrypt
,
461 .decrypt
= ablk_decrypt
,
/*
 * [7] async CTR wrapper: .decrypt is ablk_encrypt, mirroring the internal CTR
 * entry where one function serves both directions.
 */
465 .cra_name
= "ctr(serpent)",
466 .cra_driver_name
= "ctr-serpent-avx2",
468 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
470 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
472 .cra_type
= &crypto_ablkcipher_type
,
473 .cra_module
= THIS_MODULE
,
474 .cra_list
= LIST_HEAD_INIT(srp_algs
[7].cra_list
),
475 .cra_init
= ablk_init
,
476 .cra_exit
= ablk_exit
,
479 .min_keysize
= SERPENT_MIN_KEY_SIZE
,
480 .max_keysize
= SERPENT_MAX_KEY_SIZE
,
481 .ivsize
= SERPENT_BLOCK_SIZE
,
482 .setkey
= ablk_set_key
,
483 .encrypt
= ablk_encrypt
,
484 .decrypt
= ablk_encrypt
,
/* [8] async LRW wrapper */
489 .cra_name
= "lrw(serpent)",
490 .cra_driver_name
= "lrw-serpent-avx2",
492 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
493 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
494 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
496 .cra_type
= &crypto_ablkcipher_type
,
497 .cra_module
= THIS_MODULE
,
498 .cra_list
= LIST_HEAD_INIT(srp_algs
[8].cra_list
),
499 .cra_init
= ablk_init
,
500 .cra_exit
= ablk_exit
,
503 .min_keysize
= SERPENT_MIN_KEY_SIZE
+
505 .max_keysize
= SERPENT_MAX_KEY_SIZE
+
507 .ivsize
= SERPENT_BLOCK_SIZE
,
508 .setkey
= ablk_set_key
,
509 .encrypt
= ablk_encrypt
,
510 .decrypt
= ablk_decrypt
,
/* [9] async XTS wrapper */
514 .cra_name
= "xts(serpent)",
515 .cra_driver_name
= "xts-serpent-avx2",
517 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
518 .cra_blocksize
= SERPENT_BLOCK_SIZE
,
519 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
521 .cra_type
= &crypto_ablkcipher_type
,
522 .cra_module
= THIS_MODULE
,
523 .cra_list
= LIST_HEAD_INIT(srp_algs
[9].cra_list
),
524 .cra_init
= ablk_init
,
525 .cra_exit
= ablk_exit
,
528 .min_keysize
= SERPENT_MIN_KEY_SIZE
* 2,
529 .max_keysize
= SERPENT_MAX_KEY_SIZE
* 2,
530 .ivsize
= SERPENT_BLOCK_SIZE
,
531 .setkey
= ablk_set_key
,
532 .encrypt
= ablk_encrypt
,
533 .decrypt
= ablk_decrypt
,
/*
 * Module init: tests cpu_has_avx2 and cpu_has_osxsave, then reads XCR0 via
 * xgetbv() and checks that both the SSE and YMM state bits are set, and
 * finally registers every entry of srp_algs with crypto_register_algs(),
 * returning its result.
 * NOTE(review): the embedded source line numbers skip 539-541, 544-546 and
 * 550-552, so the braces, the declaration of "xcr0" and the statements that
 * follow each pr_info() (presumably the error returns) are missing from this
 * listing.
 */
538 static int __init
init(void)
542 if (!cpu_has_avx2
|| !cpu_has_osxsave
) {
543 pr_info("AVX2 instructions are not detected.\n");
547 xcr0
= xgetbv(XCR_XFEATURE_ENABLED_MASK
);
548 if ((xcr0
& (XSTATE_SSE
| XSTATE_YMM
)) != (XSTATE_SSE
| XSTATE_YMM
)) {
549 pr_info("AVX detected but unusable.\n");
553 return crypto_register_algs(srp_algs
, ARRAY_SIZE(srp_algs
));
556 static void __exit
fini(void)
558 crypto_unregister_algs(srp_algs
, ARRAY_SIZE(srp_algs
));
564 MODULE_LICENSE("GPL");
565 MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");
566 MODULE_ALIAS_CRYPTO("serpent");
567 MODULE_ALIAS_CRYPTO("serpent-asm");