/*
 * Glue Code for x86_64/AVX2 assembler optimized version of Serpent
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <crypto/serpent.h>
#include <asm/fpu/api.h>
#include <asm/crypto/serpent-avx.h>
#include <asm/crypto/glue_helper.h>
#define SERPENT_AVX2_PARALLEL_BLOCKS 16
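/*
 * The AVX2 routines process 16 blocks (16 * 16 = 256 bytes) per call;
 * the 8-way AVX routines reused below pick up 8..15 block tails, and
 * anything smaller falls back to the scalar single-block functions.
 */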
/* 16-way AVX2 parallel cipher functions */
asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
				      const u8 *src);
asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
				      const u8 *src);
asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);

asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
				  le128 *iv);
asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
				      const u8 *src, le128 *iv);
asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
				      const u8 *src, le128 *iv);
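/*
 * Dispatch tables for the glue_helper framework.  Each table lists the
 * available implementations from widest to narrowest (16-way AVX2, 8-way
 * AVX, single block); glue_helper walks the list and uses the widest
 * function that still fits the remaining data.  fpu_blocks_limit is 8
 * because the reused AVX functions already need the FPU at that width.
 */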
static const struct common_glue_ctx serpent_enc = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
	} }
};
static const struct common_glue_ctx serpent_ctr = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
	} }
};
static const struct common_glue_ctx serpent_enc_xts = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
	} }
};
static const struct common_glue_ctx serpent_dec = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
	} }
};
static const struct common_glue_ctx serpent_dec_cbc = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
	} }
};
static const struct common_glue_ctx serpent_dec_xts = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
	} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
				       dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
				       nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
}
static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	/* since we reuse the AVX functions, start using the FPU at 8 parallel blocks */
	return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
}
static inline void serpent_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
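/*
 * Per-request state handed to the LRW callbacks below; it lets the FPU
 * stay enabled across multiple callback invocations for one request.
 */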
struct crypt_priv {
	struct serpent_ctx *ctx;
	bool fpu_enabled;
};
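/*
 * ECB helper for lrw_crypt(): consume one 16-way AVX2 chunk if possible,
 * then 8-way AVX chunks, then fall back to single-block scalar calls for
 * the remainder.  srcdst is processed in place.
 */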
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = SERPENT_BLOCK_SIZE;
	struct crypt_priv *ctx = priv;
	int i;

	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);

	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
		serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
	}

	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
	}

	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
}
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = SERPENT_BLOCK_SIZE;
	struct crypt_priv *ctx = priv;
	int i;

	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);

	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
		serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
	}

	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
	}

	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
}
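/*
 * lrw_crypt() needs a tweak buffer covering the widest parallel width
 * (16 blocks here).  CRYPTO_TFM_REQ_MAY_SLEEP is cleared because the
 * callbacks may run with the FPU enabled, where sleeping is not allowed.
 */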
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
	struct crypt_priv crypt_ctx = {
		.ctx = &ctx->serpent_ctx,
		.fpu_enabled = false,
	};
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),
		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &crypt_ctx,
		.crypt_fn = encrypt_callback,
	};
	int ret;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	ret = lrw_crypt(desc, dst, src, nbytes, &req);
	serpent_fpu_end(crypt_ctx.fpu_enabled);

	return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
	struct crypt_priv crypt_ctx = {
		.ctx = &ctx->serpent_ctx,
		.fpu_enabled = false,
	};
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),
		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &crypt_ctx,
		.crypt_fn = decrypt_callback,
	};
	int ret;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	ret = lrw_crypt(desc, dst, src, nbytes, &req);
	serpent_fpu_end(crypt_ctx.fpu_enabled);

	return ret;
}
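/*
 * For XTS the tweak is always computed with the scalar __serpent_encrypt
 * (a single-block operation, also used on the decrypt path), while the
 * bulk data goes through the 16-way/8-way dispatch tables above.
 */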
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);

	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
				     XTS_TWEAK_CAST(__serpent_encrypt),
				     &ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);

	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
				     XTS_TWEAK_CAST(__serpent_encrypt),
				     &ctx->tweak_ctx, &ctx->crypt_ctx);
}
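/*
 * Ten algorithm registrations: five internal "__"-prefixed synchronous
 * blkciphers that do the actual FPU work, and five CRYPTO_ALG_ASYNC
 * ablkcipher wrappers (via ablk_helper/cryptd) that defer to them so
 * the FPU-using code never runs where the FPU is unusable.
 */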
static struct crypto_alg srp_algs[10] = { {
	.cra_name		= "__ecb-serpent-avx2",
	.cra_driver_name	= "__driver-ecb-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[0].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-serpent-avx2",
	.cra_driver_name	= "__driver-cbc-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[1].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-serpent-avx2",
	.cra_driver_name	= "__driver-ctr-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[2].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "__lrw-serpent-avx2",
	.cra_driver_name	= "__driver-lrw-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[3].cra_list),
	.cra_exit		= lrw_serpent_exit_tfm,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= lrw_serpent_setkey,
			.encrypt	= lrw_encrypt,
			.decrypt	= lrw_decrypt,
		},
	},
}, {
	.cra_name		= "__xts-serpent-avx2",
	.cra_driver_name	= "__driver-xts-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[4].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= xts_serpent_setkey,
			.encrypt	= xts_encrypt,
			.decrypt	= xts_decrypt,
		},
	},
}, {
	.cra_name		= "ecb(serpent)",
	.cra_driver_name	= "ecb-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[5].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(serpent)",
	.cra_driver_name	= "cbc-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[6].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(serpent)",
	.cra_driver_name	= "ctr-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[7].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			/* CTR decryption is the same keystream operation as encryption */
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
}, {
	.cra_name		= "lrw(serpent)",
	.cra_driver_name	= "lrw-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[8].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "xts(serpent)",
	.cra_driver_name	= "xts-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[9].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
} };
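/*
 * AVX2 alone is not sufficient: OSXSAVE must be set and the OS must have
 * enabled SSE and YMM state saving (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
 * otherwise the YMM registers cannot be used safely.
 */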
static int __init init(void)
{
	const char *feature_name;

	if (!cpu_has_avx2 || !cpu_has_osxsave) {
		pr_info("AVX2 instructions are not detected.\n");
		return -ENODEV;
	}
	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs));
}
static void __exit fini(void)
{
	crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
}

module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");
MODULE_ALIAS_CRYPTO("serpent");
MODULE_ALIAS_CRYPTO("serpent-asm");