// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Support for AES-NI and VAES instructions. This file contains glue code.
 * The real AES implementations are in aesni-intel_asm.S and other .S files.
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *
 * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
 * interface for 64-bit kernels.
 *    Authors: Adrian Hoban <adrian.hoban@intel.com>
 *             Gabriele Paoloni <gabriele.paoloni@intel.com>
 *             Tadeusz Struk (tadeusz.struk@intel.com)
 *             Aidan O'Mahony (aidan.o.mahony@intel.com)
 *    Copyright (c) 2010, Intel Corporation.
 *
 * Copyright 2024 Google LLC
 */
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/jump_label.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/static_call.h>
#define AESNI_ALIGN	16
#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE - 1))
#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
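
/*
 * Worked example (illustrative, not from the original source): with
 * AESNI_ALIGN == 16 and a typical CRYPTO_MINALIGN of 8, AESNI_ALIGN_EXTRA is
 * (15 & ~7) == 8, so CRYPTO_AES_CTX_SIZE reserves 8 spare bytes.  That is
 * exactly enough slack for aes_align_addr() below to round a minimally
 * aligned ctx pointer up to the next 16-byte boundary without overrunning
 * the allocation.
 */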

struct aesni_xts_ctx {
	struct crypto_aes_ctx tweak_ctx AESNI_ALIGN_ATTR;
	struct crypto_aes_ctx crypt_ctx AESNI_ALIGN_ATTR;
};

static inline void *aes_align_addr(void *addr)
{
	if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN)
		return addr;
	return PTR_ALIGN(addr, AESNI_ALIGN);
}

asmlinkage void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
			      unsigned int key_len);
asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len);
asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len);
asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
				  const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
				  const u8 *in, unsigned int len, u8 *iv);

asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);

asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);

#ifdef CONFIG_X86_64

asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc);

asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);

asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv,
	const void *keys, u8 *out, unsigned int num_bytes,
	unsigned int byte_ctr);

asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
	const void *keys, u8 *out, unsigned int num_bytes,
	unsigned int byte_ctr);

asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
	const void *keys, u8 *out, unsigned int num_bytes,
	unsigned int byte_ctr);
#endif

static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
{
	return aes_align_addr(raw_ctx);
}

static inline struct aesni_xts_ctx *aes_xts_ctx(struct crypto_skcipher *tfm)
{
	return aes_align_addr(crypto_skcipher_ctx(tfm));
}

static int aes_set_key_common(struct crypto_aes_ctx *ctx,
			      const u8 *in_key, unsigned int key_len)
{
	int err;

	if (!crypto_simd_usable())
		return aes_expandkey(ctx, in_key, key_len);

	err = aes_check_keylen(key_len);
	if (err)
		return err;

	kernel_fpu_begin();
	aesni_set_key(ctx, in_key, key_len);
	kernel_fpu_end();
	return 0;
}

static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	return aes_set_key_common(aes_ctx(crypto_tfm_ctx(tfm)), in_key,
				  key_len);
}

static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));

	if (!crypto_simd_usable()) {
		aes_encrypt(ctx, dst, src);
	} else {
		kernel_fpu_begin();
		aesni_enc(ctx, dst, src);
		kernel_fpu_end();
	}
}

static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));

	if (!crypto_simd_usable()) {
		aes_decrypt(ctx, dst, src);
	} else {
		kernel_fpu_begin();
		aesni_dec(ctx, dst, src);
		kernel_fpu_end();
	}
}

static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
				 unsigned int len)
{
	return aes_set_key_common(aes_ctx(crypto_skcipher_ctx(tfm)), key, len);
}

static int ecb_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();
		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			      nbytes & AES_BLOCK_MASK);
		kernel_fpu_end();
		nbytes &= AES_BLOCK_SIZE - 1;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

static int ecb_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();
		aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			      nbytes & AES_BLOCK_MASK);
		kernel_fpu_end();
		nbytes &= AES_BLOCK_SIZE - 1;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

static int cbc_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();
		aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			      nbytes & AES_BLOCK_MASK, walk.iv);
		kernel_fpu_end();
		nbytes &= AES_BLOCK_SIZE - 1;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();
		aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			      nbytes & AES_BLOCK_MASK, walk.iv);
		kernel_fpu_end();
		nbytes &= AES_BLOCK_SIZE - 1;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

static int cts_cbc_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
	struct scatterlist *src = req->src, *dst = req->dst;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct skcipher_walk walk;
	int err;

	skcipher_request_set_tfm(&subreq, tfm);
	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
				      NULL, NULL);

	if (req->cryptlen <= AES_BLOCK_SIZE) {
		if (req->cryptlen < AES_BLOCK_SIZE)
			return -EINVAL;
		cbc_blocks = 1;
	}

	if (cbc_blocks > 0) {
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   cbc_blocks * AES_BLOCK_SIZE,
					   req->iv);

		err = cbc_encrypt(&subreq);
		if (err)
			return err;

		if (req->cryptlen == AES_BLOCK_SIZE)
			return 0;

		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
		if (req->dst != req->src)
			dst = scatterwalk_ffwd(sg_dst, req->dst,
					       subreq.cryptlen);
	}

	/* handle ciphertext stealing */
	skcipher_request_set_crypt(&subreq, src, dst,
				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
				   req->iv);

	err = skcipher_walk_virt(&walk, &subreq, false);
	if (err)
		return err;

	kernel_fpu_begin();
	aesni_cts_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			  walk.nbytes, walk.iv);
	kernel_fpu_end();

	return skcipher_walk_done(&walk, 0);
}

static int cts_cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
	struct scatterlist *src = req->src, *dst = req->dst;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct skcipher_walk walk;
	int err;

	skcipher_request_set_tfm(&subreq, tfm);
	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
				      NULL, NULL);

	if (req->cryptlen <= AES_BLOCK_SIZE) {
		if (req->cryptlen < AES_BLOCK_SIZE)
			return -EINVAL;
		cbc_blocks = 1;
	}

	if (cbc_blocks > 0) {
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   cbc_blocks * AES_BLOCK_SIZE,
					   req->iv);

		err = cbc_decrypt(&subreq);
		if (err)
			return err;

		if (req->cryptlen == AES_BLOCK_SIZE)
			return 0;

		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
		if (req->dst != req->src)
			dst = scatterwalk_ffwd(sg_dst, req->dst,
					       subreq.cryptlen);
	}

	/* handle ciphertext stealing */
	skcipher_request_set_crypt(&subreq, src, dst,
				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
				   req->iv);

	err = skcipher_walk_virt(&walk, &subreq, false);
	if (err)
		return err;

	kernel_fpu_begin();
	aesni_cts_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			  walk.nbytes, walk.iv);
	kernel_fpu_end();

	return skcipher_walk_done(&walk, 0);
}

#ifdef CONFIG_X86_64
static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
				  const u8 *in, unsigned int len, u8 *iv)
{
	/*
	 * based on key length, override with the by8 version
	 * of ctr mode encryption/decryption for improved performance
	 * aes_set_key_common() ensures that key length is one of
	 * {128,192,256}
	 */
	if (ctx->key_length == AES_KEYSIZE_128)
		aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len);
	else if (ctx->key_length == AES_KEYSIZE_192)
		aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len);
	else
		aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len);
}

static int ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	u8 keystream[AES_BLOCK_SIZE];
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		kernel_fpu_begin();
		if (nbytes & AES_BLOCK_MASK)
			static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr,
						       walk.src.virt.addr,
						       nbytes & AES_BLOCK_MASK,
						       walk.iv);
		nbytes &= ~AES_BLOCK_MASK;

		if (walk.nbytes == walk.total && nbytes > 0) {
			aesni_enc(ctx, keystream, walk.iv);
			crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes,
				       walk.src.virt.addr + walk.nbytes - nbytes,
				       keystream, nbytes);
			crypto_inc(walk.iv, AES_BLOCK_SIZE);
			nbytes = 0;
		}
		kernel_fpu_end();
		err = skcipher_walk_done(&walk, nbytes);
	}
	return err;
}

static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
				   const u8 *in, unsigned int len, u8 *iv,
				   unsigned int byte_ctr)
{
	if (ctx->key_length == AES_KEYSIZE_128)
		aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len,
					 byte_ctr);
	else if (ctx->key_length == AES_KEYSIZE_192)
		aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len,
					 byte_ctr);
	else
		aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len,
					 byte_ctr);
}
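
/*
 * Note (added for clarity): XCTR derives keystream block i (1-based) as
 * AES_K(IV XOR i), with i laid out as a little-endian integer, instead of
 * incrementing a big-endian counter the way CTR does.  The tail handling in
 * xctr_crypt() below builds exactly that value for the final partial block:
 * byte_ctr / AES_BLOCK_SIZE full blocks have already been consumed, so the
 * next block index is byte_ctr / AES_BLOCK_SIZE + 1, and XORing it into only
 * the low 32 bits of the IV is enough for any request size seen here.
 */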

static int xctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
	u8 keystream[AES_BLOCK_SIZE];
	struct skcipher_walk walk;
	unsigned int nbytes;
	unsigned int byte_ctr = 0;
	int err;
	__le32 block[AES_BLOCK_SIZE / sizeof(__le32)];

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		kernel_fpu_begin();
		if (nbytes & AES_BLOCK_MASK)
			aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr,
				walk.src.virt.addr, nbytes & AES_BLOCK_MASK,
				walk.iv, byte_ctr);
		nbytes &= ~AES_BLOCK_MASK;
		byte_ctr += walk.nbytes - nbytes;

		if (walk.nbytes == walk.total && nbytes > 0) {
			memcpy(block, walk.iv, AES_BLOCK_SIZE);
			block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE);
			aesni_enc(ctx, keystream, (u8 *)block);
			crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes -
				       nbytes, walk.src.virt.addr + walk.nbytes
				       - nbytes, keystream, nbytes);
			byte_ctr += nbytes;
			nbytes = 0;
		}
		kernel_fpu_end();
		err = skcipher_walk_done(&walk, nbytes);
	}
	return err;
}
#endif

static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key,
			    unsigned int keylen)
{
	struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
	int err;

	err = xts_verify_key(tfm, key, keylen);
	if (err)
		return err;

	keylen /= 2;

	/* first half of xts-key is for crypt */
	err = aes_set_key_common(&ctx->crypt_ctx, key, keylen);
	if (err)
		return err;

	/* second half of xts-key is for tweak */
	return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen);
}

typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key,
				    u8 iv[AES_BLOCK_SIZE]);
typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key,
			       const u8 *src, u8 *dst, unsigned int len,
			       u8 tweak[AES_BLOCK_SIZE]);

/* This handles cases where the source and/or destination span pages. */
static noinline int
xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
	int tail = req->cryptlen % AES_BLOCK_SIZE;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct skcipher_walk walk;
	struct scatterlist *src, *dst;
	int err;

	/*
	 * If the message length isn't divisible by the AES block size, then
	 * separate off the last full block and the partial block.  This ensures
	 * that they are processed in the same call to the assembly function,
	 * which is required for ciphertext stealing.
	 */
	if (tail) {
		skcipher_request_set_tfm(&subreq, tfm);
		skcipher_request_set_callback(&subreq,
					      skcipher_request_flags(req),
					      NULL, NULL);
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   req->cryptlen - tail - AES_BLOCK_SIZE,
					   req->iv);
		req = &subreq;
	}

	err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes) {
		kernel_fpu_begin();
		(*crypt_func)(&ctx->crypt_ctx,
			      walk.src.virt.addr, walk.dst.virt.addr,
			      walk.nbytes & ~(AES_BLOCK_SIZE - 1), req->iv);
		kernel_fpu_end();
		err = skcipher_walk_done(&walk,
					 walk.nbytes & (AES_BLOCK_SIZE - 1));
	}

	if (err || !tail)
		return err;

	/* Do ciphertext stealing with the last full block and partial block. */

	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
	if (req->dst != req->src)
		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);

	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
				   req->iv);

	err = skcipher_walk_virt(&walk, req, false);
	if (err)
		return err;

	kernel_fpu_begin();
	(*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr,
		      walk.nbytes, req->iv);
	kernel_fpu_end();

	return skcipher_walk_done(&walk, 0);
}

/* __always_inline to avoid indirect call in fastpath */
static __always_inline int
xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv,
	  xts_crypt_func crypt_func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
	const unsigned int cryptlen = req->cryptlen;
	struct scatterlist *src = req->src;
	struct scatterlist *dst = req->dst;

	if (unlikely(cryptlen < AES_BLOCK_SIZE))
		return -EINVAL;

	kernel_fpu_begin();
	(*encrypt_iv)(&ctx->tweak_ctx, req->iv);

	/*
	 * In practice, virtually all XTS plaintexts and ciphertexts are either
	 * 512 or 4096 bytes, aligned such that they don't span page boundaries.
	 * To optimize the performance of these cases, and also any other case
	 * where no page boundary is spanned, the below fast-path handles
	 * single-page sources and destinations as efficiently as possible.
	 */
	if (likely(src->length >= cryptlen && dst->length >= cryptlen &&
		   src->offset + cryptlen <= PAGE_SIZE &&
		   dst->offset + cryptlen <= PAGE_SIZE)) {
		struct page *src_page = sg_page(src);
		struct page *dst_page = sg_page(dst);
		void *src_virt = kmap_local_page(src_page) + src->offset;
		void *dst_virt = kmap_local_page(dst_page) + dst->offset;

		(*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen,
			      req->iv);
		kunmap_local(dst_virt);
		kunmap_local(src_virt);
		kernel_fpu_end();
		return 0;
	}
	kernel_fpu_end();
	return xts_crypt_slowpath(req, crypt_func);
}
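
/*
 * Example of the fast path above (illustrative): a typical 512-byte or
 * 4096-byte sector that lies entirely within one page satisfies
 * src->length >= cryptlen and src->offset + cryptlen <= PAGE_SIZE, so the
 * data is mapped with kmap_local_page() and handed to the assembly in a
 * single call, with no skcipher_walk setup; anything that spans a page
 * boundary falls back to xts_crypt_slowpath() instead.
 */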

static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
				 u8 iv[AES_BLOCK_SIZE])
{
	aesni_enc(tweak_key, iv, iv);
}

static void aesni_xts_encrypt(const struct crypto_aes_ctx *key,
			      const u8 *src, u8 *dst, unsigned int len,
			      u8 tweak[AES_BLOCK_SIZE])
{
	aesni_xts_enc(key, dst, src, len, tweak);
}

static void aesni_xts_decrypt(const struct crypto_aes_ctx *key,
			      const u8 *src, u8 *dst, unsigned int len,
			      u8 tweak[AES_BLOCK_SIZE])
{
	aesni_xts_dec(key, dst, src, len, tweak);
}

static int xts_encrypt_aesni(struct skcipher_request *req)
{
	return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt);
}

static int xts_decrypt_aesni(struct skcipher_request *req)
{
	return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt);
}

static struct crypto_alg aesni_cipher_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-aesni",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= aes_set_key,
			.cia_encrypt		= aesni_encrypt,
			.cia_decrypt		= aesni_decrypt
		}
	}
};

static struct skcipher_alg aesni_skciphers[] = {
	{
		.base = {
			.cra_name		= "__ecb(aes)",
			.cra_driver_name	= "__ecb-aes-aesni",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= AES_BLOCK_SIZE,
			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= AES_MIN_KEY_SIZE,
		.max_keysize	= AES_MAX_KEY_SIZE,
		.setkey		= aesni_skcipher_setkey,
		.encrypt	= ecb_encrypt,
		.decrypt	= ecb_decrypt,
	}, {
		.base = {
			.cra_name		= "__cbc(aes)",
			.cra_driver_name	= "__cbc-aes-aesni",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= AES_BLOCK_SIZE,
			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= AES_MIN_KEY_SIZE,
		.max_keysize	= AES_MAX_KEY_SIZE,
		.ivsize		= AES_BLOCK_SIZE,
		.setkey		= aesni_skcipher_setkey,
		.encrypt	= cbc_encrypt,
		.decrypt	= cbc_decrypt,
	}, {
		.base = {
			.cra_name		= "__cts(cbc(aes))",
			.cra_driver_name	= "__cts-cbc-aes-aesni",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= AES_BLOCK_SIZE,
			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= AES_MIN_KEY_SIZE,
		.max_keysize	= AES_MAX_KEY_SIZE,
		.ivsize		= AES_BLOCK_SIZE,
		.walksize	= 2 * AES_BLOCK_SIZE,
		.setkey		= aesni_skcipher_setkey,
		.encrypt	= cts_cbc_encrypt,
		.decrypt	= cts_cbc_decrypt,
#ifdef CONFIG_X86_64
	}, {
		.base = {
			.cra_name		= "__ctr(aes)",
			.cra_driver_name	= "__ctr-aes-aesni",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= 1,
			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= AES_MIN_KEY_SIZE,
		.max_keysize	= AES_MAX_KEY_SIZE,
		.ivsize		= AES_BLOCK_SIZE,
		.chunksize	= AES_BLOCK_SIZE,
		.setkey		= aesni_skcipher_setkey,
		.encrypt	= ctr_crypt,
		.decrypt	= ctr_crypt,
#endif
	}, {
		.base = {
			.cra_name		= "__xts(aes)",
			.cra_driver_name	= "__xts-aes-aesni",
			.cra_priority		= 401,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= AES_BLOCK_SIZE,
			.cra_ctxsize		= XTS_AES_CTX_SIZE,
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
		.ivsize		= AES_BLOCK_SIZE,
		.walksize	= 2 * AES_BLOCK_SIZE,
		.setkey		= xts_setkey_aesni,
		.encrypt	= xts_encrypt_aesni,
		.decrypt	= xts_decrypt_aesni,
	}
};

static struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];

#ifdef CONFIG_X86_64
/*
 * XCTR does not have a non-AVX implementation, so it must be enabled
 * conditionally.
 */
static struct skcipher_alg aesni_xctr = {
	.base = {
		.cra_name		= "__xctr(aes)",
		.cra_driver_name	= "__xctr-aes-aesni",
		.cra_priority		= 400,
		.cra_flags		= CRYPTO_ALG_INTERNAL,
		.cra_blocksize		= 1,
		.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
		.cra_module		= THIS_MODULE,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,
	.max_keysize	= AES_MAX_KEY_SIZE,
	.ivsize		= AES_BLOCK_SIZE,
	.chunksize	= AES_BLOCK_SIZE,
	.setkey		= aesni_skcipher_setkey,
	.encrypt	= xctr_crypt,
	.decrypt	= xctr_crypt,
};

static struct simd_skcipher_alg *aesni_simd_xctr;

asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
				   u8 iv[AES_BLOCK_SIZE]);

#define DEFINE_XTS_ALG(suffix, driver_name, priority)			       \
									       \
asmlinkage void								       \
aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src,     \
			 u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \
asmlinkage void								       \
aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src,     \
			 u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \
									       \
static int xts_encrypt_##suffix(struct skcipher_request *req)		       \
{									       \
	return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix);  \
}									       \
									       \
static int xts_decrypt_##suffix(struct skcipher_request *req)		       \
{									       \
	return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix);  \
}									       \
									       \
static struct skcipher_alg aes_xts_alg_##suffix = {			       \
	.base = {							       \
		.cra_name		= "__xts(aes)",			       \
		.cra_driver_name	= "__" driver_name,		       \
		.cra_priority		= priority,			       \
		.cra_flags		= CRYPTO_ALG_INTERNAL,		       \
		.cra_blocksize		= AES_BLOCK_SIZE,		       \
		.cra_ctxsize		= XTS_AES_CTX_SIZE,		       \
		.cra_module		= THIS_MODULE,			       \
	},								       \
	.min_keysize	= 2 * AES_MIN_KEY_SIZE,				       \
	.max_keysize	= 2 * AES_MAX_KEY_SIZE,				       \
	.ivsize		= AES_BLOCK_SIZE,				       \
	.walksize	= 2 * AES_BLOCK_SIZE,				       \
	.setkey		= xts_setkey_aesni,				       \
	.encrypt	= xts_encrypt_##suffix,				       \
	.decrypt	= xts_decrypt_##suffix,				       \
};									       \
									       \
static struct simd_skcipher_alg *aes_xts_simdalg_##suffix

DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500);
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600);
DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700);
DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800);
#endif

/* The common part of the x86_64 AES-GCM key struct */
struct aes_gcm_key {
	/* Expanded AES key and the AES key length in bytes */
	struct crypto_aes_ctx aes_key;

	/* RFC4106 nonce (used only by the rfc4106 algorithms) */
	u32 rfc4106_nonce;
};

/* Key struct used by the AES-NI implementations of AES-GCM */
struct aes_gcm_key_aesni {
	/*
	 * Common part of the key.  The assembly code requires 16-byte alignment
	 * for the round keys; we get this by them being located at the start of
	 * the struct and the whole struct being 16-byte aligned.
	 */
	struct aes_gcm_key base;

	/*
	 * Powers of the hash key H^8 through H^1.  These are 128-bit values.
	 * They all have an extra factor of x^-1 and are byte-reversed.  16-byte
	 * alignment is required by the assembly code.
	 */
	u64 h_powers[8][2] __aligned(16);

	/*
	 * h_powers_xored[i] contains the two 64-bit halves of h_powers[i] XOR'd
	 * together.  It's used for Karatsuba multiplication.  16-byte alignment
	 * is required by the assembly code.
	 */
	u64 h_powers_xored[8] __aligned(16);

	/*
	 * H^1 times x^64 (and also the usual extra factor of x^-1).  16-byte
	 * alignment is required by the assembly code.
	 */
	u64 h_times_x64[2] __aligned(16);
};
#define AES_GCM_KEY_AESNI(key)	\
	container_of((key), struct aes_gcm_key_aesni, base)
#define AES_GCM_KEY_AESNI_SIZE	\
	(sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1)))
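
/*
 * Illustration (not in the original source): the "+ (15 & ~(CRYPTO_MINALIGN -
 * 1))" term reserves only the alignment slack that the allocator's minimum
 * alignment doesn't already guarantee.  E.g. with CRYPTO_MINALIGN == 8 it adds
 * 8 extra bytes, which is enough for aes_gcm_key_get() to PTR_ALIGN() the
 * context up to the 16-byte boundary the assembly code requires.
 */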

/* Key struct used by the VAES + AVX10 implementations of AES-GCM */
struct aes_gcm_key_avx10 {
	/*
	 * Common part of the key.  The assembly code prefers 16-byte alignment
	 * for the round keys; we get this by them being located at the start of
	 * the struct and the whole struct being 64-byte aligned.
	 */
	struct aes_gcm_key base;

	/*
	 * Powers of the hash key H^16 through H^1.  These are 128-bit values.
	 * They all have an extra factor of x^-1 and are byte-reversed.  This
	 * array is aligned to a 64-byte boundary to make it naturally aligned
	 * for 512-bit loads, which can improve performance.  (The assembly code
	 * doesn't *need* the alignment; this is just an optimization.)
	 */
	u64 h_powers[16][2] __aligned(64);

	/* Three padding blocks required by the assembly code */
	u64 padding[3][2];
};
#define AES_GCM_KEY_AVX10(key)	\
	container_of((key), struct aes_gcm_key_avx10, base)
#define AES_GCM_KEY_AVX10_SIZE	\
	(sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1)))

/*
 * These flags are passed to the AES-GCM helper functions to specify the
 * specific version of AES-GCM (RFC4106 or not), whether it's encryption or
 * decryption, and which assembly functions should be called.  Assembly
 * functions are selected using flags instead of function pointers to avoid
 * indirect calls (which are very expensive on x86) regardless of inlining.
 */
#define FLAG_RFC4106	BIT(0)
#define FLAG_ENC	BIT(1)
#define FLAG_AVX	BIT(2)
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
#  define FLAG_AVX10_256	BIT(3)
#  define FLAG_AVX10_512	BIT(4)
#else
   /*
    * This should cause all calls to the AVX10 assembly functions to be
    * optimized out, avoiding the need to ifdef each call individually.
    */
#  define FLAG_AVX10_256	0
#  define FLAG_AVX10_512	0
#endif

static inline struct aes_gcm_key *
aes_gcm_key_get(struct crypto_aead *tfm, int flags)
{
	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
		return PTR_ALIGN(crypto_aead_ctx(tfm), 64);
	else
		return PTR_ALIGN(crypto_aead_ctx(tfm), 16);
}

asmlinkage void
aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key);
asmlinkage void
aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key);
asmlinkage void
aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key);
asmlinkage void
aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key);

static void aes_gcm_precompute(struct aes_gcm_key *key, int flags)
{
	/*
	 * To make things a bit easier on the assembly side, the AVX10
	 * implementations use the same key format.  Therefore, a single
	 * function using 256-bit vectors would suffice here.  However, it's
	 * straightforward to provide a 512-bit one because of how the assembly
	 * code is structured, and it works nicely because the total size of the
	 * key powers is a multiple of 512 bits.  So we take advantage of that.
	 *
	 * A similar situation applies to the AES-NI implementations.
	 */
	if (flags & FLAG_AVX10_512)
		aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key));
	else if (flags & FLAG_AVX10_256)
		aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key));
	else if (flags & FLAG_AVX)
		aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key));
	else
		aes_gcm_precompute_aesni(AES_GCM_KEY_AESNI(key));
}

asmlinkage void
aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key,
			 u8 ghash_acc[16], const u8 *aad, int aadlen);
asmlinkage void
aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key,
			     u8 ghash_acc[16], const u8 *aad, int aadlen);
asmlinkage void
aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
			      u8 ghash_acc[16], const u8 *aad, int aadlen);

static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16],
			       const u8 *aad, int aadlen, int flags)
{
	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
		aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc,
					      aad, aadlen);
	else if (flags & FLAG_AVX)
		aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc,
					     aad, aadlen);
	else
		aes_gcm_aad_update_aesni(AES_GCM_KEY_AESNI(key), ghash_acc,
					 aad, aadlen);
}

asmlinkage void
aes_gcm_enc_update_aesni(const struct aes_gcm_key_aesni *key,
			 const u32 le_ctr[4], u8 ghash_acc[16],
			 const u8 *src, u8 *dst, int datalen);
asmlinkage void
aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key,
			     const u32 le_ctr[4], u8 ghash_acc[16],
			     const u8 *src, u8 *dst, int datalen);
asmlinkage void
aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
				  const u32 le_ctr[4], u8 ghash_acc[16],
				  const u8 *src, u8 *dst, int datalen);
asmlinkage void
aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
				  const u32 le_ctr[4], u8 ghash_acc[16],
				  const u8 *src, u8 *dst, int datalen);

asmlinkage void
aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key,
			 const u32 le_ctr[4], u8 ghash_acc[16],
			 const u8 *src, u8 *dst, int datalen);
asmlinkage void
aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key,
			     const u32 le_ctr[4], u8 ghash_acc[16],
			     const u8 *src, u8 *dst, int datalen);
asmlinkage void
aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
				  const u32 le_ctr[4], u8 ghash_acc[16],
				  const u8 *src, u8 *dst, int datalen);
asmlinkage void
aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
				  const u32 le_ctr[4], u8 ghash_acc[16],
				  const u8 *src, u8 *dst, int datalen);

/* __always_inline to optimize out the branches based on @flags */
static __always_inline void
aes_gcm_update(const struct aes_gcm_key *key,
	       const u32 le_ctr[4], u8 ghash_acc[16],
	       const u8 *src, u8 *dst, int datalen, int flags)
{
	if (flags & FLAG_ENC) {
		if (flags & FLAG_AVX10_512)
			aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
							  le_ctr, ghash_acc,
							  src, dst, datalen);
		else if (flags & FLAG_AVX10_256)
			aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
							  le_ctr, ghash_acc,
							  src, dst, datalen);
		else if (flags & FLAG_AVX)
			aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key),
						     le_ctr, ghash_acc,
						     src, dst, datalen);
		else
			aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr,
						 ghash_acc, src, dst, datalen);
	} else {
		if (flags & FLAG_AVX10_512)
			aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
							  le_ctr, ghash_acc,
							  src, dst, datalen);
		else if (flags & FLAG_AVX10_256)
			aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
							  le_ctr, ghash_acc,
							  src, dst, datalen);
		else if (flags & FLAG_AVX)
			aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key),
						     le_ctr, ghash_acc,
						     src, dst, datalen);
		else
			aes_gcm_dec_update_aesni(AES_GCM_KEY_AESNI(key),
						 le_ctr, ghash_acc,
						 src, dst, datalen);
	}
}

asmlinkage void
aes_gcm_enc_final_aesni(const struct aes_gcm_key_aesni *key,
			const u32 le_ctr[4], u8 ghash_acc[16],
			u64 total_aadlen, u64 total_datalen);
asmlinkage void
aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key,
			    const u32 le_ctr[4], u8 ghash_acc[16],
			    u64 total_aadlen, u64 total_datalen);
asmlinkage void
aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
			     const u32 le_ctr[4], u8 ghash_acc[16],
			     u64 total_aadlen, u64 total_datalen);

/* __always_inline to optimize out the branches based on @flags */
static __always_inline void
aes_gcm_enc_final(const struct aes_gcm_key *key,
		  const u32 le_ctr[4], u8 ghash_acc[16],
		  u64 total_aadlen, u64 total_datalen, int flags)
{
	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
		aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
					     le_ctr, ghash_acc,
					     total_aadlen, total_datalen);
	else if (flags & FLAG_AVX)
		aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key),
					    le_ctr, ghash_acc,
					    total_aadlen, total_datalen);
	else
		aes_gcm_enc_final_aesni(AES_GCM_KEY_AESNI(key),
					le_ctr, ghash_acc,
					total_aadlen, total_datalen);
}

asmlinkage bool __must_check
aes_gcm_dec_final_aesni(const struct aes_gcm_key_aesni *key,
			const u32 le_ctr[4], const u8 ghash_acc[16],
			u64 total_aadlen, u64 total_datalen,
			const u8 tag[16], int taglen);
asmlinkage bool __must_check
aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key,
			    const u32 le_ctr[4], const u8 ghash_acc[16],
			    u64 total_aadlen, u64 total_datalen,
			    const u8 tag[16], int taglen);
asmlinkage bool __must_check
aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
			     const u32 le_ctr[4], const u8 ghash_acc[16],
			     u64 total_aadlen, u64 total_datalen,
			     const u8 tag[16], int taglen);

/* __always_inline to optimize out the branches based on @flags */
static __always_inline bool __must_check
aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4],
		  u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen,
		  u8 tag[16], int taglen, int flags)
{
	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
		return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
						    le_ctr, ghash_acc,
						    total_aadlen, total_datalen,
						    tag, taglen);
	else if (flags & FLAG_AVX)
		return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key),
						   le_ctr, ghash_acc,
						   total_aadlen, total_datalen,
						   tag, taglen);
	else
		return aes_gcm_dec_final_aesni(AES_GCM_KEY_AESNI(key),
					       le_ctr, ghash_acc,
					       total_aadlen, total_datalen,
					       tag, taglen);
}

/*
 * This is the Integrity Check Value (aka the authentication tag) length and can
 * be 8, 12 or 16 bytes long.
 */
static int common_rfc4106_set_authsize(struct crypto_aead *aead,
				       unsigned int authsize)
{
	switch (authsize) {
	case 8:
	case 12:
	case 16:
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
				       unsigned int authsize)
{
	switch (authsize) {
	case 4:
	case 8:
	case 12:
	case 13:
	case 14:
	case 15:
	case 16:
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * This is the setkey function for the x86_64 implementations of AES-GCM.  It
 * saves the RFC4106 nonce if applicable, expands the AES key, and precomputes
 * powers of the hash key.
 *
 * To comply with the crypto_aead API, this has to be usable in no-SIMD context.
 * For that reason, this function includes a portable C implementation of the
 * needed logic.  However, the portable C implementation is very slow, taking
 * about the same time as encrypting 37 KB of data.  To be ready for users that
 * may set a key even somewhat frequently, we therefore also include a SIMD
 * assembly implementation, expanding the AES key using AES-NI and precomputing
 * the hash key powers using PCLMULQDQ or VPCLMULQDQ.
 */
static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
		      unsigned int keylen, int flags)
{
	struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
	int err;

	if (flags & FLAG_RFC4106) {
		if (keylen < 4)
			return -EINVAL;
		keylen -= 4;
		key->rfc4106_nonce = get_unaligned_be32(raw_key + keylen);
	}

	/* The assembly code assumes the following offsets. */
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768);

	if (likely(crypto_simd_usable())) {
		err = aes_check_keylen(keylen);
		if (err)
			return err;
		kernel_fpu_begin();
		aesni_set_key(&key->aes_key, raw_key, keylen);
		aes_gcm_precompute(key, flags);
		kernel_fpu_end();
	} else {
		static const u8 x_to_the_minus1[16] __aligned(__alignof__(be128)) = {
			[0] = 0xc2, [15] = 1
		};
		static const u8 x_to_the_63[16] __aligned(__alignof__(be128)) = {
			[7] = 1
		};
		be128 h1 = {};
		be128 h;
		int i;

		err = aes_expandkey(&key->aes_key, raw_key, keylen);
		if (err)
			return err;

		/* Encrypt the all-zeroes block to get the hash key H^1 */
		aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1);

		/* Compute H^1 * x^-1 */
		h = h1;
		gf128mul_lle(&h, (const be128 *)x_to_the_minus1);

		/* Compute the needed key powers */
		if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) {
			struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key);

			for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
				k->h_powers[i][0] = be64_to_cpu(h.b);
				k->h_powers[i][1] = be64_to_cpu(h.a);
				gf128mul_lle(&h, &h1);
			}
			memset(k->padding, 0, sizeof(k->padding));
		} else {
			struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key);

			for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
				k->h_powers[i][0] = be64_to_cpu(h.b);
				k->h_powers[i][1] = be64_to_cpu(h.a);
				k->h_powers_xored[i] = k->h_powers[i][0] ^
						       k->h_powers[i][1];
				gf128mul_lle(&h, &h1);
			}
			gf128mul_lle(&h1, (const be128 *)x_to_the_63);
			k->h_times_x64[0] = be64_to_cpu(h1.b);
			k->h_times_x64[1] = be64_to_cpu(h1.a);
		}
	}
	return 0;
}

/*
 * Initialize @ghash_acc, then pass all @assoclen bytes of associated data
 * (a.k.a. additional authenticated data) from @sg_src through the GHASH update
 * assembly function.  kernel_fpu_begin() must have already been called.
 */
static void gcm_process_assoc(const struct aes_gcm_key *key, u8 ghash_acc[16],
			      struct scatterlist *sg_src, unsigned int assoclen,
			      int flags)
{
	struct scatter_walk walk;
	/*
	 * The assembly function requires that the length of any non-last
	 * segment of associated data be a multiple of 16 bytes, so this
	 * function does the buffering needed to achieve that.
	 */
	unsigned int pos = 0;
	u8 buf[16];

	memset(ghash_acc, 0, 16);
	scatterwalk_start(&walk, sg_src);

	while (assoclen) {
		unsigned int len_this_page = scatterwalk_clamp(&walk, assoclen);
		void *mapped = scatterwalk_map(&walk);
		const void *src = mapped;
		unsigned int len;

		assoclen -= len_this_page;
		scatterwalk_advance(&walk, len_this_page);
		if (unlikely(pos)) {
			len = min(len_this_page, 16 - pos);
			memcpy(&buf[pos], src, len);
			pos += len;
			src += len;
			len_this_page -= len;
			if (pos < 16)
				goto next;
			aes_gcm_aad_update(key, ghash_acc, buf, 16, flags);
			pos = 0;
		}
		len = len_this_page;
		if (unlikely(assoclen)) /* Not the last segment yet? */
			len = round_down(len, 16);
		aes_gcm_aad_update(key, ghash_acc, src, len, flags);
		src += len;
		len_this_page -= len;
		if (unlikely(len_this_page)) {
			memcpy(buf, src, len_this_page);
			pos = len_this_page;
		}
next:
		scatterwalk_unmap(mapped);
		scatterwalk_pagedone(&walk, 0, assoclen);
		if (need_resched()) {
			kernel_fpu_end();
			kernel_fpu_begin();
		}
	}
	if (unlikely(pos))
		aes_gcm_aad_update(key, ghash_acc, buf, pos, flags);
}
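
/*
 * Worked example of the buffering above (illustrative): if the AAD arrives as
 * scatterlist segments of 20 and 12 bytes, the first pass hashes 16 bytes and
 * stashes the remaining 4 in buf[]; the second pass tops buf[] up with the 12
 * new bytes and hashes it as one aligned 16-byte block, so the assembly never
 * sees a non-final update whose length isn't a multiple of 16 bytes.
 */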

/* __always_inline to optimize out the branches based on @flags */
static __always_inline int
gcm_crypt(struct aead_request *req, int flags)
{
	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	const struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
	unsigned int assoclen = req->assoclen;
	struct skcipher_walk walk;
	unsigned int nbytes;
	u8 ghash_acc[16]; /* GHASH accumulator */
	u32 le_ctr[4]; /* Counter in little-endian format */
	int taglen;
	int err;

	/* Initialize the counter and determine the associated data length. */
	le_ctr[0] = 2;
	if (flags & FLAG_RFC4106) {
		if (unlikely(assoclen != 16 && assoclen != 20))
			return -EINVAL;
		assoclen -= 8;
		le_ctr[1] = get_unaligned_be32(req->iv + 4);
		le_ctr[2] = get_unaligned_be32(req->iv + 0);
		le_ctr[3] = key->rfc4106_nonce; /* already byte-swapped */
	} else {
		le_ctr[1] = get_unaligned_be32(req->iv + 8);
		le_ctr[2] = get_unaligned_be32(req->iv + 4);
		le_ctr[3] = get_unaligned_be32(req->iv + 0);
	}

	/* Begin walking through the plaintext or ciphertext. */
	if (flags & FLAG_ENC)
		err = skcipher_walk_aead_encrypt(&walk, req, false);
	else
		err = skcipher_walk_aead_decrypt(&walk, req, false);
	if (err)
		return err;

	/*
	 * Since the AES-GCM assembly code requires that at least three assembly
	 * functions be called to process any message (this is needed to support
	 * incremental updates cleanly), to reduce overhead we try to do all
	 * three calls in the same kernel FPU section if possible.  We close the
	 * section and start a new one if there are multiple data segments or if
	 * rescheduling is needed while processing the associated data.
	 */
	kernel_fpu_begin();

	/* Pass the associated data through GHASH. */
	gcm_process_assoc(key, ghash_acc, req->src, assoclen, flags);

	/* En/decrypt the data and pass the ciphertext through GHASH. */
	while (unlikely((nbytes = walk.nbytes) < walk.total)) {
		/*
		 * Non-last segment.  In this case, the assembly function
		 * requires that the length be a multiple of 16 (AES_BLOCK_SIZE)
		 * bytes.  The needed buffering of up to 16 bytes is handled by
		 * the skcipher_walk.  Here we just need to round down to a
		 * multiple of 16.
		 */
		nbytes = round_down(nbytes, AES_BLOCK_SIZE);
		aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
			       walk.dst.virt.addr, nbytes, flags);
		le_ctr[0] += nbytes / AES_BLOCK_SIZE;
		kernel_fpu_end();
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		if (err)
			return err;
		kernel_fpu_begin();
	}
	/* Last segment: process all remaining data. */
	aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
		       walk.dst.virt.addr, nbytes, flags);
	/*
	 * The low word of the counter isn't used by the finalize, so there's no
	 * need to increment it here.
	 */

	/* Finalize */
	taglen = crypto_aead_authsize(tfm);
	if (flags & FLAG_ENC) {
		/* Finish computing the auth tag. */
		aes_gcm_enc_final(key, le_ctr, ghash_acc, assoclen,
				  req->cryptlen, flags);

		/* Store the computed auth tag in the dst scatterlist. */
		scatterwalk_map_and_copy(ghash_acc, req->dst, req->assoclen +
					 req->cryptlen, taglen, 1);
	} else {
		unsigned int datalen = req->cryptlen - taglen;
		u8 tag[16];

		/* Get the transmitted auth tag from the src scatterlist. */
		scatterwalk_map_and_copy(tag, req->src, req->assoclen + datalen,
					 taglen, 0);
		/*
		 * Finish computing the auth tag and compare it to the
		 * transmitted one.  The assembly function does the actual tag
		 * comparison.  Here, just check the boolean result.
		 */
		if (!aes_gcm_dec_final(key, le_ctr, ghash_acc, assoclen,
				       datalen, tag, taglen, flags))
			err = -EBADMSG;
	}
	kernel_fpu_end();
	if (nbytes)
		skcipher_walk_done(&walk, 0);
	return err;
}

#define DEFINE_GCM_ALGS(suffix, flags, generic_driver_name, rfc_driver_name,  \
			ctxsize, priority)				       \
									       \
static int gcm_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key,    \
			       unsigned int keylen)			       \
{									       \
	return gcm_setkey(tfm, raw_key, keylen, (flags));		       \
}									       \
									       \
static int gcm_encrypt_##suffix(struct aead_request *req)		       \
{									       \
	return gcm_crypt(req, (flags) | FLAG_ENC);			       \
}									       \
									       \
static int gcm_decrypt_##suffix(struct aead_request *req)		       \
{									       \
	return gcm_crypt(req, (flags));					       \
}									       \
									       \
static int rfc4106_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key,\
				   unsigned int keylen)			       \
{									       \
	return gcm_setkey(tfm, raw_key, keylen, (flags) | FLAG_RFC4106);      \
}									       \
									       \
static int rfc4106_encrypt_##suffix(struct aead_request *req)		       \
{									       \
	return gcm_crypt(req, (flags) | FLAG_RFC4106 | FLAG_ENC);	       \
}									       \
									       \
static int rfc4106_decrypt_##suffix(struct aead_request *req)		       \
{									       \
	return gcm_crypt(req, (flags) | FLAG_RFC4106);			       \
}									       \
									       \
static struct aead_alg aes_gcm_algs_##suffix[] = { {			       \
	.setkey			= gcm_setkey_##suffix,			       \
	.setauthsize		= generic_gcmaes_set_authsize,		       \
	.encrypt		= gcm_encrypt_##suffix,			       \
	.decrypt		= gcm_decrypt_##suffix,			       \
	.ivsize			= GCM_AES_IV_SIZE,			       \
	.chunksize		= AES_BLOCK_SIZE,			       \
	.maxauthsize		= 16,					       \
	.base = {							       \
		.cra_name		= "__gcm(aes)",			       \
		.cra_driver_name	= "__" generic_driver_name,	       \
		.cra_priority		= (priority),			       \
		.cra_flags		= CRYPTO_ALG_INTERNAL,		       \
		.cra_blocksize		= 1,				       \
		.cra_ctxsize		= (ctxsize),			       \
		.cra_module		= THIS_MODULE,			       \
	},								       \
}, {									       \
	.setkey			= rfc4106_setkey_##suffix,		       \
	.setauthsize		= common_rfc4106_set_authsize,		       \
	.encrypt		= rfc4106_encrypt_##suffix,		       \
	.decrypt		= rfc4106_decrypt_##suffix,		       \
	.ivsize			= GCM_RFC4106_IV_SIZE,			       \
	.chunksize		= AES_BLOCK_SIZE,			       \
	.maxauthsize		= 16,					       \
	.base = {							       \
		.cra_name		= "__rfc4106(gcm(aes))",	       \
		.cra_driver_name	= "__" rfc_driver_name,		       \
		.cra_priority		= (priority),			       \
		.cra_flags		= CRYPTO_ALG_INTERNAL,		       \
		.cra_blocksize		= 1,				       \
		.cra_ctxsize		= (ctxsize),			       \
		.cra_module		= THIS_MODULE,			       \
	},								       \
} };									       \
									       \
static struct simd_aead_alg *aes_gcm_simdalgs_##suffix[2]
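
/*
 * Roughly, each DEFINE_GCM_ALGS(suffix, ...) invocation below expands to six
 * thin wrappers -- gcm_setkey_<suffix>(), gcm_encrypt_<suffix>(),
 * gcm_decrypt_<suffix>(), rfc4106_setkey_<suffix>(), rfc4106_encrypt_<suffix>()
 * and rfc4106_decrypt_<suffix>() -- which simply pass the compile-time flag set
 * to gcm_setkey()/gcm_crypt(), plus the two-entry aes_gcm_algs_<suffix>[] array
 * ("__gcm(aes)" and "__rfc4106(gcm(aes))") and its aes_gcm_simdalgs_<suffix>[]
 * companion used at registration time.
 */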

/* aes_gcm_algs_aesni */
DEFINE_GCM_ALGS(aesni, /* no flags */ 0,
		"generic-gcm-aesni", "rfc4106-gcm-aesni",
		AES_GCM_KEY_AESNI_SIZE, 400);

/* aes_gcm_algs_aesni_avx */
DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX,
		"generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx",
		AES_GCM_KEY_AESNI_SIZE, 500);

#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
/* aes_gcm_algs_vaes_avx10_256 */
DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
		"generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256",
		AES_GCM_KEY_AVX10_SIZE, 700);

/* aes_gcm_algs_vaes_avx10_512 */
DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
		"generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
		AES_GCM_KEY_AVX10_SIZE, 800);
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */

/*
 * This is a list of CPU models that are known to suffer from downclocking when
 * zmm registers (512-bit vectors) are used.  On these CPUs, the AES mode
 * implementations with zmm registers won't be used by default.  Implementations
 * with ymm registers (256-bit vectors) will be used by default instead.
 */
static const struct x86_cpu_id zmm_exclusion_list[] = {
	X86_MATCH_VFM(INTEL_SKYLAKE_X,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_X,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_D,		0),
	X86_MATCH_VFM(INTEL_ICELAKE,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_L,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,	0),
	X86_MATCH_VFM(INTEL_TIGERLAKE_L,	0),
	X86_MATCH_VFM(INTEL_TIGERLAKE,		0),
	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
	/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
	{},
};

static int __init register_avx_algs(void)
{
	int err;

	if (!boot_cpu_has(X86_FEATURE_AVX))
		return 0;
	err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1,
					     &aes_xts_simdalg_aesni_avx);
	if (err)
		return err;
	err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx,
					 ARRAY_SIZE(aes_gcm_algs_aesni_avx),
					 aes_gcm_simdalgs_aesni_avx);
	if (err)
		return err;
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
	    !boot_cpu_has(X86_FEATURE_VAES) ||
	    !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) ||
	    !boot_cpu_has(X86_FEATURE_PCLMULQDQ) ||
	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
		return 0;
	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1,
					     &aes_xts_simdalg_vaes_avx2);
	if (err)
		return err;

	if (!boot_cpu_has(X86_FEATURE_AVX512BW) ||
	    !boot_cpu_has(X86_FEATURE_AVX512VL) ||
	    !boot_cpu_has(X86_FEATURE_BMI2) ||
	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
			       XFEATURE_MASK_AVX512, NULL))
		return 0;

	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1,
					     &aes_xts_simdalg_vaes_avx10_256);
	if (err)
		return err;
	err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
					 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
					 aes_gcm_simdalgs_vaes_avx10_256);
	if (err)
		return err;

	if (x86_match_cpu(zmm_exclusion_list)) {
		int i;

		aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
		for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
			aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
	}

	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1,
					     &aes_xts_simdalg_vaes_avx10_512);
	if (err)
		return err;
	err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
					 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
					 aes_gcm_simdalgs_vaes_avx10_512);
	if (err)
		return err;
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
	return 0;
}

static void unregister_avx_algs(void)
{
	if (aes_xts_simdalg_aesni_avx)
		simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1,
					  &aes_xts_simdalg_aesni_avx);
	if (aes_gcm_simdalgs_aesni_avx[0])
		simd_unregister_aeads(aes_gcm_algs_aesni_avx,
				      ARRAY_SIZE(aes_gcm_algs_aesni_avx),
				      aes_gcm_simdalgs_aesni_avx);
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
	if (aes_xts_simdalg_vaes_avx2)
		simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1,
					  &aes_xts_simdalg_vaes_avx2);
	if (aes_xts_simdalg_vaes_avx10_256)
		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1,
					  &aes_xts_simdalg_vaes_avx10_256);
	if (aes_gcm_simdalgs_vaes_avx10_256[0])
		simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
				      aes_gcm_simdalgs_vaes_avx10_256);
	if (aes_xts_simdalg_vaes_avx10_512)
		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1,
					  &aes_xts_simdalg_vaes_avx10_512);
	if (aes_gcm_simdalgs_vaes_avx10_512[0])
		simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
				      aes_gcm_simdalgs_vaes_avx10_512);
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
}
#else /* CONFIG_X86_64 */
static struct aead_alg aes_gcm_algs_aesni[0];
static struct simd_aead_alg *aes_gcm_simdalgs_aesni[0];

static int __init register_avx_algs(void)
{
	return 0;
}

static void unregister_avx_algs(void)
{
}
#endif /* !CONFIG_X86_64 */

static const struct x86_cpu_id aesni_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);

static int __init aesni_init(void)
{
	int err;

	if (!x86_match_cpu(aesni_cpu_id))
		return -ENODEV;
#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_AVX)) {
		/* optimize performance of ctr mode encryption transform */
		static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
		pr_info("AES CTR mode by8 optimization enabled\n");
	}
#endif /* CONFIG_X86_64 */

	err = crypto_register_alg(&aesni_cipher_alg);
	if (err)
		return err;

	err = simd_register_skciphers_compat(aesni_skciphers,
					     ARRAY_SIZE(aesni_skciphers),
					     aesni_simd_skciphers);
	if (err)
		goto unregister_cipher;

	err = simd_register_aeads_compat(aes_gcm_algs_aesni,
					 ARRAY_SIZE(aes_gcm_algs_aesni),
					 aes_gcm_simdalgs_aesni);
	if (err)
		goto unregister_skciphers;

#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_AVX))
		err = simd_register_skciphers_compat(&aesni_xctr, 1,
						     &aesni_simd_xctr);
	if (err)
		goto unregister_aeads;
#endif /* CONFIG_X86_64 */

	err = register_avx_algs();
	if (err)
		goto unregister_avx;

	return 0;

unregister_avx:
	unregister_avx_algs();
#ifdef CONFIG_X86_64
	if (aesni_simd_xctr)
		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
unregister_aeads:
#endif /* CONFIG_X86_64 */
	simd_unregister_aeads(aes_gcm_algs_aesni,
			      ARRAY_SIZE(aes_gcm_algs_aesni),
			      aes_gcm_simdalgs_aesni);
unregister_skciphers:
	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
				  aesni_simd_skciphers);
unregister_cipher:
	crypto_unregister_alg(&aesni_cipher_alg);
	return err;
}

static void __exit aesni_exit(void)
{
	simd_unregister_aeads(aes_gcm_algs_aesni,
			      ARRAY_SIZE(aes_gcm_algs_aesni),
			      aes_gcm_simdalgs_aesni);
	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
				  aesni_simd_skciphers);
	crypto_unregister_alg(&aesni_cipher_alg);
#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_AVX))
		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
#endif /* CONFIG_X86_64 */
	unregister_avx_algs();
}

module_init(aesni_init);
module_exit(aesni_exit);

MODULE_DESCRIPTION("AES cipher and modes, optimized with AES-NI or VAES instructions");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");