// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");

#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16
#define GCM_IV_SIZE		12
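
/*
 * The GCM code below uses the 12-byte IV recommended by the spec; the
 * remaining 4 bytes of the 16-byte counter block hold a big-endian block
 * counter, which is set to 2 for the first payload block and to 1 when
 * encrypting the final tag.
 */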

struct ghash_desc_ctx {
	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
	u8 buf[GHASH_BLOCK_SIZE];
	u32 count;
};

struct gcm_aes_ctx {
	struct crypto_aes_ctx	aes_key;
	struct ghash_key	ghash_key;
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
				       struct ghash_key const *k,
				       const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
				      struct ghash_key const *k,
				      const char *head);

asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
				  struct ghash_key const *k, u64 dg[],
				  u8 ctr[], u32 const rk[], int rounds,
				  u8 tag[]);

asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
				  struct ghash_key const *k, u64 dg[],
				  u8 ctr[], u32 const rk[], int rounds,
				  u8 tag[]);

static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}

static void ghash_do_update(int blocks, u64 dg[], const char *src,
			    struct ghash_key *key, const char *head,
			    void (*simd_update)(int blocks, u64 dg[],
						const char *src,
						struct ghash_key const *k,
						const char *head))
{
	if (likely(crypto_simd_usable() && simd_update)) {
		/* NEON is usable: hand the blocks to the PMULL routine */
		kernel_neon_begin();
		simd_update(blocks, dg, src, key, head);
		kernel_neon_end();
	} else {
		/* scalar fallback using the generic GF(2^128) helpers */
		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

		do {
			const u8 *in = src;

			if (head) {
				in = head;
				blocks++;
				head = NULL;
			} else {
				src += GHASH_BLOCK_SIZE;
			}

			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
			gf128mul_lle(&dst, &key->k);
		} while (--blocks);

		dg[0] = be64_to_cpu(dst.b);
		dg[1] = be64_to_cpu(dst.a);
	}
}

/* avoid hogging the CPU for too long */
#define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)
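
/*
 * With SZ_64K == 65536 and 16-byte GHASH blocks, MAX_BLOCKS caps each
 * ghash_do_update() call at 4096 blocks, so large updates are processed
 * in 64 KiB chunks rather than in one uninterrupted run.
 */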

static int __ghash_update(struct shash_desc *desc, const u8 *src,
			  unsigned int len,
			  void (*simd_update)(int blocks, u64 dg[],
					      const char *src,
					      struct ghash_key const *k,
					      const char *head))
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	ctx->count += len;

	if ((partial + len) >= GHASH_BLOCK_SIZE) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
		int blocks;

		if (partial) {
			int p = GHASH_BLOCK_SIZE - partial;

			memcpy(ctx->buf + partial, src, p);
			src += p;
			len -= p;
		}

		blocks = len / GHASH_BLOCK_SIZE;
		len %= GHASH_BLOCK_SIZE;

		do {
			int chunk = min(blocks, MAX_BLOCKS);

			ghash_do_update(chunk, ctx->digest, src, key,
					partial ? ctx->buf : NULL,
					simd_update);

			blocks -= chunk;
			src += chunk * GHASH_BLOCK_SIZE;
			partial = 0;
		} while (unlikely(blocks > 0));
	}
	if (len)
		memcpy(ctx->buf + partial, src, len);
	return 0;
}

static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
			   unsigned int len)
{
	return __ghash_update(desc, src, len, pmull_ghash_update_p8);
}

static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
			    unsigned int len)
{
	return __ghash_update(desc, src, len, pmull_ghash_update_p64);
}

static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	if (partial) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);

		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
				pmull_ghash_update_p8);
	}
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}

static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	if (partial) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);

		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
				pmull_ghash_update_p64);
	}
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}

static void ghash_reflect(u64 h[], const be128 *k)
{
	u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;

	h[0] = (be64_to_cpu(k->b) << 1) | carry;
	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

	if (carry)
		h[1] ^= 0xc200000000000000UL;
}
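
/*
 * ghash_reflect() above produces the pre-shifted, bit-reflected form of
 * the hash key that the PMULL routines expect: the 128-bit value is
 * rotated left by one bit, and when the bit shifted out of the top was
 * set, XORing in 0xc200000000000000 folds the GHASH reduction polynomial
 * (x^128 + x^7 + x^2 + x + 1, in this reflected convention) back into
 * the result.
 */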

static int __ghash_setkey(struct ghash_key *key,
			  const u8 *inkey, unsigned int keylen)
{
	be128 h;

	/* needed for the fallback */
	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);

	ghash_reflect(key->h, &key->k);

	h = key->k;
	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h2, &h);

	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h3, &h);

	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h4, &h);

	return 0;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *inkey, unsigned int keylen)
{
	struct ghash_key *key = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE)
		return -EINVAL;

	return __ghash_setkey(key, inkey, keylen);
}

static struct shash_alg ghash_alg[] = {{
	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-neon",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key),
	.base.cra_module	= THIS_MODULE,

	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update_p8,
	.final			= ghash_final_p8,
	.setkey			= ghash_setkey,
	.descsize		= sizeof(struct ghash_desc_ctx),
}, {
	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-ce",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key),
	.base.cra_module	= THIS_MODULE,

	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update_p64,
	.final			= ghash_final_p64,
	.setkey			= ghash_setkey,
	.descsize		= sizeof(struct ghash_desc_ctx),
}};

static int num_rounds(struct crypto_aes_ctx *ctx)
{
	/*
	 * # of rounds specified by AES:
	 * 128 bit key		10 rounds
	 * 192 bit key		12 rounds
	 * 256 bit key		14 rounds
	 * => n byte key	=> 6 + (n/4) rounds
	 */
	return 6 + ctx->key_length / 4;
}
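
/*
 * For example, a 16-byte (128-bit) key yields 6 + 16/4 = 10 rounds, a
 * 24-byte key 6 + 24/4 = 12, and a 32-byte key 6 + 32/4 = 14, matching
 * the table in num_rounds() above.
 */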

static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
		      unsigned int keylen)
{
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
	u8 key[GHASH_BLOCK_SIZE];
	int ret;

	ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
	if (ret)
		return -EINVAL;

	aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});

	return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
}
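
/*
 * As in the GCM spec, gcm_setkey() derives the GHASH key H by encrypting
 * the all-zero block (the zeroed compound literal passed to aes_encrypt())
 * with the freshly expanded AES key, and then hands H to __ghash_setkey().
 */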

static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	switch (authsize) {
	case 4:
	case 8:
	case 12 ... 16:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
			   int *buf_count, struct gcm_aes_ctx *ctx)
{
	if (*buf_count > 0) {
		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

		memcpy(&buf[*buf_count], src, buf_added);

		*buf_count += buf_added;
		src += buf_added;
		count -= buf_added;
	}

	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
		int blocks = count / GHASH_BLOCK_SIZE;

		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
				*buf_count ? buf : NULL,
				pmull_ghash_update_p64);

		src += blocks * GHASH_BLOCK_SIZE;
		count %= GHASH_BLOCK_SIZE;
		*buf_count = 0;
	}

	if (count > 0) {
		memcpy(buf, src, count);
		*buf_count = count;
	}
}

static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	u8 buf[GHASH_BLOCK_SIZE];
	struct scatter_walk walk;
	u32 len = req->assoclen;
	int buf_count = 0;

	scatterwalk_start(&walk, req->src);

	do {
		u32 n = scatterwalk_clamp(&walk, len);
		u8 *p;

		if (!n) {
			scatterwalk_start(&walk, sg_next(walk.sg));
			n = scatterwalk_clamp(&walk, len);
		}
		p = scatterwalk_map(&walk);

		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
		len -= n;

		scatterwalk_unmap(p);
		scatterwalk_advance(&walk, n);
		scatterwalk_done(&walk, 0, len);
	} while (len);

	if (buf_count) {
		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
				pmull_ghash_update_p64);
	}
}

static int gcm_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	int nrounds = num_rounds(&ctx->aes_key);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u8 iv[AES_BLOCK_SIZE];
	u64 dg[2] = {};
	be128 lengths;
	u8 *tag;
	int err;

	lengths.a = cpu_to_be64(req->assoclen * 8);
	lengths.b = cpu_to_be64(req->cryptlen * 8);

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(2, iv + GCM_IV_SIZE);

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	if (likely(crypto_simd_usable())) {
		/* bulk path: the PMULL routine encrypts and hashes at once */
		do {
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int nbytes = walk.nbytes;

			tag = (u8 *)&lengths;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
				src = dst = memcpy(buf + sizeof(buf) - nbytes,
						   src, nbytes);
			} else if (nbytes < walk.total) {
				nbytes &= ~(AES_BLOCK_SIZE - 1);
				tag = NULL;
			}

			kernel_neon_begin();
			pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
					  iv, ctx->aes_key.key_enc, nrounds,
					  tag);
			kernel_neon_end();

			if (unlikely(!nbytes))
				break;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
				memcpy(walk.dst.virt.addr,
				       buf + sizeof(buf) - nbytes, nbytes);

			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		} while (walk.nbytes);
	} else {
		/* fallback: CTR with the AES library and soft GHASH */
		while (walk.nbytes >= AES_BLOCK_SIZE) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int remaining = blocks;

			do {
				aes_encrypt(&ctx->aes_key, buf, iv);
				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--remaining > 0);

			ghash_do_update(blocks, dg, walk.dst.virt.addr,
					&ctx->ghash_key, NULL, NULL);

			err = skcipher_walk_done(&walk,
						 walk.nbytes % AES_BLOCK_SIZE);
		}

		/* handle the tail */
		if (walk.nbytes) {
			aes_encrypt(&ctx->aes_key, buf, iv);

			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
				       buf, walk.nbytes);

			memcpy(buf, walk.dst.virt.addr, walk.nbytes);
			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
		}

		tag = (u8 *)&lengths;
		ghash_do_update(1, dg, tag, &ctx->ghash_key,
				walk.nbytes ? buf : NULL, NULL);

		if (walk.nbytes)
			err = skcipher_walk_done(&walk, 0);

		put_unaligned_be64(dg[1], tag);
		put_unaligned_be64(dg[0], tag + 8);
		put_unaligned_be32(1, iv + GCM_IV_SIZE);
		aes_encrypt(&ctx->aes_key, iv, iv);
		crypto_xor(tag, iv, AES_BLOCK_SIZE);
	}

	if (err)
		return err;

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}

static int gcm_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	unsigned int authsize = crypto_aead_authsize(aead);
	int nrounds = num_rounds(&ctx->aes_key);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u8 iv[AES_BLOCK_SIZE];
	u64 dg[2] = {};
	be128 lengths;
	u8 *tag;
	int err;

	lengths.a = cpu_to_be64(req->assoclen * 8);
	lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(2, iv + GCM_IV_SIZE);

	err = skcipher_walk_aead_decrypt(&walk, req, false);

	if (likely(crypto_simd_usable())) {
		/* bulk path: the PMULL routine hashes and decrypts at once */
		do {
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int nbytes = walk.nbytes;

			tag = (u8 *)&lengths;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
				src = dst = memcpy(buf + sizeof(buf) - nbytes,
						   src, nbytes);
			} else if (nbytes < walk.total) {
				nbytes &= ~(AES_BLOCK_SIZE - 1);
				tag = NULL;
			}

			kernel_neon_begin();
			pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
					  iv, ctx->aes_key.key_enc, nrounds,
					  tag);
			kernel_neon_end();

			if (unlikely(!nbytes))
				break;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
				memcpy(walk.dst.virt.addr,
				       buf + sizeof(buf) - nbytes, nbytes);

			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		} while (walk.nbytes);
	} else {
		/* fallback: hash the ciphertext, then CTR-decrypt it */
		while (walk.nbytes >= AES_BLOCK_SIZE) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;

			ghash_do_update(blocks, dg, walk.src.virt.addr,
					&ctx->ghash_key, NULL, NULL);

			do {
				aes_encrypt(&ctx->aes_key, buf, iv);
				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--blocks > 0);

			err = skcipher_walk_done(&walk,
						 walk.nbytes % AES_BLOCK_SIZE);
		}

		/* handle the tail */
		if (walk.nbytes) {
			memcpy(buf, walk.src.virt.addr, walk.nbytes);
			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
		}

		tag = (u8 *)&lengths;
		ghash_do_update(1, dg, tag, &ctx->ghash_key,
				walk.nbytes ? buf : NULL, NULL);

		if (walk.nbytes) {
			aes_encrypt(&ctx->aes_key, buf, iv);

			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
				       buf, walk.nbytes);

			err = skcipher_walk_done(&walk, 0);
		}

		put_unaligned_be64(dg[1], tag);
		put_unaligned_be64(dg[0], tag + 8);
		put_unaligned_be32(1, iv + GCM_IV_SIZE);
		aes_encrypt(&ctx->aes_key, iv, iv);
		crypto_xor(tag, iv, AES_BLOCK_SIZE);
	}

	if (err)
		return err;

	/* compare calculated auth tag with the stored one */
	scatterwalk_map_and_copy(buf, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	if (crypto_memneq(tag, buf, authsize))
		return -EBADMSG;
	return 0;
}

static struct aead_alg gcm_aes_alg = {
	.ivsize			= GCM_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= gcm_setkey,
	.setauthsize		= gcm_setauthsize,
	.encrypt		= gcm_encrypt,
	.decrypt		= gcm_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "gcm-aes-ce",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
	.base.cra_module	= THIS_MODULE,
};

static int __init ghash_ce_mod_init(void)
{
	int ret;

	if (!cpu_have_named_feature(ASIMD))
		return -ENODEV;

	if (cpu_have_named_feature(PMULL))
		ret = crypto_register_shashes(ghash_alg,
					      ARRAY_SIZE(ghash_alg));
	else
		/* only register the first array element */
		ret = crypto_register_shash(ghash_alg);

	if (ret)
		return ret;

	if (cpu_have_named_feature(PMULL)) {
		ret = crypto_register_aead(&gcm_aes_alg);
		if (ret)
			crypto_unregister_shashes(ghash_alg,
						  ARRAY_SIZE(ghash_alg));
	}
	return ret;
}

static void __exit ghash_ce_mod_exit(void)
{
	if (cpu_have_named_feature(PMULL))
		crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
	else
		crypto_unregister_shash(ghash_alg);
	crypto_unregister_aead(&gcm_aes_alg);
}

static const struct cpu_feature ghash_cpu_feature[] = {
	{ cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);