// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");

#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16
#define GCM_IV_SIZE		12

struct ghash_key {
	be128			k;
	u64			h[][2];
};

struct ghash_desc_ctx {
	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
	u8 buf[GHASH_BLOCK_SIZE];
	u32 count;
};

struct gcm_aes_ctx {
	struct crypto_aes_ctx	aes_key;
	struct ghash_key	ghash_key;
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
				       u64 const h[][2], const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
				      u64 const h[][2], const char *head);

asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
				  u64 const h[][2], u64 dg[], u8 ctr[],
				  u32 const rk[], int rounds, u8 tag[]);
asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
				 u64 const h[][2], u64 dg[], u8 ctr[],
				 u32 const rk[], int rounds, const u8 l[],
				 const u8 tag[], u64 authsize);

static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}

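/*
 * Scalar fallback: process @blocks GHASH blocks from @src (optionally
 * preceded by the single block at @head), XORing each block into the
 * running digest and multiplying by the hash key in GF(2^128) via
 * gf128mul_lle().  dg[] holds the running digest as two 64-bit words in
 * the order used by the PMULL asm.
 */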
static void ghash_do_update(int blocks, u64 dg[], const char *src,
			    struct ghash_key *key, const char *head)
{
	be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

	do {
		const u8 *in = src;

		if (head) {
			in = head;
			blocks++;
			head = NULL;
		} else {
			src += GHASH_BLOCK_SIZE;
		}

		crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
		gf128mul_lle(&dst, &key->k);
	} while (--blocks);

	dg[0] = be64_to_cpu(dst.b);
	dg[1] = be64_to_cpu(dst.a);
}

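/*
 * Dispatch a GHASH update either to the PMULL asm (under kernel-mode
 * NEON) when SIMD is usable in this context, or to the scalar
 * gf128mul-based fallback above.
 */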
static __always_inline
void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
			  struct ghash_key *key, const char *head,
			  void (*simd_update)(int blocks, u64 dg[],
					      const char *src,
					      u64 const h[][2],
					      const char *head))
{
	if (likely(crypto_simd_usable())) {
		kernel_neon_begin();
		simd_update(blocks, dg, src, key->h, head);
		kernel_neon_end();
	} else {
		ghash_do_update(blocks, dg, src, key, head);
	}
}

/* avoid hogging the CPU for too long */
#define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)

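/*
 * Buffer partial input in ctx->buf and hash full blocks in chunks of at
 * most MAX_BLOCKS so a single update call cannot monopolise the CPU with
 * the NEON unit held.
 */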
static int ghash_update(struct shash_desc *desc, const u8 *src,
			unsigned int len)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	ctx->count += len;

	if ((partial + len) >= GHASH_BLOCK_SIZE) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
		int blocks;

		if (partial) {
			int p = GHASH_BLOCK_SIZE - partial;

			memcpy(ctx->buf + partial, src, p);
			src += p;
			len -= p;
		}

		blocks = len / GHASH_BLOCK_SIZE;
		len %= GHASH_BLOCK_SIZE;

		do {
			int chunk = min(blocks, MAX_BLOCKS);

			ghash_do_simd_update(chunk, ctx->digest, src, key,
					     partial ? ctx->buf : NULL,
					     pmull_ghash_update_p8);

			blocks -= chunk;
			src += chunk * GHASH_BLOCK_SIZE;
			partial = 0;
		} while (unlikely(blocks > 0));
	}
	if (len)
		memcpy(ctx->buf + partial, src, len);
	return 0;
}

static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	if (partial) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);

		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

		ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
				     pmull_ghash_update_p8);
	}
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);

	memzero_explicit(ctx, sizeof(*ctx));
	return 0;
}

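/*
 * Pre-process the hash key for the PMULL routines: rotate the 128-bit
 * value left by one bit and fold in the GHASH reduction polynomial (the
 * 0xc2... constant) when the top bit carries out, producing the shifted
 * representation the asm expects.
 */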
static void ghash_reflect(u64 h[], const be128 *k)
{
	u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;

	h[0] = (be64_to_cpu(k->b) << 1) | carry;
	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

	if (carry)
		h[1] ^= 0xc200000000000000UL;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *inkey, unsigned int keylen)
{
	struct ghash_key *key = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE)
		return -EINVAL;

	/* needed for the fallback */
	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);

	ghash_reflect(key->h[0], &key->k);
	return 0;
}

static struct shash_alg ghash_alg = {
	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-neon",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key) + sizeof(u64[2]),
	.base.cra_module	= THIS_MODULE,

	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update,
	.final			= ghash_final,
	.setkey			= ghash_setkey,
	.descsize		= sizeof(struct ghash_desc_ctx),
};

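/*
 * Illustrative only (not part of this driver): the hash registered above
 * is reachable through the regular shash API, e.g.
 *
 *	struct crypto_shash *tfm = crypto_alloc_shash("ghash", 0, 0);
 *
 *	crypto_shash_setkey(tfm, key, GHASH_BLOCK_SIZE);
 *	crypto_shash_tfm_digest(tfm, data, len, out);
 *	crypto_free_shash(tfm);
 */
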
static int num_rounds(struct crypto_aes_ctx *ctx)
{
	/*
	 * # of rounds specified by AES:
	 * 128 bit key		10 rounds
	 * 192 bit key		12 rounds
	 * 256 bit key		14 rounds
	 * => n byte key	=> 6 + (n/4) rounds
	 */
	return 6 + ctx->key_length / 4;
}

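/*
 * Derive the GHASH key H = AES_K(0^128) from the AES key and precompute
 * H, H^2, H^3 and H^4 (in the shifted form produced by ghash_reflect())
 * so the PMULL code can aggregate up to four blocks per reduction.
 */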
static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
		      unsigned int keylen)
{
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
	u8 key[GHASH_BLOCK_SIZE];
	be128 h;
	int ret;

	ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
	if (ret)
		return -EINVAL;

	aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});

	/* needed for the fallback */
	memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);

	ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k);

	h = ctx->ghash_key.k;
	gf128mul_lle(&h, &ctx->ghash_key.k);
	ghash_reflect(ctx->ghash_key.h[1], &h);

	gf128mul_lle(&h, &ctx->ghash_key.k);
	ghash_reflect(ctx->ghash_key.h[2], &h);

	gf128mul_lle(&h, &ctx->ghash_key.k);
	ghash_reflect(ctx->ghash_key.h[3], &h);

	return 0;
}

static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	/* GCM allows tag sizes of 4, 8 and 12..16 bytes */
	switch (authsize) {
	case 4:
	case 8:
	case 12 ... 16:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
			   int *buf_count, struct gcm_aes_ctx *ctx)
{
	if (*buf_count > 0) {
		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

		memcpy(&buf[*buf_count], src, buf_added);

		*buf_count += buf_added;
		src += buf_added;
		count -= buf_added;
	}

	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
		int blocks = count / GHASH_BLOCK_SIZE;

		ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
				     *buf_count ? buf : NULL,
				     pmull_ghash_update_p64);

		src += blocks * GHASH_BLOCK_SIZE;
		count %= GHASH_BLOCK_SIZE;
		*buf_count = 0;
	}

	if (count > 0) {
		memcpy(buf, src, count);
		*buf_count = count;
	}
}

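/*
 * Hash the associated data into dg[] by walking the AAD part of the
 * source scatterlist, buffering partial blocks in a stack buffer and
 * zero-padding the final partial block as required by GHASH.
 */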
static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	u8 buf[GHASH_BLOCK_SIZE];
	struct scatter_walk walk;
	u32 len = req->assoclen;
	int buf_count = 0;

	scatterwalk_start(&walk, req->src);

	do {
		u32 n = scatterwalk_clamp(&walk, len);
		u8 *p;

		if (!n) {
			scatterwalk_start(&walk, sg_next(walk.sg));
			n = scatterwalk_clamp(&walk, len);
		}
		p = scatterwalk_map(&walk);

		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
		len -= n;

		scatterwalk_unmap(p);
		scatterwalk_advance(&walk, n);
		scatterwalk_done(&walk, 0, len);
	} while (len);

	if (buf_count) {
		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
		ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
				     pmull_ghash_update_p64);
	}
}

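/*
 * GCM encryption: hash the AAD, then CTR-encrypt the plaintext while
 * folding the ciphertext into the GHASH state.  When NEON is usable the
 * bulk of the work is done by pmull_gcm_encrypt(), which also finalises
 * the tag; otherwise a scalar CTR + gf128mul fallback is used and the
 * tag is the final GHASH value XORed with the encrypted initial counter
 * block (counter value 1).
 */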
static int gcm_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	int nrounds = num_rounds(&ctx->aes_key);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u8 iv[AES_BLOCK_SIZE];
	u64 dg[2] = {};
	be128 lengths;
	u8 *tag;
	int err;

	lengths.a = cpu_to_be64(req->assoclen * 8);
	lengths.b = cpu_to_be64(req->cryptlen * 8);

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(2, iv + GCM_IV_SIZE);

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	if (likely(crypto_simd_usable())) {
		do {
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int nbytes = walk.nbytes;

			tag = (u8 *)&lengths;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
				src = dst = memcpy(buf + sizeof(buf) - nbytes,
						   src, nbytes);
			} else if (nbytes < walk.total) {
				nbytes &= ~(AES_BLOCK_SIZE - 1);
				tag = NULL;
			}

			kernel_neon_begin();
			pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
					  dg, iv, ctx->aes_key.key_enc, nrounds,
					  tag);
			kernel_neon_end();

			if (unlikely(!nbytes))
				break;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
				memcpy(walk.dst.virt.addr,
				       buf + sizeof(buf) - nbytes, nbytes);

			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		} while (walk.nbytes);
	} else {
		while (walk.nbytes >= AES_BLOCK_SIZE) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int remaining = blocks;

			do {
				aes_encrypt(&ctx->aes_key, buf, iv);
				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--remaining > 0);

			ghash_do_update(blocks, dg, walk.dst.virt.addr,
					&ctx->ghash_key, NULL);

			err = skcipher_walk_done(&walk,
						 walk.nbytes % AES_BLOCK_SIZE);
		}

		/* handle the tail */
		if (walk.nbytes) {
			aes_encrypt(&ctx->aes_key, buf, iv);

			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
				       buf, walk.nbytes);

			memcpy(buf, walk.dst.virt.addr, walk.nbytes);
			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
		}

		tag = (u8 *)&lengths;
		ghash_do_update(1, dg, tag, &ctx->ghash_key,
				walk.nbytes ? buf : NULL);

		if (walk.nbytes)
			err = skcipher_walk_done(&walk, 0);

		put_unaligned_be64(dg[1], tag);
		put_unaligned_be64(dg[0], tag + 8);
		put_unaligned_be32(1, iv + GCM_IV_SIZE);
		aes_encrypt(&ctx->aes_key, iv, iv);
		crypto_xor(tag, iv, AES_BLOCK_SIZE);
	}

	if (err)
		return err;

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}

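/*
 * GCM decryption mirrors gcm_encrypt(), except that the ciphertext is
 * hashed before it is decrypted and the computed tag is compared against
 * the one stored at the end of the source data: the PMULL path does the
 * comparison inside the asm and returns nonzero on mismatch, while the
 * scalar fallback uses crypto_memneq().  -EBADMSG is returned when
 * authentication fails.
 */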
static int gcm_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	unsigned int authsize = crypto_aead_authsize(aead);
	int nrounds = num_rounds(&ctx->aes_key);
	struct skcipher_walk walk;
	u8 otag[AES_BLOCK_SIZE];
	u8 buf[AES_BLOCK_SIZE];
	u8 iv[AES_BLOCK_SIZE];
	u64 dg[2] = {};
	be128 lengths;
	u8 *tag;
	int ret;
	int err;

	lengths.a = cpu_to_be64(req->assoclen * 8);
	lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(2, iv + GCM_IV_SIZE);

	scatterwalk_map_and_copy(otag, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	err = skcipher_walk_aead_decrypt(&walk, req, false);

	if (likely(crypto_simd_usable())) {
		do {
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;
			int nbytes = walk.nbytes;

			tag = (u8 *)&lengths;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
				src = dst = memcpy(buf + sizeof(buf) - nbytes,
						   src, nbytes);
			} else if (nbytes < walk.total) {
				nbytes &= ~(AES_BLOCK_SIZE - 1);
				tag = NULL;
			}

			kernel_neon_begin();
			ret = pmull_gcm_decrypt(nbytes, dst, src,
						ctx->ghash_key.h,
						dg, iv, ctx->aes_key.key_enc,
						nrounds, tag, otag, authsize);
			kernel_neon_end();

			if (unlikely(!nbytes))
				break;

			if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
				memcpy(walk.dst.virt.addr,
				       buf + sizeof(buf) - nbytes, nbytes);

			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		} while (walk.nbytes);

		if (err)
			return err;

		return ret ? -EBADMSG : 0;
	} else {
		while (walk.nbytes >= AES_BLOCK_SIZE) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			const u8 *src = walk.src.virt.addr;
			u8 *dst = walk.dst.virt.addr;

			ghash_do_update(blocks, dg, walk.src.virt.addr,
					&ctx->ghash_key, NULL);

			do {
				aes_encrypt(&ctx->aes_key, buf, iv);
				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--blocks > 0);

			err = skcipher_walk_done(&walk,
						 walk.nbytes % AES_BLOCK_SIZE);
		}

		/* handle the tail */
		if (walk.nbytes) {
			memcpy(buf, walk.src.virt.addr, walk.nbytes);
			memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
		}

		tag = (u8 *)&lengths;
		ghash_do_update(1, dg, tag, &ctx->ghash_key,
				walk.nbytes ? buf : NULL);

		if (walk.nbytes) {
			aes_encrypt(&ctx->aes_key, buf, iv);

			crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
				       buf, walk.nbytes);

			err = skcipher_walk_done(&walk, 0);
		}

		if (err)
			return err;

		put_unaligned_be64(dg[1], tag);
		put_unaligned_be64(dg[0], tag + 8);
		put_unaligned_be32(1, iv + GCM_IV_SIZE);
		aes_encrypt(&ctx->aes_key, iv, iv);
		crypto_xor(tag, iv, AES_BLOCK_SIZE);

		if (crypto_memneq(tag, otag, authsize)) {
			memzero_explicit(tag, AES_BLOCK_SIZE);
			return -EBADMSG;
		}
	}
	return 0;
}

static struct aead_alg gcm_aes_alg = {
	.ivsize			= GCM_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= gcm_setkey,
	.setauthsize		= gcm_setauthsize,
	.encrypt		= gcm_encrypt,
	.decrypt		= gcm_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "gcm-aes-ce",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx) +
				  4 * sizeof(u64[2]),
	.base.cra_module	= THIS_MODULE,
};

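/*
 * Illustrative only (not part of this driver): the AEAD registered above
 * is selected through the generic API by name, e.g.
 *
 *	struct crypto_aead *tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
 *
 * which prefers this driver ("gcm-aes-ce", priority 300) over the generic
 * gcm(aes) template on CPUs that advertise PMULL.
 */
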
static int __init ghash_ce_mod_init(void)
{
	if (!cpu_have_named_feature(ASIMD))
		return -ENODEV;

	if (cpu_have_named_feature(PMULL))
		return crypto_register_aead(&gcm_aes_alg);

	return crypto_register_shash(&ghash_alg);
}

static void __exit ghash_ce_mod_exit(void)
{
	if (cpu_have_named_feature(PMULL))
		crypto_unregister_aead(&gcm_aes_alg);
	else
		crypto_unregister_shash(&ghash_alg);
}

static const struct cpu_feature ghash_cpu_feature[] = {
	{ cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);