/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");

#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16
#define GCM_IV_SIZE		12
struct ghash_key {
	u64	h[2];
	u64	h2[2];
	u64	h3[2];
	u64	h4[2];
	be128	k;
};

struct ghash_desc_ctx {
	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
	u8 buf[GHASH_BLOCK_SIZE];
	u32 count;
};

struct gcm_aes_ctx {
	struct crypto_aes_ctx	aes_key;
	struct ghash_key	ghash_key;
};
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
				       struct ghash_key const *k,
				       const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
				      struct ghash_key const *k,
				      const char *head);

static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
				  struct ghash_key const *k,
				  const char *head);
asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
				  const u8 src[], struct ghash_key const *k,
				  u8 ctr[], u32 const rk[], int rounds,
				  u8 ks[]);

asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
				  const u8 src[], struct ghash_key const *k,
				  u8 ctr[], u32 const rk[], int rounds);

asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
					u32 const rk[], int rounds);
asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}
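/*
 * Hash @blocks full blocks from @src into the digest in @dg, optionally
 * preceded by one extra block passed in @head.  Uses the PMULL-based NEON
 * routine when SIMD is usable and falls back to the generic
 * gf128mul_lle() path otherwise.
 */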
static void ghash_do_update(int blocks, u64 dg[], const char *src,
			    struct ghash_key *key, const char *head)
{
	if (likely(may_use_simd())) {
		kernel_neon_begin();
		pmull_ghash_update(blocks, dg, src, key, head);
		kernel_neon_end();
	} else {
		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

		do {
			const u8 *in = src;

			if (head) {
				in = head;
				blocks++;
				head = NULL;
			} else {
				src += GHASH_BLOCK_SIZE;
			}

			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
			gf128mul_lle(&dst, &key->k);
		} while (--blocks);

		dg[0] = be64_to_cpu(dst.b);
		dg[1] = be64_to_cpu(dst.a);
	}
}
/* avoid hogging the CPU for too long */
#define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)
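/*
 * Buffer partial input in ctx->buf and hash full blocks in chunks of at
 * most MAX_BLOCKS per call to ghash_do_update().
 */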
static int ghash_update(struct shash_desc *desc, const u8 *src,
			unsigned int len)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	ctx->count += len;

	if ((partial + len) >= GHASH_BLOCK_SIZE) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
		int blocks;

		if (partial) {
			int p = GHASH_BLOCK_SIZE - partial;

			memcpy(ctx->buf + partial, src, p);
			src += p;
			len -= p;
		}

		blocks = len / GHASH_BLOCK_SIZE;
		len %= GHASH_BLOCK_SIZE;

		do {
			int chunk = min(blocks, MAX_BLOCKS);

			ghash_do_update(chunk, ctx->digest, src, key,
					partial ? ctx->buf : NULL);

			blocks -= chunk;
			src += chunk * GHASH_BLOCK_SIZE;
			partial = 0;
		} while (unlikely(blocks > 0));
	}
	if (len)
		memcpy(ctx->buf + partial, src, len);
	return 0;
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	if (partial) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);

		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
	}
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}
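/*
 * Convert a hash key from the byte order used by the generic GF(2^128)
 * code into the pre-shifted form expected by the PMULL assembly: shift
 * left by one bit and reduce with the GHASH polynomial constant
 * 0xc200000000000000 when a carry occurs.
 */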
static void ghash_reflect(u64 h[], const be128 *k)
{
	u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;

	h[0] = (be64_to_cpu(k->b) << 1) | carry;
	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

	if (carry)
		h[1] ^= 0xc200000000000000UL;
}
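/*
 * Precompute the powers H, H^2, H^3 and H^4 of the hash key in the
 * PMULL-friendly representation, keeping the original key in key->k for
 * the non-SIMD fallback.
 */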
static int __ghash_setkey(struct ghash_key *key,
			  const u8 *inkey, unsigned int keylen)
{
	be128 h;

	/* needed for the fallback */
	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);

	ghash_reflect(key->h, &key->k);

	h = key->k;
	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h2, &h);

	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h3, &h);

	gf128mul_lle(&h, &key->k);
	ghash_reflect(key->h4, &h);

	return 0;
}
static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *inkey, unsigned int keylen)
{
	struct ghash_key *key = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE) {
		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	return __ghash_setkey(key, inkey, keylen);
}
static struct shash_alg ghash_alg = {
	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-ce",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key),
	.base.cra_module	= THIS_MODULE,

	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update,
	.final			= ghash_final,
	.setkey			= ghash_setkey,
	.descsize		= sizeof(struct ghash_desc_ctx),
};
static int num_rounds(struct crypto_aes_ctx *ctx)
{
	/*
	 * # of rounds specified by AES:
	 * 128 bit key		10 rounds
	 * 192 bit key		12 rounds
	 * 256 bit key		14 rounds
	 * => n byte key	=> 6 + (n/4) rounds
	 */
	return 6 + ctx->key_length / 4;
}
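/*
 * Expand the AES key and derive the GHASH key by encrypting an all-zero
 * block, then precompute the hash key powers via __ghash_setkey().
 */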
static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
		      unsigned int keylen)
{
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
	u8 key[GHASH_BLOCK_SIZE];
	int ret;

	ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
	if (ret) {
		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
			    num_rounds(&ctx->aes_key));

	return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
}
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	switch (authsize) {
	case 4:
	case 8:
	case 12 ... 16:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
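/*
 * Fold @count bytes of associated data into the GHASH state in @dg,
 * buffering any trailing partial block in @buf between calls.
 */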
static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
			   int *buf_count, struct gcm_aes_ctx *ctx)
{
	if (*buf_count > 0) {
		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

		memcpy(&buf[*buf_count], src, buf_added);

		*buf_count += buf_added;
		src += buf_added;
		count -= buf_added;
	}

	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
		int blocks = count / GHASH_BLOCK_SIZE;

		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
				*buf_count ? buf : NULL);

		src += blocks * GHASH_BLOCK_SIZE;
		count %= GHASH_BLOCK_SIZE;
		*buf_count = 0;
	}

	if (count > 0) {
		memcpy(buf, src, count);
		*buf_count = count;
	}
}
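/*
 * Walk the associated data in the request's source scatterlist and feed
 * it into the GHASH state, zero-padding the final partial block.
 */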
static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	u8 buf[GHASH_BLOCK_SIZE];
	struct scatter_walk walk;
	u32 len = req->assoclen;
	int buf_count = 0;

	scatterwalk_start(&walk, req->src);

	do {
		u32 n = scatterwalk_clamp(&walk, len);
		u8 *p;

		if (!n) {
			scatterwalk_start(&walk, sg_next(walk.sg));
			n = scatterwalk_clamp(&walk, len);
		}
		p = scatterwalk_map(&walk);

		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
		len -= n;

		scatterwalk_unmap(p);
		scatterwalk_advance(&walk, n);
		scatterwalk_done(&walk, 0, len);
	} while (len);

	if (buf_count) {
		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
	}
}
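/*
 * Hash the bit lengths of the associated data and the ciphertext, then
 * XOR the resulting GHASH value into the encrypted initial counter block
 * to produce the authentication tag.
 */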
static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
		      u64 dg[], u8 tag[], int cryptlen)
{
	u8 mac[AES_BLOCK_SIZE];
	be128 lengths;

	lengths.a = cpu_to_be64(req->assoclen * 8);
	lengths.b = cpu_to_be64(cryptlen * 8);

	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);

	put_unaligned_be64(dg[1], mac);
	put_unaligned_be64(dg[0], mac + 8);

	crypto_xor(tag, mac, AES_BLOCK_SIZE);
}
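/*
 * GCM encryption: hash the associated data, CTR-encrypt and GHASH the
 * plaintext (two AES blocks at a time via the PMULL/AES assembly when
 * SIMD is usable, via scalar AES otherwise) and append the authentication
 * tag to the destination scatterlist.
 */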
static int gcm_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	struct skcipher_walk walk;
	u8 iv[AES_BLOCK_SIZE];
	u8 ks[2 * AES_BLOCK_SIZE];
	u8 tag[AES_BLOCK_SIZE];
	u64 dg[2] = {};
	int nrounds = num_rounds(&ctx->aes_key);
	int err;

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(1, iv + GCM_IV_SIZE);

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
		u32 const *rk = NULL;

		kernel_neon_begin();
		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
		put_unaligned_be32(2, iv + GCM_IV_SIZE);
		pmull_gcm_encrypt_block(ks, iv, NULL, nrounds);
		put_unaligned_be32(3, iv + GCM_IV_SIZE);
		pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds);
		put_unaligned_be32(4, iv + GCM_IV_SIZE);

		do {
			int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;

			if (rk)
				kernel_neon_begin();

			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
					  walk.src.virt.addr, &ctx->ghash_key,
					  iv, rk, nrounds, ks);
			kernel_neon_end();

			err = skcipher_walk_done(&walk,
					walk.nbytes % (2 * AES_BLOCK_SIZE));

			rk = ctx->aes_key.key_enc;
		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
	} else {
		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
		put_unaligned_be32(2, iv + GCM_IV_SIZE);

		while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			u8 *dst = walk.dst.virt.addr;
			u8 *src = walk.src.virt.addr;

			do {
				__aes_arm64_encrypt(ctx->aes_key.key_enc,
						    ks, iv, nrounds);
				crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--blocks > 0);

			ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
					walk.dst.virt.addr, &ctx->ghash_key,
					NULL);

			err = skcipher_walk_done(&walk,
					walk.nbytes % (2 * AES_BLOCK_SIZE));
		}
		if (walk.nbytes) {
			__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
					    nrounds);
			if (walk.nbytes > AES_BLOCK_SIZE) {
				crypto_inc(iv, AES_BLOCK_SIZE);
				__aes_arm64_encrypt(ctx->aes_key.key_enc,
						    ks + AES_BLOCK_SIZE, iv,
						    nrounds);
			}
		}
	}

	/* handle the tail */
	if (walk.nbytes) {
		u8 buf[GHASH_BLOCK_SIZE];
		unsigned int nbytes = walk.nbytes;
		u8 *dst = walk.dst.virt.addr;
		u8 *head = NULL;

		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
			       walk.nbytes);

		if (walk.nbytes > GHASH_BLOCK_SIZE) {
			head = dst;
			dst += GHASH_BLOCK_SIZE;
			nbytes %= GHASH_BLOCK_SIZE;
		}

		memcpy(buf, dst, nbytes);
		memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);

		err = skcipher_walk_done(&walk, 0);
	}

	if (err)
		return err;

	gcm_final(req, ctx, dg, tag, req->cryptlen);

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}
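/*
 * GCM decryption: hash the associated data and the ciphertext, CTR-decrypt
 * the payload and compare the computed tag against the one stored at the
 * end of the source scatterlist, returning -EBADMSG on mismatch.
 */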
static int gcm_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
	unsigned int authsize = crypto_aead_authsize(aead);
	struct skcipher_walk walk;
	u8 iv[2 * AES_BLOCK_SIZE];
	u8 tag[AES_BLOCK_SIZE];
	u8 buf[2 * GHASH_BLOCK_SIZE];
	u64 dg[2] = {};
	int nrounds = num_rounds(&ctx->aes_key);
	int err;

	if (req->assoclen)
		gcm_calculate_auth_mac(req, dg);

	memcpy(iv, req->iv, GCM_IV_SIZE);
	put_unaligned_be32(1, iv + GCM_IV_SIZE);

	err = skcipher_walk_aead_decrypt(&walk, req, false);

	if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
		u32 const *rk = NULL;

		kernel_neon_begin();
		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
		put_unaligned_be32(2, iv + GCM_IV_SIZE);

		do {
			int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
			int rem = walk.total - blocks * AES_BLOCK_SIZE;

			if (rk)
				kernel_neon_begin();

			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
					  walk.src.virt.addr, &ctx->ghash_key,
					  iv, rk, nrounds);

			/* check if this is the final iteration of the loop */
			if (rem < (2 * AES_BLOCK_SIZE)) {
				u8 *iv2 = iv + AES_BLOCK_SIZE;

				if (rem > AES_BLOCK_SIZE) {
					memcpy(iv2, iv, AES_BLOCK_SIZE);
					crypto_inc(iv2, AES_BLOCK_SIZE);
				}

				pmull_gcm_encrypt_block(iv, iv, NULL, nrounds);

				if (rem > AES_BLOCK_SIZE)
					pmull_gcm_encrypt_block(iv2, iv2, NULL,
								nrounds);
			}

			kernel_neon_end();

			err = skcipher_walk_done(&walk,
					walk.nbytes % (2 * AES_BLOCK_SIZE));

			rk = ctx->aes_key.key_enc;
		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
	} else {
		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
		put_unaligned_be32(2, iv + GCM_IV_SIZE);

		while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
			int blocks = walk.nbytes / AES_BLOCK_SIZE;
			u8 *dst = walk.dst.virt.addr;
			u8 *src = walk.src.virt.addr;

			ghash_do_update(blocks, dg, walk.src.virt.addr,
					&ctx->ghash_key, NULL);

			do {
				__aes_arm64_encrypt(ctx->aes_key.key_enc,
						    buf, iv, nrounds);
				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
				crypto_inc(iv, AES_BLOCK_SIZE);

				dst += AES_BLOCK_SIZE;
				src += AES_BLOCK_SIZE;
			} while (--blocks > 0);

			err = skcipher_walk_done(&walk,
					walk.nbytes % (2 * AES_BLOCK_SIZE));
		}
		if (walk.nbytes) {
			if (walk.nbytes > AES_BLOCK_SIZE) {
				u8 *iv2 = iv + AES_BLOCK_SIZE;

				memcpy(iv2, iv, AES_BLOCK_SIZE);
				crypto_inc(iv2, AES_BLOCK_SIZE);

				__aes_arm64_encrypt(ctx->aes_key.key_enc, iv2,
						    iv2, nrounds);
			}
			__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
					    nrounds);
		}
	}

	/* handle the tail */
	if (walk.nbytes) {
		const u8 *src = walk.src.virt.addr;
		const u8 *head = NULL;
		unsigned int nbytes = walk.nbytes;

		if (walk.nbytes > GHASH_BLOCK_SIZE) {
			head = src;
			src += GHASH_BLOCK_SIZE;
			nbytes %= GHASH_BLOCK_SIZE;
		}

		memcpy(buf, src, nbytes);
		memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);

		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
			       walk.nbytes);

		err = skcipher_walk_done(&walk, 0);
	}

	if (err)
		return err;

	gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);

	/* compare calculated auth tag with the stored one */
	scatterwalk_map_and_copy(buf, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	if (crypto_memneq(tag, buf, authsize))
		return -EBADMSG;
	return 0;
}
static struct aead_alg gcm_aes_alg = {
	.ivsize			= GCM_IV_SIZE,
	.chunksize		= 2 * AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= gcm_setkey,
	.setauthsize		= gcm_setauthsize,
	.encrypt		= gcm_encrypt,
	.decrypt		= gcm_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "gcm-aes-ce",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
	.base.cra_module	= THIS_MODULE,
};
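/*
 * Register the GHASH shash whenever ASIMD is present, selecting the
 * 64-bit PMULL routine or the 8-bit polynomial multiply fallback based on
 * the CPU's hwcaps, and register the GCM AEAD only when 64-bit PMULL is
 * available.
 */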
static int __init ghash_ce_mod_init(void)
{
	int ret;

	if (!(elf_hwcap & HWCAP_ASIMD))
		return -ENODEV;

	if (elf_hwcap & HWCAP_PMULL)
		pmull_ghash_update = pmull_ghash_update_p64;
	else
		pmull_ghash_update = pmull_ghash_update_p8;

	ret = crypto_register_shash(&ghash_alg);
	if (ret)
		return ret;

	if (elf_hwcap & HWCAP_PMULL) {
		ret = crypto_register_aead(&gcm_aes_alg);
		if (ret)
			crypto_unregister_shash(&ghash_alg);
	}
	return ret;
}
static void __exit ghash_ce_mod_exit(void)
{
	crypto_unregister_shash(&ghash_alg);
	crypto_unregister_aead(&gcm_aes_alg);
}

static const struct cpu_feature ghash_cpu_feature[] = {
	{ cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);