/*
 * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
12 #include <crypto/algapi.h>
13 #include <crypto/internal/hash.h>
14 #include <crypto/poly1305.h>
15 #include <linux/crypto.h>
16 #include <linux/kernel.h>
17 #include <linux/module.h>
18 #include <asm/fpu/api.h>
/*
 * Descriptor context used by the SIMD Poly1305 driver (alg.descsize is
 * computed from its size).
 *
 * The generic poly1305_desc_ctx is embedded as the first member;
 * poly1305_simd_blocks() asserts offsetof(..., base) == 0 at build time and
 * uses container_of() to get from the generic context back to this one.
 *
 * NOTE(review): the field declarations that the comments below describe, and
 * the closing brace of the struct, are not present in this copy of the file.
 * Code further down reads ->uset, ->wset and ->u, so they presumably belong
 * here - restore them from the original source rather than re-deriving them.
 */
21 struct poly1305_simd_desc_ctx
{
22 struct poly1305_desc_ctx base
;
23 /* derived key u set? */
26 /* derived keys r^3, r^4 set? */
29 /* derived Poly1305 key r^2 */
31 /* ... silently appended r^3 and r^4 when using AVX2 */
34 asmlinkage
void poly1305_block_sse2(u32
*h
, const u8
*src
,
35 const u32
*r
, unsigned int blocks
);
36 asmlinkage
void poly1305_2block_sse2(u32
*h
, const u8
*src
, const u32
*r
,
37 unsigned int blocks
, const u32
*u
);
39 asmlinkage
void poly1305_4block_avx2(u32
*h
, const u8
*src
, const u32
*r
,
40 unsigned int blocks
, const u32
*u
);
41 static bool poly1305_use_avx2
;
/*
 * shash ->init hook (wired up through alg.init).
 *
 * Looks up the SIMD descriptor context and then hands off to the generic
 * crypto_poly1305_init(), whose return value is passed straight through.
 *
 * NOTE(review): sctx is fetched but never used in the lines visible here, and
 * the original line numbers jump from 46 to 53, so whatever was done with
 * sctx (and the function's braces) is missing from this copy. Restore it from
 * the original source.
 */
44 static int poly1305_simd_init(struct shash_desc
*desc
)
46 struct poly1305_simd_desc_ctx
*sctx
= shash_desc_ctx(desc
);
53 return crypto_poly1305_init(desc
);
/*
 * Helper for deriving key powers: feeds one all-zero block through
 * poly1305_block_sse2() with @a as the accumulator and @b as the key, so the
 * result is left in @a. Callers seed @a with a copy of a key and pass
 * dctx->r as @b.
 *
 * NOTE(review): the comment below announces a compensation for the hi-bit,
 * but no statement between the memset() and the block call performs one in
 * this copy (original line 63 is absent). Verify against the original source
 * before relying on this function.
 */
56 static void poly1305_simd_mult(u32
*a
, const u32
*b
)
58 u8 m
[POLY1305_BLOCK_SIZE
];
60 memset(m
, 0, sizeof(m
));
61 /* The poly1305 block function adds a hi-bit to the accumulator which
62 * we don't need for key multiplication; compensate for it. */
64 poly1305_block_sse2(a
, m
, b
, 1);
/*
 * Run as many whole Poly1305 blocks from @src as possible through the SIMD
 * routines, widest first: four blocks at a time (AVX2, only when
 * poly1305_use_avx2 is set), then two at a time (SSE2), then one single
 * block.
 *
 * @dctx:   generic descriptor context; must be the 'base' member of a
 *          struct poly1305_simd_desc_ctx (enforced by the BUILD_BUG_ON below)
 * @src:    input data
 * @srclen: number of input bytes
 *
 * Declared to return unsigned int. The caller treats the result as the
 * number of bytes left unprocessed (it advances its pointer by
 * srclen - result).
 *
 * NOTE(review): this copy of the function is incomplete. Among other things
 * the following are not visible: any update of srclen after the key has been
 * consumed, any assignment to ->uset / ->wset after the derived keys have
 * been computed, the final return statement, and every closing brace.
 * Do not restructure from this text; restore the original first.
 */
67 static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx
*dctx
,
68 const u8
*src
, unsigned int srclen
)
70 struct poly1305_simd_desc_ctx
*sctx
;
71 unsigned int blocks
, datalen
;
/* 'base' must sit at offset 0 for the container_of() below to be a no-op. */
73 BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx
, base
));
74 sctx
= container_of(dctx
, struct poly1305_simd_desc_ctx
, base
);
/*
 * Key not set yet: let the generic helper consume it from the front of the
 * input, then skip past the bytes it took (srclen - datalen of them).
 */
76 if (unlikely(!dctx
->sset
)) {
77 datalen
= crypto_poly1305_setdesckey(dctx
, src
, srclen
);
78 src
+= srclen
- datalen
;
/* AVX2 path: needs at least four full blocks of input. */
83 if (poly1305_use_avx2
&& srclen
>= POLY1305_BLOCK_SIZE
* 4) {
/*
 * Lazily derive the extra keys: u <- r * r, then u + 5 <- u * r and
 * u + 10 <- (u + 5) * r, each via copy followed by poly1305_simd_mult().
 */
84 if (unlikely(!sctx
->wset
)) {
86 memcpy(sctx
->u
, dctx
->r
, sizeof(sctx
->u
));
87 poly1305_simd_mult(sctx
->u
, dctx
->r
);
90 memcpy(sctx
->u
+ 5, sctx
->u
, sizeof(sctx
->u
));
91 poly1305_simd_mult(sctx
->u
+ 5, dctx
->r
);
92 memcpy(sctx
->u
+ 10, sctx
->u
+ 5, sizeof(sctx
->u
));
93 poly1305_simd_mult(sctx
->u
+ 10, dctx
->r
);
/* 'blocks' counts groups of four blocks here. */
96 blocks
= srclen
/ (POLY1305_BLOCK_SIZE
* 4);
97 poly1305_4block_avx2(dctx
->h
, src
, dctx
->r
, blocks
, sctx
->u
);
98 src
+= POLY1305_BLOCK_SIZE
* 4 * blocks
;
99 srclen
-= POLY1305_BLOCK_SIZE
* 4 * blocks
;
/* SSE2 path: needs at least two full blocks; derives u <- r * r on demand. */
102 if (likely(srclen
>= POLY1305_BLOCK_SIZE
* 2)) {
103 if (unlikely(!sctx
->uset
)) {
104 memcpy(sctx
->u
, dctx
->r
, sizeof(sctx
->u
));
105 poly1305_simd_mult(sctx
->u
, dctx
->r
);
/* 'blocks' counts pairs of blocks here. */
108 blocks
= srclen
/ (POLY1305_BLOCK_SIZE
* 2);
109 poly1305_2block_sse2(dctx
->h
, src
, dctx
->r
, blocks
, sctx
->u
);
110 src
+= POLY1305_BLOCK_SIZE
* 2 * blocks
;
111 srclen
-= POLY1305_BLOCK_SIZE
* 2 * blocks
;
/* At most one full block can remain after the two-block pass. */
113 if (srclen
>= POLY1305_BLOCK_SIZE
) {
114 poly1305_block_sse2(dctx
->h
, src
, dctx
->r
, 1);
115 srclen
-= POLY1305_BLOCK_SIZE
;
/*
 * shash ->update hook (wired up through alg.update).
 *
 * Inputs of at most 288 bytes, or calls made while may_use_simd() is false,
 * are delegated to the generic crypto_poly1305_update(). Otherwise the
 * function works in three steps:
 *   1. top up a partially filled dctx->buf and, once it holds a full block,
 *      push it through poly1305_simd_blocks();
 *   2. push the whole blocks of @src through poly1305_simd_blocks();
 *   3. stash any remaining tail bytes in dctx->buf for the next call.
 *
 * NOTE(review): this copy of the function is incomplete. Not visible here:
 * the declaration of 'bytes', the kernel_fpu_begin()/kernel_fpu_end() calls
 * that the comment below refers to, any adjustment of src/srclen after the
 * buffer top-up, any reset of dctx->buflen after the buffered block is
 * processed, any update of srclen after step 2, the return statement on the
 * SIMD path, and every closing brace. Restore the original before editing.
 */
120 static int poly1305_simd_update(struct shash_desc
*desc
,
121 const u8
*src
, unsigned int srclen
)
123 struct poly1305_desc_ctx
*dctx
= shash_desc_ctx(desc
);
126 /* kernel_fpu_begin/end is costly, use fallback for small updates */
127 if (srclen
<= 288 || !may_use_simd())
128 return crypto_poly1305_update(desc
, src
, srclen
);
/* Step 1: complete a block left over from a previous call. */
132 if (unlikely(dctx
->buflen
)) {
133 bytes
= min(srclen
, POLY1305_BLOCK_SIZE
- dctx
->buflen
);
134 memcpy(dctx
->buf
+ dctx
->buflen
, src
, bytes
);
137 dctx
->buflen
+= bytes
;
139 if (dctx
->buflen
== POLY1305_BLOCK_SIZE
) {
140 poly1305_simd_blocks(dctx
, dctx
->buf
,
141 POLY1305_BLOCK_SIZE
);
/* Step 2: bulk data; the helper's result is used as the byte count left over. */
146 if (likely(srclen
>= POLY1305_BLOCK_SIZE
)) {
147 bytes
= poly1305_simd_blocks(dctx
, src
, srclen
);
148 src
+= srclen
- bytes
;
/* Step 3: keep the sub-block tail for the next update/final call. */
154 if (unlikely(srclen
)) {
155 dctx
->buflen
= srclen
;
156 memcpy(dctx
->buf
, src
, srclen
);
/*
 * shash algorithm descriptor for the SIMD Poly1305 driver; registered and
 * unregistered by the module init/exit functions below.
 *
 * init and update are the SIMD-aware hooks defined in this file, final is
 * the generic crypto_poly1305_final(). descsize is assigned again at module
 * init (and enlarged when the AVX2 path is enabled).
 *
 * NOTE(review): the cra_* initializers presumably belong to a nested member
 * whose opening line is not present in this copy; original line 171 and the
 * closing braces are missing as well. Restore them from the original source.
 */
162 static struct shash_alg alg
= {
163 .digestsize
= POLY1305_DIGEST_SIZE
,
164 .init
= poly1305_simd_init
,
165 .update
= poly1305_simd_update
,
166 .final
= crypto_poly1305_final
,
167 .descsize
= sizeof(struct poly1305_simd_desc_ctx
),
169 .cra_name
= "poly1305",
170 .cra_driver_name
= "poly1305-simd",
172 .cra_blocksize
= POLY1305_BLOCK_SIZE
,
173 .cra_module
= THIS_MODULE
,
/*
 * Module init: probe CPU features and register the shash algorithm.
 *
 * Checks X86_FEATURE_XMM2 first. When built with CONFIG_AS_AVX2 it enables
 * the AVX2 path only if the CPU reports AVX and AVX2 and
 * cpu_has_xfeatures() confirms the SSE and YMM state components; in that
 * case the descriptor is enlarged by 10 u32 words, which matches the extra
 * key material poly1305_simd_blocks() stores at u + 5 and u + 10.
 *
 * Returns the result of crypto_register_shash().
 *
 * NOTE(review): the statement controlled by the XMM2 check, the #endif that
 * closes the CONFIG_AS_AVX2 section, and the function's braces are not
 * present in this copy. Restore them from the original source.
 */
177 static int __init
poly1305_simd_mod_init(void)
179 if (!boot_cpu_has(X86_FEATURE_XMM2
))
182 #ifdef CONFIG_AS_AVX2
183 poly1305_use_avx2
= boot_cpu_has(X86_FEATURE_AVX
) &&
184 boot_cpu_has(X86_FEATURE_AVX2
) &&
185 cpu_has_xfeatures(XFEATURE_MASK_SSE
| XFEATURE_MASK_YMM
, NULL
);
186 alg
.descsize
= sizeof(struct poly1305_simd_desc_ctx
);
187 if (poly1305_use_avx2
)
188 alg
.descsize
+= 10 * sizeof(u32
);
190 return crypto_register_shash(&alg
);
193 static void __exit
poly1305_simd_mod_exit(void)
195 crypto_unregister_shash(&alg
);
198 module_init(poly1305_simd_mod_init
);
199 module_exit(poly1305_simd_mod_exit
);
201 MODULE_LICENSE("GPL");
202 MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
203 MODULE_DESCRIPTION("Poly1305 authenticator");
204 MODULE_ALIAS_CRYPTO("poly1305");
205 MODULE_ALIAS_CRYPTO("poly1305-simd");