1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * AES-NI + SSE4.1 implementation of AEGIS-128
5 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
6 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
7 * Copyright 2024 Google LLC
10 #include <linux/linkage.h>
/*
 * Read-only constant data placed in mergeable sections (flags "aM", entry
 * size 32).
 * NOTE(review): the alignment directives and the label lines of these
 * sections are not visible in this excerpt. aegis128_aesni_init reads
 * .Laegis128_const_0 and .Laegis128_const_1 and aegis128_aesni_dec_tail
 * reads .Lzeropad_mask+16, so the labels presumably sit directly above the
 * data below - confirm against the full file.
 */
22 .section .rodata.cst16.aegis128_const, "aM", @progbits, 32
/* 32 bytes: the Fibonacci sequence 0, 1, 1, 2, 3, 5, ... reduced modulo 256. */
25 .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
26 .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
28 .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
29 .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
/* Presumably the byte mask applied when zero-padding a partial block. */
31 .section .rodata.cst32.zeropad_mask, "aM", @progbits, 32
/* Sixteen bytes of 0xff. */
34 .octa 0xffffffffffffffffffffffffffffffff
42 * STATE[0-4] - input state
44 * STATE[0-4] - output state (shifted positions)
/*
 * State update macro. Every use visible in this file is immediately followed
 * by a pxor into one STATE register, and the target register steps down by
 * one on each successive use, which matches the "shifted positions" note.
 * NOTE(review): the macro body and its .endm are not visible in this excerpt.
 */
48 .macro aegis128_update
58 * Load 1 <= LEN (%ecx) <= 15 bytes from the pointer SRC into the xmm register
59 * MSG and zeroize any remaining bytes. Clobbers %rax, %rcx, and %r8.
/*
 * The length in %ecx is re-biased in place (LEN - 8, then LEN - 4, then
 * LEN - 2) so that the same register serves both as the size-class test and
 * as the offset of the trailing load in each size class.
 * NOTE(review): the .macro line, the local labels, the conditional branches
 * between the three size classes and the shift-count setup are not visible
 * in this excerpt.
 */
62 sub $8, %ecx /* LEN - 8 */
65 /* Load 9 <= LEN <= 15 bytes: */
66 movq (SRC), MSG /* Load first 8 bytes */
/* With %rcx = LEN - 8, this 8-byte load ends exactly at SRC + LEN. */
67 mov (SRC, %rcx), %rax /* Load last 8 bytes */
70 shr %cl, %rax /* Discard overlapping bytes */
75 add $4, %ecx /* LEN - 4 */
78 /* Load 4 <= LEN <= 8 bytes: */
79 mov (SRC), %eax /* Load first 4 bytes */
/* With %rcx = LEN - 4, this 4-byte load ends exactly at SRC + LEN. */
80 mov (SRC, %rcx), %r8d /* Load last 4 bytes */
84 /* Load 1 <= LEN <= 3 bytes: */
85 add $2, %ecx /* LEN - 2 */
86 movzbl (SRC), %eax /* Load first byte */
/* With %rcx = LEN - 2, this 2-byte load ends exactly at SRC + LEN. */
88 movzwl (SRC, %rcx), %r8d /* Load last 2 bytes */
92 or %r8, %rax /* Combine the two parts */
99 * Store 1 <= LEN (%ecx) <= 15 bytes from the xmm register \msg to the pointer
100 * DST. Clobbers %rax, %rcx, and %r8.
/*
 * Three size classes are handled, each with a pair of possibly overlapping
 * stores (or single byte stores for the smallest class).
 * NOTE(review): the local labels, the conditional branches, and the
 * instructions that set %r8 and adjust the extracted value before the tail
 * stores are not visible in this excerpt.
 */
102 .macro store_partial msg
103 sub $8, %ecx /* LEN - 8 */
106 /* Store 8 <= LEN <= 15 bytes: */
/* Extract the upper 64 bits (element 1) of the vector. */
107 pextrq $1, \msg, %rax
/*
 * The tail is written first and the low 8 bytes second, so where the two
 * stores overlap the bytes from the low half are the ones that remain.
 */
111 mov %rax, (DST, %r8) /* Store last LEN - 8 bytes */
112 movq \msg, (DST) /* Store first 8 bytes */
116 add $4, %ecx /* LEN - 4 */
119 /* Store 4 <= LEN <= 7 bytes: */
/* Extract the second 32-bit element (bytes 4..7) of the vector. */
120 pextrd $1, \msg, %eax
/* Same ordering as above: tail first, then the low 4 bytes. */
124 mov %eax, (DST, %r8) /* Store last LEN - 4 bytes */
125 movd \msg, (DST) /* Store first 4 bytes */
129 /* Store 1 <= LEN <= 3 bytes: */
/* Bytes 0, 1 and 2 of the vector are written one at a time. */
130 pextrb $0, \msg, 0(DST)
131 cmp $-2, %ecx /* LEN - 4 == -2, i.e. LEN == 2? */
133 pextrb $1, \msg, 1(DST)
135 pextrb $2, \msg, 2(DST)
140 * void aegis128_aesni_init(struct aegis_state *state,
141 * const struct aegis_block *key,
142 * const u8 iv[AEGIS128_NONCE_SIZE]);
/*
 * Builds the initial five-word state and writes it to *state.
 * NOTE(review): the register aliases (STATEP, KEY, T1, STATE0-4), the loads
 * of key and iv, the setup of STATE0, STATE3 and STATE4, and the return are
 * not visible in this excerpt.
 */
144 SYM_FUNC_START(aegis128_aesni_init)
159 /* load the constants: */
/* movdqa is the alignment-checked load, so both constants must be 16-byte aligned. */
160 movdqa .Laegis128_const_0(%rip), STATE2
161 movdqa .Laegis128_const_1(%rip), STATE1
165 /* update 10 times with KEY / KEY xor IV: */
/*
 * The xor operand alternates between KEY and T1 (presumably KEY xor IV, per
 * the comment above - the computation of T1 is not visible here). The xor
 * target steps down one register per round (STATE4, 3, 2, 1, 0, 4, ...).
 * After ten rounds the rotation is back at its starting point, which is why
 * the state is stored in natural order below.
 */
166 aegis128_update; pxor KEY, STATE4
167 aegis128_update; pxor T1, STATE3
168 aegis128_update; pxor KEY, STATE2
169 aegis128_update; pxor T1, STATE1
170 aegis128_update; pxor KEY, STATE0
171 aegis128_update; pxor T1, STATE4
172 aegis128_update; pxor KEY, STATE3
173 aegis128_update; pxor T1, STATE2
174 aegis128_update; pxor KEY, STATE1
175 aegis128_update; pxor T1, STATE0
177 /* store the state: */
/* Five 16-byte words at offsets 0x00..0x40; unaligned stores are used. */
178 movdqu STATE0, 0x00(STATEP)
179 movdqu STATE1, 0x10(STATEP)
180 movdqu STATE2, 0x20(STATEP)
181 movdqu STATE3, 0x30(STATEP)
182 movdqu STATE4, 0x40(STATEP)
184 SYM_FUNC_END(aegis128_aesni_init)
187 * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
190 * len must be a multiple of 16.
/*
 * Absorbs associated data into the state, 16 bytes per block, with the main
 * loop unrolled over five blocks (source offsets 0x00..0x40).
 * NOTE(review): the register aliases, the per-block update and xor steps, the
 * loop control, the exit labels and the returns are not visible in this
 * excerpt.
 */
192 SYM_FUNC_START(aegis128_aesni_ad)
200 /* load the state: */
201 movdqu 0x00(STATEP), STATE0
202 movdqu 0x10(STATEP), STATE1
203 movdqu 0x20(STATEP), STATE2
204 movdqu 0x30(STATEP), STATE3
205 movdqu 0x40(STATEP), STATE4
/* One unaligned 16-byte load per block of the unrolled iteration. */
209 movdqu 0x00(SRC), MSG
215 movdqu 0x10(SRC), MSG
221 movdqu 0x20(SRC), MSG
227 movdqu 0x30(SRC), MSG
233 movdqu 0x40(SRC), MSG
242 /* store the state: */
/*
 * Five alternative write-back sequences follow, each storing the registers in
 * a different rotation. Presumably each one belongs to a separate exit point
 * reached after a different number of blocks in the last pass, undoing the
 * register rotation accumulated so far - confirm against the missing labels.
 */
/* Rotation 0: registers in natural order. */
244 movdqu STATE0, 0x00(STATEP)
245 movdqu STATE1, 0x10(STATEP)
246 movdqu STATE2, 0x20(STATEP)
247 movdqu STATE3, 0x30(STATEP)
248 movdqu STATE4, 0x40(STATEP)
/* Rotation 1: STATE4 holds word 0. */
252 movdqu STATE4, 0x00(STATEP)
253 movdqu STATE0, 0x10(STATEP)
254 movdqu STATE1, 0x20(STATEP)
255 movdqu STATE2, 0x30(STATEP)
256 movdqu STATE3, 0x40(STATEP)
/* Rotation 2: STATE3 holds word 0. */
260 movdqu STATE3, 0x00(STATEP)
261 movdqu STATE4, 0x10(STATEP)
262 movdqu STATE0, 0x20(STATEP)
263 movdqu STATE1, 0x30(STATEP)
264 movdqu STATE2, 0x40(STATEP)
/* Rotation 3: STATE2 holds word 0. */
268 movdqu STATE2, 0x00(STATEP)
269 movdqu STATE3, 0x10(STATEP)
270 movdqu STATE4, 0x20(STATEP)
271 movdqu STATE0, 0x30(STATEP)
272 movdqu STATE1, 0x40(STATEP)
/* Rotation 4: STATE1 holds word 0. */
276 movdqu STATE1, 0x00(STATEP)
277 movdqu STATE2, 0x10(STATEP)
278 movdqu STATE3, 0x20(STATEP)
279 movdqu STATE4, 0x30(STATEP)
280 movdqu STATE0, 0x40(STATEP)
283 SYM_FUNC_END(aegis128_aesni_ad)
/*
 * Processes block number i: reads 16 bytes from SRC + i * 0x10 into MSG and
 * writes 16 bytes from T0 to DST + i * 0x10. The arguments s0..s4 name the
 * five state registers in their current rotation.
 * NOTE(review): the computation of T0, the state update, the loop-exit check
 * and the .endm are not visible in this excerpt.
 */
285 .macro encrypt_block s0 s1 s2 s3 s4 i
286 movdqu (\i * 0x10)(SRC), MSG
293 movdqu T0, (\i * 0x10)(DST)
303 * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
306 * len must be nonzero and a multiple of 16.
/*
 * Encrypts whole 16-byte blocks from src to dst and updates *state. The main
 * loop is unrolled over five blocks.
 * NOTE(review): the register aliases, the loop control, the exit labels and
 * the returns are not visible in this excerpt.
 */
308 SYM_FUNC_START(aegis128_aesni_enc)
314 /* load the state: */
315 movdqu 0x00(STATEP), STATE0
316 movdqu 0x10(STATEP), STATE1
317 movdqu 0x20(STATEP), STATE2
318 movdqu 0x30(STATEP), STATE3
319 movdqu 0x40(STATEP), STATE4
/*
 * The register arguments rotate by one position per block, so no register
 * moves are needed to follow the shifted output of each state update.
 */
323 encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
324 encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
325 encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
326 encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
327 encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
333 /* store the state: */
/*
 * Five alternative write-back sequences follow, one per rotation of the
 * register file. Presumably each belongs to a separate exit point reached
 * after a different number of blocks in the last pass - confirm against the
 * missing labels.
 */
/* STATE4 holds word 0. */
335 movdqu STATE4, 0x00(STATEP)
336 movdqu STATE0, 0x10(STATEP)
337 movdqu STATE1, 0x20(STATEP)
338 movdqu STATE2, 0x30(STATEP)
339 movdqu STATE3, 0x40(STATEP)
/* STATE3 holds word 0. */
343 movdqu STATE3, 0x00(STATEP)
344 movdqu STATE4, 0x10(STATEP)
345 movdqu STATE0, 0x20(STATEP)
346 movdqu STATE1, 0x30(STATEP)
347 movdqu STATE2, 0x40(STATEP)
/* STATE2 holds word 0. */
351 movdqu STATE2, 0x00(STATEP)
352 movdqu STATE3, 0x10(STATEP)
353 movdqu STATE4, 0x20(STATEP)
354 movdqu STATE0, 0x30(STATEP)
355 movdqu STATE1, 0x40(STATEP)
/* STATE1 holds word 0. */
359 movdqu STATE1, 0x00(STATEP)
360 movdqu STATE2, 0x10(STATEP)
361 movdqu STATE3, 0x20(STATEP)
362 movdqu STATE4, 0x30(STATEP)
363 movdqu STATE0, 0x40(STATEP)
/* Registers in natural order. */
367 movdqu STATE0, 0x00(STATEP)
368 movdqu STATE1, 0x10(STATEP)
369 movdqu STATE2, 0x20(STATEP)
370 movdqu STATE3, 0x30(STATEP)
371 movdqu STATE4, 0x40(STATEP)
374 SYM_FUNC_END(aegis128_aesni_enc)
377 * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src,
378 * u8 *dst, unsigned int len);
/*
 * Encrypts a final partial block and updates *state.
 * NOTE(review): the remaining register aliases, the whole "encrypt message"
 * sequence and the return are not visible in this excerpt.
 */
380 SYM_FUNC_START(aegis128_aesni_enc_tail)
384 .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */
386 /* load the state: */
387 movdqu 0x00(STATEP), STATE0
388 movdqu 0x10(STATEP), STATE1
389 movdqu 0x20(STATEP), STATE2
390 movdqu 0x30(STATEP), STATE3
391 movdqu 0x40(STATEP), STATE4
393 /* encrypt message: */
410 /* store the state: */
/*
 * Written back rotated by one position (STATE4 holds word 0), which is
 * consistent with exactly one state update having been applied - presumably
 * in the part of the body that is not visible here.
 */
411 movdqu STATE4, 0x00(STATEP)
412 movdqu STATE0, 0x10(STATEP)
413 movdqu STATE1, 0x20(STATEP)
414 movdqu STATE2, 0x30(STATEP)
415 movdqu STATE3, 0x40(STATEP)
417 SYM_FUNC_END(aegis128_aesni_enc_tail)
/*
 * Processes block number i: reads 16 bytes from SRC + i * 0x10 into MSG and
 * writes MSG back out to DST + i * 0x10. The arguments s0..s4 name the five
 * state registers in their current rotation.
 * NOTE(review): the instructions that transform MSG between the load and the
 * store, the state update, the loop-exit check and the .endm are not visible
 * in this excerpt.
 */
419 .macro decrypt_block s0 s1 s2 s3 s4 i
420 movdqu (\i * 0x10)(SRC), MSG
426 movdqu MSG, (\i * 0x10)(DST)
436 * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
439 * len must be nonzero and a multiple of 16.
/*
 * Decrypts whole 16-byte blocks from src to dst and updates *state. The main
 * loop is unrolled over five blocks.
 * NOTE(review): the register aliases, the loop control, the exit labels and
 * the returns are not visible in this excerpt.
 */
441 SYM_FUNC_START(aegis128_aesni_dec)
447 /* load the state: */
448 movdqu 0x00(STATEP), STATE0
449 movdqu 0x10(STATEP), STATE1
450 movdqu 0x20(STATEP), STATE2
451 movdqu 0x30(STATEP), STATE3
452 movdqu 0x40(STATEP), STATE4
/*
 * The register arguments rotate by one position per block, so no register
 * moves are needed to follow the shifted output of each state update.
 */
456 decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
457 decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
458 decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
459 decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
460 decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
466 /* store the state: */
/*
 * Five alternative write-back sequences follow, one per rotation of the
 * register file. Presumably each belongs to a separate exit point reached
 * after a different number of blocks in the last pass - confirm against the
 * missing labels.
 */
/* STATE4 holds word 0. */
468 movdqu STATE4, 0x00(STATEP)
469 movdqu STATE0, 0x10(STATEP)
470 movdqu STATE1, 0x20(STATEP)
471 movdqu STATE2, 0x30(STATEP)
472 movdqu STATE3, 0x40(STATEP)
/* STATE3 holds word 0. */
476 movdqu STATE3, 0x00(STATEP)
477 movdqu STATE4, 0x10(STATEP)
478 movdqu STATE0, 0x20(STATEP)
479 movdqu STATE1, 0x30(STATEP)
480 movdqu STATE2, 0x40(STATEP)
/* STATE2 holds word 0. */
484 movdqu STATE2, 0x00(STATEP)
485 movdqu STATE3, 0x10(STATEP)
486 movdqu STATE4, 0x20(STATEP)
487 movdqu STATE0, 0x30(STATEP)
488 movdqu STATE1, 0x40(STATEP)
/* STATE1 holds word 0. */
492 movdqu STATE1, 0x00(STATEP)
493 movdqu STATE2, 0x10(STATEP)
494 movdqu STATE3, 0x20(STATEP)
495 movdqu STATE4, 0x30(STATEP)
496 movdqu STATE0, 0x40(STATEP)
/* Registers in natural order. */
500 movdqu STATE0, 0x00(STATEP)
501 movdqu STATE1, 0x10(STATEP)
502 movdqu STATE2, 0x20(STATEP)
503 movdqu STATE3, 0x30(STATEP)
504 movdqu STATE4, 0x40(STATEP)
507 SYM_FUNC_END(aegis128_aesni_dec)
510 * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src,
511 * u8 *dst, unsigned int len);
/*
 * Decrypts a final partial block and updates *state.
 * NOTE(review): the remaining register aliases, the "decrypt message"
 * sequence, the instructions that consume the mask address and the return
 * are not visible in this excerpt.
 */
513 SYM_FUNC_START(aegis128_aesni_dec_tail)
517 .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */
519 /* load the state: */
520 movdqu 0x00(STATEP), STATE0
521 movdqu 0x10(STATEP), STATE1
522 movdqu 0x20(STATEP), STATE2
523 movdqu 0x30(STATEP), STATE3
524 movdqu 0x40(STATEP), STATE4
526 /* decrypt message: */
539 /* mask with byte count: */
/*
 * %rax = address 16 bytes past the start of the mask table. Presumably the
 * byte count is subtracted from this address to pick a mask with that many
 * leading 0xff bytes - the indexing instructions are not visible here.
 */
540 lea .Lzeropad_mask+16(%rip), %rax
548 /* store the state: */
/*
 * Written back rotated by one position (STATE4 holds word 0), which is
 * consistent with exactly one state update having been applied - presumably
 * in the part of the body that is not visible here.
 */
549 movdqu STATE4, 0x00(STATEP)
550 movdqu STATE0, 0x10(STATEP)
551 movdqu STATE1, 0x20(STATEP)
552 movdqu STATE2, 0x30(STATEP)
553 movdqu STATE3, 0x40(STATEP)
555 SYM_FUNC_END(aegis128_aesni_dec_tail)
558 * void aegis128_aesni_final(struct aegis_state *state,
559 * struct aegis_block *tag_xor,
560 * unsigned int assoclen, unsigned int cryptlen);
/*
 * Runs the finalisation rounds over the state and rewrites the 16-byte block
 * at tag_xor in place (it is loaded into MSG and MSG is stored back).
 * NOTE(review): the register aliases, the first half of the length-block
 * setup, the combination of the state words into MSG and the return are not
 * visible in this excerpt.
 */
562 SYM_FUNC_START(aegis128_aesni_final)
568 /* load the state: */
569 movdqu 0x00(STATEP), STATE0
570 movdqu 0x10(STATEP), STATE1
571 movdqu 0x20(STATEP), STATE2
572 movdqu 0x30(STATEP), STATE3
573 movdqu 0x40(STATEP), STATE4
575 /* prepare length block: */
/*
 * CRYPTLEN goes into 32-bit element 2, i.e. the low half of the upper 64-bit
 * lane. Presumably ASSOCLEN was placed in the lower lane by an instruction
 * not visible here. psllq then shifts each 64-bit lane independently.
 */
577 pinsrd $2, CRYPTLEN, MSG
578 psllq $3, MSG /* multiply by 8 (to get bit count) */
/*
 * Seven update rounds, each mixing the same length block into the state. The
 * xor target steps down one register per round, following the shifted output
 * of aegis128_update.
 */
583 aegis128_update; pxor MSG, STATE4
584 aegis128_update; pxor MSG, STATE3
585 aegis128_update; pxor MSG, STATE2
586 aegis128_update; pxor MSG, STATE1
587 aegis128_update; pxor MSG, STATE0
588 aegis128_update; pxor MSG, STATE4
589 aegis128_update; pxor MSG, STATE3
/* Read the caller-supplied block; MSG is reused as scratch from here on. */
592 movdqu (TAG_XOR), MSG
/* Write the result over the caller-supplied block. */
600 movdqu MSG, (TAG_XOR)
602 SYM_FUNC_END(aegis128_aesni_final)