2 * AVX2 implementation of MORUS-1280
4 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
5 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published
9 * by the Free Software Foundation.
12 #include <linux/linkage.h>
13 #include <asm/frame.h>
/*
 * SHUFFLE_MASK packs four 2-bit element indices into one 8-bit immediate:
 * i0 occupies bits 0-1, i1 bits 2-3, i2 bits 4-5 and i3 bits 6-7.
 * The arguments are not parenthesised inside the macro, so pass plain
 * constants only.
 */
15 #define SHUFFLE_MASK(i0, i1, i2, i3) \
16 (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
/*
 * Immediates used with vpermq and pshufd in this file. With those
 * instructions, destination element k is taken from the source element
 * named by the k-th index:
 *   MASK1 = 0x93 -> source elements 3, 0, 1, 2
 *   MASK2 = 0x4e -> source elements 2, 3, 0, 1
 *   MASK3 = 0x39 -> source elements 1, 2, 3, 0
 */
18 #define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
19 #define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
20 #define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
/*
 * NOTE(review): STATE0_LOW is presumably the 128-bit low half of STATE0;
 * the definition of STATE0 itself is not visible in this excerpt.
 */
23 #define STATE0_LOW %xmm0
/*
 * 32 bytes of constant data in their own read-only section (flags "aM":
 * allocatable and mergeable, entry size 32).
 * The bytes are the Fibonacci sequence 0, 1, 1, 2, 3, 5, 8, 13, ... with
 * every term reduced modulo 256.
 * NOTE(review): the label and alignment directive for this table are not
 * visible in this excerpt. crypto_morus1280_avx2_init loads
 * .Lmorus1280_const (presumably this table) with vmovdqa, which requires
 * a 32-byte aligned address - confirm the alignment is declared.
 */
35 .section .rodata.cst32.morus1280_const, "aM", @progbits, 32
38 .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
39 .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
40 .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
41 .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
/*
 * 32 bytes holding the ascending values 0x00..0x1f, i.e. byte k contains
 * the value k. Placed in its own mergeable read-only section (flags "aM",
 * entry size 32).
 * crypto_morus1280_avx2_dec_tail loads .Lmorus1280_counter (presumably this
 * table) into T1 in its "mask with byte count" step.
 * NOTE(review): the label and alignment directive are not visible in this
 * excerpt; the table is read with vmovdqa, which requires 32-byte
 * alignment.
 */
43 .section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
46 .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
47 .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
48 .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
49 .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
/*
 * morus1280_round - one round step over the state words s0..s4.
 *   s0..s4: vector registers holding the five state words, in the order
 *           chosen by the caller
 *   b:      shift count; the instruction visible here shifts every 64-bit
 *           element of s0 right (logically) by 64 - b bits
 *   w:      NOTE(review): callers pass MASK1..MASK3; the instruction that
 *           uses it is in a part of the macro that is not visible in this
 *           excerpt
 * Only part of the macro body is shown in this excerpt.
 */
53 .macro morus1280_round s0, s1, s2, s3, s4, b, w
58 vpsrlq $(64 - \b), \s0, \s0
64 * __morus1280_update: internal ABI
66 * STATE[0-4] - input state
69 * STATE[0-4] - output state
/*
 * State update with a message block: five morus1280_round invocations.
 * Each invocation starts one state word later than the previous one
 * (STATE0, STATE1, ... STATE4, wrapping around), with b = 13, 46, 38, 7, 4
 * and w = MASK1, MASK2, MASK3, MASK2, MASK1.
 * MSG is XORed into STATE1..STATE4 immediately before the round that
 * starts with that word; STATE0 gets no MSG XOR.
 */
74 morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
/* STATE1 ^= MSG */
75 vpxor MSG, STATE1, STATE1
76 morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
/* STATE2 ^= MSG */
77 vpxor MSG, STATE2, STATE2
78 morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
/* STATE3 ^= MSG */
79 vpxor MSG, STATE3, STATE3
80 morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
/* STATE4 ^= MSG */
81 vpxor MSG, STATE4, STATE4
82 morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
84 ENDPROC(__morus1280_update)
87 * __morus1280_update_zero: internal ABI
89 * STATE[0-4] - input state
91 * STATE[0-4] - output state
/*
 * State update without a message block: the same five morus1280_round
 * invocations as __morus1280_update (same word order, same b and w
 * arguments), but with no MSG XOR between the rounds. MSG is not
 * referenced by the lines shown here.
 */
95 __morus1280_update_zero:
96 morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
97 morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
98 morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
99 morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
100 morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
102 ENDPROC(__morus1280_update_zero)
105 * __load_partial: internal ABI
110 * MSG - message block
/*
 * Fragment of __load_partial: only four non-adjacent instructions of the
 * routine are visible in this excerpt, so the control flow that links
 * them cannot be described from here.
 */
/* MASK2 selects dwords 2, 3, 0, 1: swaps the two 64-bit halves of MSG_LOW */
161 pshufd $MASK2, MSG_LOW, MSG_LOW
/* load 8 bytes from the address in %r8 into 64-bit element 0 of MSG_LOW */
162 pinsrq $0, (%r8), MSG_LOW
/* MASK2 selects qwords 2, 3, 0, 1: swaps the two 128-bit halves of MSG */
169 vpermq $MASK2, MSG, MSG
/* unaligned 16-byte load from the address in %rsi into MSG_LOW */
170 movdqu (%rsi), MSG_LOW
174 ENDPROC(__load_partial)
177 * __store_partial: internal ABI
/*
 * Fragment of __store_partial: only two non-adjacent instructions of the
 * routine are visible in this excerpt.
 */
/* MASK2 selects qwords 2, 3, 0, 1: swaps the two 128-bit halves of T0 */
196 vpermq $MASK2, T0, T0
/* copy 64-bit element 1 (the upper half) of T0_LOW into %r10 */
208 pextrq $1, T0_LOW, %r10
241 ENDPROC(__store_partial)
244 * void crypto_morus1280_avx2_init(void *state, const void *key,
/*
 * Builds the initial cipher state and writes it to the buffer at %rdi
 * (the `state` argument) as five consecutive 32-byte words.
 * Visible steps: STATE0 is zeroed and its low 16 bytes are loaded from
 * (%rdx); STATE2 is set to all ones, STATE3 to all zeros and STATE4 to the
 * 32-byte constant; the state is then updated 16 times with no message
 * block, and KEY is XORed into STATE1.
 * NOTE(review): the instructions that load KEY and initialise STATE1 are
 * not visible in this excerpt, and the name of the third argument (%rdx)
 * is cut off in the prototype comment.
 */
247 ENTRY(crypto_morus1280_avx2_init)
/* clear all 256 bits of STATE0 before the partial load below */
251 vpxor STATE0, STATE0, STATE0
/* unaligned 16-byte load from (%rdx) into STATE0_LOW (%xmm0) */
252 movdqu (%rdx), STATE0_LOW
/* comparing a register with itself for equality sets every bit: all ones */
257 vpcmpeqd STATE2, STATE2, STATE2
258 /* load all zeros: */
259 vpxor STATE3, STATE3, STATE3
260 /* load the constant: */
/* aligned load via an absolute address; the source must be 32-byte aligned */
261 vmovdqa .Lmorus1280_const, STATE4
263 /* update 16 times with zero: */
264 call __morus1280_update_zero
265 call __morus1280_update_zero
266 call __morus1280_update_zero
267 call __morus1280_update_zero
268 call __morus1280_update_zero
269 call __morus1280_update_zero
270 call __morus1280_update_zero
271 call __morus1280_update_zero
272 call __morus1280_update_zero
273 call __morus1280_update_zero
274 call __morus1280_update_zero
275 call __morus1280_update_zero
276 call __morus1280_update_zero
277 call __morus1280_update_zero
278 call __morus1280_update_zero
279 call __morus1280_update_zero
281 /* xor-in the key again after updates: */
282 vpxor KEY, STATE1, STATE1
284 /* store the state: */
285 vmovdqu STATE0, (0 * 32)(%rdi)
286 vmovdqu STATE1, (1 * 32)(%rdi)
287 vmovdqu STATE2, (2 * 32)(%rdi)
288 vmovdqu STATE3, (3 * 32)(%rdi)
289 vmovdqu STATE4, (4 * 32)(%rdi)
293 ENDPROC(crypto_morus1280_avx2_init)
296 * void crypto_morus1280_avx2_ad(void *state, const void *data,
297 * unsigned int length);
/*
 * Loads the five 32-byte state words from the buffer at %rdi (`state`),
 * runs __morus1280_update from two call sites, and writes the state back
 * to the same buffer.
 * NOTE(review): the code that loads MSG from `data` and the control flow
 * around the two calls (presumably loops over the input) are not visible
 * in this excerpt.
 */
299 ENTRY(crypto_morus1280_avx2_ad)
305 /* load the state: */
306 vmovdqu (0 * 32)(%rdi), STATE0
307 vmovdqu (1 * 32)(%rdi), STATE1
308 vmovdqu (2 * 32)(%rdi), STATE2
309 vmovdqu (3 * 32)(%rdi), STATE3
310 vmovdqu (4 * 32)(%rdi), STATE4
/* first call site; the instructions leading up to it are not shown */
319 call __morus1280_update
/* second, separate call site; the instructions in between are not shown */
329 call __morus1280_update
336 /* store the state: */
337 vmovdqu STATE0, (0 * 32)(%rdi)
338 vmovdqu STATE1, (1 * 32)(%rdi)
339 vmovdqu STATE2, (2 * 32)(%rdi)
340 vmovdqu STATE3, (3 * 32)(%rdi)
341 vmovdqu STATE4, (4 * 32)(%rdi)
346 ENDPROC(crypto_morus1280_avx2_ad)
349 * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
350 * unsigned int length);
/*
 * Loads the five 32-byte state words from the buffer at %rdi (`state`),
 * processes message blocks, and writes the state back to the same buffer.
 * Two code paths are visible, each computing values from STATE1, STATE2
 * and STATE3 into T1 and then calling __morus1280_update.
 * NOTE(review): the loads from `src`, the stores to `dst`, the XORs that
 * combine T1 with the message, and the loop control are not visible in
 * this excerpt.
 */
352 ENTRY(crypto_morus1280_avx2_enc)
358 /* load the state: */
359 vmovdqu (0 * 32)(%rdi), STATE0
360 vmovdqu (1 * 32)(%rdi), STATE1
361 vmovdqu (2 * 32)(%rdi), STATE2
362 vmovdqu (3 * 32)(%rdi), STATE3
363 vmovdqu (4 * 32)(%rdi), STATE4
/* T1 = STATE1 with its 64-bit elements taken in the order 1, 2, 3, 0 */
375 vpermq $MASK3, STATE1, T1
/*
 * T1 = STATE2 & STATE3. This overwrites the permuted value above, which is
 * presumably consumed by an instruction not shown in this excerpt.
 */
377 vpand STATE2, STATE3, T1
381 call __morus1280_update
/* second code path: same T1 computations as above */
394 vpermq $MASK3, STATE1, T1
396 vpand STATE2, STATE3, T1
400 call __morus1280_update
408 /* store the state: */
409 vmovdqu STATE0, (0 * 32)(%rdi)
410 vmovdqu STATE1, (1 * 32)(%rdi)
411 vmovdqu STATE2, (2 * 32)(%rdi)
412 vmovdqu STATE3, (3 * 32)(%rdi)
413 vmovdqu STATE4, (4 * 32)(%rdi)
418 ENDPROC(crypto_morus1280_avx2_enc)
421 * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
422 * unsigned int length);
/*
 * Loads the five 32-byte state words from the buffer at %rdi (`state`),
 * handles one message block with a single __morus1280_update call, and
 * writes the state back to the same buffer.
 * NOTE(review): the code that loads the message from `src`, combines it
 * with T1 and writes to `dst` is not visible in this excerpt.
 */
424 ENTRY(crypto_morus1280_avx2_enc_tail)
427 /* load the state: */
428 vmovdqu (0 * 32)(%rdi), STATE0
429 vmovdqu (1 * 32)(%rdi), STATE1
430 vmovdqu (2 * 32)(%rdi), STATE2
431 vmovdqu (3 * 32)(%rdi), STATE3
432 vmovdqu (4 * 32)(%rdi), STATE4
434 /* encrypt message: */
/* T1 = STATE1 with its 64-bit elements taken in the order 1, 2, 3, 0 */
439 vpermq $MASK3, STATE1, T1
/*
 * T1 = STATE2 & STATE3. This overwrites the permuted value above, which is
 * presumably consumed by an instruction not shown in this excerpt.
 */
441 vpand STATE2, STATE3, T1
446 call __morus1280_update
448 /* store the state: */
449 vmovdqu STATE0, (0 * 32)(%rdi)
450 vmovdqu STATE1, (1 * 32)(%rdi)
451 vmovdqu STATE2, (2 * 32)(%rdi)
452 vmovdqu STATE3, (3 * 32)(%rdi)
453 vmovdqu STATE4, (4 * 32)(%rdi)
457 ENDPROC(crypto_morus1280_avx2_enc_tail)
460 * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
461 * unsigned int length);
/*
 * Loads the five 32-byte state words from the buffer at %rdi (`state`),
 * processes message blocks, and writes the state back to the same buffer.
 * Two code paths are visible. In each, STATE0 is XORed into MSG, values
 * derived from STATE1, STATE2 and STATE3 are computed into T0, and
 * __morus1280_update is called.
 * NOTE(review): the loads from `src`, the stores to `dst`, the XORs that
 * fold T0 into MSG, and the loop control are not visible in this excerpt.
 */
463 ENTRY(crypto_morus1280_avx2_dec)
469 /* load the state: */
470 vmovdqu (0 * 32)(%rdi), STATE0
471 vmovdqu (1 * 32)(%rdi), STATE1
472 vmovdqu (2 * 32)(%rdi), STATE2
473 vmovdqu (3 * 32)(%rdi), STATE3
474 vmovdqu (4 * 32)(%rdi), STATE4
/* MSG ^= STATE0 */
484 vpxor STATE0, MSG, MSG
/* T0 = STATE1 with its 64-bit elements taken in the order 1, 2, 3, 0 */
485 vpermq $MASK3, STATE1, T0
/*
 * T0 = STATE2 & STATE3. This overwrites the permuted value above, which is
 * presumably consumed by an instruction not shown in this excerpt.
 */
487 vpand STATE2, STATE3, T0
491 call __morus1280_update
/* second code path: same MSG and T0 computations as above */
502 vpxor STATE0, MSG, MSG
503 vpermq $MASK3, STATE1, T0
505 vpand STATE2, STATE3, T0
509 call __morus1280_update
517 /* store the state: */
518 vmovdqu STATE0, (0 * 32)(%rdi)
519 vmovdqu STATE1, (1 * 32)(%rdi)
520 vmovdqu STATE2, (2 * 32)(%rdi)
521 vmovdqu STATE3, (3 * 32)(%rdi)
522 vmovdqu STATE4, (4 * 32)(%rdi)
527 ENDPROC(crypto_morus1280_avx2_dec)
530 * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
531 * unsigned int length);
/*
 * Loads the five 32-byte state words from the buffer at %rdi (`state`),
 * handles one message block with a single __morus1280_update call, and
 * writes the state back to the same buffer. Before the update, the
 * "mask with byte count" step prepares a broadcast byte in T0 and the
 * 0x00..0x1f counter table in T1.
 * NOTE(review): the instructions that load the message, fold T0 into MSG,
 * store the result, set the low byte of T0 and apply the mask are not
 * visible in this excerpt.
 */
533 ENTRY(crypto_morus1280_avx2_dec_tail)
536 /* load the state: */
537 vmovdqu (0 * 32)(%rdi), STATE0
538 vmovdqu (1 * 32)(%rdi), STATE1
539 vmovdqu (2 * 32)(%rdi), STATE2
540 vmovdqu (3 * 32)(%rdi), STATE3
541 vmovdqu (4 * 32)(%rdi), STATE4
543 /* decrypt message: */
/* MSG ^= STATE0 */
546 vpxor STATE0, MSG, MSG
/* T0 = STATE1 with its 64-bit elements taken in the order 1, 2, 3, 0 */
547 vpermq $MASK3, STATE1, T0
/*
 * T0 = STATE2 & STATE3. This overwrites the permuted value above, which is
 * presumably consumed by an instruction not shown in this excerpt.
 */
549 vpand STATE2, STATE3, T0
555 /* mask with byte count: */
/* replicate the lowest byte of T0_LOW into all 32 bytes of T0 */
557 vpbroadcastb T0_LOW, T0
/* aligned load via an absolute address; the source must be 32-byte aligned */
558 vmovdqa .Lmorus1280_counter, T1
562 call __morus1280_update
564 /* store the state: */
565 vmovdqu STATE0, (0 * 32)(%rdi)
566 vmovdqu STATE1, (1 * 32)(%rdi)
567 vmovdqu STATE2, (2 * 32)(%rdi)
568 vmovdqu STATE3, (3 * 32)(%rdi)
569 vmovdqu STATE4, (4 * 32)(%rdi)
573 ENDPROC(crypto_morus1280_avx2_dec_tail)
576 * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
577 * u64 assoclen, u64 cryptlen);
/*
 * Finalisation. Loads the five 32-byte state words from the buffer at
 * %rdi (`state`), XORs STATE0 into STATE4, builds a length block in MSG
 * from %rdx and %rcx (`assoclen` and `cryptlen` in the prototype, as the
 * third and fourth integer arguments), converts both lengths from bytes
 * to bits, and runs __morus1280_update ten times with that block.
 * The trailing instructions then combine STATE0..STATE3 with MSG.
 * NOTE(review): the instruction that clears MSG before the inserts, the
 * load and store of the `tag_xor` buffer, and the XORs that fold T0 into
 * MSG are not visible in this excerpt.
 */
579 ENTRY(crypto_morus1280_avx2_final)
582 /* load the state: */
583 vmovdqu (0 * 32)(%rdi), STATE0
584 vmovdqu (1 * 32)(%rdi), STATE1
585 vmovdqu (2 * 32)(%rdi), STATE2
586 vmovdqu (3 * 32)(%rdi), STATE3
587 vmovdqu (4 * 32)(%rdi), STATE4
589 /* xor state[0] into state[4]: */
590 vpxor STATE0, STATE4, STATE4
592 /* prepare length block: */
/* 64-bit element 0 of MSG_LOW = %rdx */
594 vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
/* 64-bit element 1 of MSG_LOW = %rcx */
595 vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
596 vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */
/* ten state updates, all with the length block in MSG */
599 call __morus1280_update
600 call __morus1280_update
601 call __morus1280_update
602 call __morus1280_update
603 call __morus1280_update
604 call __morus1280_update
605 call __morus1280_update
606 call __morus1280_update
607 call __morus1280_update
608 call __morus1280_update
/* MSG ^= STATE0 */
613 vpxor STATE0, MSG, MSG
/* T0 = STATE1 with its 64-bit elements taken in the order 1, 2, 3, 0 */
614 vpermq $MASK3, STATE1, T0
/*
 * T0 = STATE2 & STATE3. This overwrites the permuted value above, which is
 * presumably consumed by an instruction not shown in this excerpt.
 */
616 vpand STATE2, STATE3, T0
622 ENDPROC(crypto_morus1280_avx2_final)