2 * SSE2 implementation of MORUS-640
4 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
5 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published
9 * by the Free Software Foundation.
12 #include <linux/linkage.h>
13 #include <asm/frame.h>
/*
 * SHUFFLE_MASK packs four 2-bit lane selectors into a single 8-bit
 * immediate: i0 occupies bits 1:0, i1 bits 3:2, i2 bits 5:4 and i3 bits 7:6.
 */
15 #define SHUFFLE_MASK(i0, i1, i2, i3) \
16 (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
/*
 * The three lane orders used in this file. They are passed as the last
 * argument of morus640_round and as the immediate of pshufd. As a pshufd
 * immediate, selector i_k names the source dword copied into destination
 * dword k, so:
 *   MASK1 (3, 0, 1, 2): dst[k] = src[(k + 3) mod 4]
 *   MASK2 (2, 3, 0, 1): dst[k] = src[(k + 2) mod 4]  (64-bit halves swapped)
 *   MASK3 (1, 2, 3, 0): dst[k] = src[(k + 1) mod 4]
 */
18 #define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
19 #define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
20 #define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
/*
 * Read-only, mergeable ("aM") constant data with an entry size of 32 bytes:
 * the two 16-byte rows below form one 32-byte mergeable unit.
 */
32 .section .rodata.cst16.morus640_const, "aM", @progbits, 32
/*
 * First 16-byte constant. The bytes are the Fibonacci sequence reduced
 * modulo 256, starting 0, 1, 1, 2, 3, 5, ...
 */
35 .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
36 .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
/*
 * Second 16-byte constant: the same sequence continued
 * (0x79 + 0x62 = 0xdb, 0x62 + 0xdb = 0x3d mod 256, ...).
 */
38 .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
39 .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
/* Separate mergeable section with 16-byte entries for the counter. */
41 .section .rodata.cst16.morus640_counter, "aM", @progbits, 16
/*
 * Byte k holds the value k (0..15). It is loaded by
 * crypto_morus640_sse2_dec_tail in its "mask with byte count" step.
 */
44 .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
45 .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
/*
 * morus640_round takes seven operands: five state operands (s0..s4)
 * followed by b and w. Every invocation in this file passes an integer
 * literal for b (5, 31, 7, 22 or 13) and one of MASK1/MASK2/MASK3 for w.
 */
49 .macro morus640_round s0, s1, s2, s3, s4, b, w
62 * __morus640_update: internal ABI
64 * STATE[0-4] - input state
67 * STATE[0-4] - output state
/*
 * Five rounds. The state operands rotate by one position from each round
 * to the next, so every STATEn takes the s0 role exactly once.
 * The (b, w) pairs are (5, MASK1), (31, MASK2), (7, MASK3), (22, MASK2),
 * (13, MASK1): the mask sequence is symmetric around the middle round.
 */
72 morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
74 morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
76 morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
78 morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
80 morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
82 ENDPROC(__morus640_update)
86 * __morus640_update_zero: internal ABI
88 * STATE[0-4] - input state
90 * STATE[0-4] - output state
94 __morus640_update_zero:
/*
 * The five rounds use the same operand rotation and the same (b, w)
 * schedule as __morus640_update: (5, MASK1), (31, MASK2), (7, MASK3),
 * (22, MASK2), (13, MASK1). This variant is the one called 16 times in a
 * row by crypto_morus640_sse2_init ("update 16 times with zero").
 */
95 morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
96 morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
97 morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
98 morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
99 morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
101 ENDPROC(__morus640_update_zero)
104 * __load_partial: internal ABI
109 * MSG - message block
167 ENDPROC(__load_partial)
170 * __store_partial: internal ABI
225 ENDPROC(__store_partial)
228 * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
230 ENTRY(crypto_morus640_sse2_init)
/*
 * Register use matches the SysV AMD64 argument order for the signature
 * above: %rdi = state, %rsi = key, %rdx = iv.
 */
/* STATE0 <- 16 bytes read from iv; movdqu tolerates an unaligned pointer */
234 movdqu (%rdx), STATE0
/* comparing a register with itself sets every bit: STATE2 = all-ones */
239 pcmpeqd STATE2, STATE2
240 /* load the constants: */
/* movdqa is the aligned form, so both constants must be 16-byte aligned */
241 movdqa .Lmorus640_const_0, STATE3
242 movdqa .Lmorus640_const_1, STATE4
244 /* update 16 times with zero: */
/* fully unrolled: sixteen consecutive calls, no loop counter involved */
245 call __morus640_update_zero
246 call __morus640_update_zero
247 call __morus640_update_zero
248 call __morus640_update_zero
249 call __morus640_update_zero
250 call __morus640_update_zero
251 call __morus640_update_zero
252 call __morus640_update_zero
253 call __morus640_update_zero
254 call __morus640_update_zero
255 call __morus640_update_zero
256 call __morus640_update_zero
257 call __morus640_update_zero
258 call __morus640_update_zero
259 call __morus640_update_zero
260 call __morus640_update_zero
262 /* xor-in the key again after updates: */
265 /* store the state: */
/*
 * Five 16-byte words (80 bytes = 640 bits) are written to *state at
 * offsets 0, 16, 32, 48 and 64 using unaligned stores.
 */
266 movdqu STATE0, (0 * 16)(%rdi)
267 movdqu STATE1, (1 * 16)(%rdi)
268 movdqu STATE2, (2 * 16)(%rdi)
269 movdqu STATE3, (3 * 16)(%rdi)
270 movdqu STATE4, (4 * 16)(%rdi)
274 ENDPROC(crypto_morus640_sse2_init)
277 * void crypto_morus640_sse2_ad(void *state, const void *data,
278 * unsigned int length);
280 ENTRY(crypto_morus640_sse2_ad)
286 /* load the state: */
/*
 * The 80-byte state at %rdi (first argument, state) is copied into
 * STATE0..STATE4 with unaligned loads; the working copy lives in
 * registers until it is stored back at the end of the function.
 */
287 movdqu (0 * 16)(%rdi), STATE0
288 movdqu (1 * 16)(%rdi), STATE1
289 movdqu (2 * 16)(%rdi), STATE2
290 movdqu (3 * 16)(%rdi), STATE3
291 movdqu (4 * 16)(%rdi), STATE4
/* the state update is invoked from two separate places in this function */
300 call __morus640_update
310 call __morus640_update
317 /* store the state: */
/* write STATE0..STATE4 back to the same five 16-byte slots of *state */
318 movdqu STATE0, (0 * 16)(%rdi)
319 movdqu STATE1, (1 * 16)(%rdi)
320 movdqu STATE2, (2 * 16)(%rdi)
321 movdqu STATE3, (3 * 16)(%rdi)
322 movdqu STATE4, (4 * 16)(%rdi)
327 ENDPROC(crypto_morus640_sse2_ad)
330 * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
331 * unsigned int length);
333 ENTRY(crypto_morus640_sse2_enc)
339 /* load the state: */
/* bring the 80-byte state at %rdi into STATE0..STATE4 (unaligned loads) */
340 movdqu (0 * 16)(%rdi), STATE0
341 movdqu (1 * 16)(%rdi), STATE1
342 movdqu (2 * 16)(%rdi), STATE2
343 movdqu (3 * 16)(%rdi), STATE3
344 movdqu (4 * 16)(%rdi), STATE4
/*
 * T1 <- STATE1 with its 32-bit lanes reordered by MASK3: lane k of T1 is
 * lane (k + 1) mod 4 of STATE1. The same shuffle-then-update pairing
 * appears twice in this function.
 */
356 pshufd $MASK3, STATE1, T1
363 call __morus640_update
376 pshufd $MASK3, STATE1, T1
383 call __morus640_update
391 /* store the state: */
/* write STATE0..STATE4 back to the five 16-byte slots of *state */
392 movdqu STATE0, (0 * 16)(%rdi)
393 movdqu STATE1, (1 * 16)(%rdi)
394 movdqu STATE2, (2 * 16)(%rdi)
395 movdqu STATE3, (3 * 16)(%rdi)
396 movdqu STATE4, (4 * 16)(%rdi)
401 ENDPROC(crypto_morus640_sse2_enc)
404 * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
405 * unsigned int length);
407 ENTRY(crypto_morus640_sse2_enc_tail)
410 /* load the state: */
/* bring the 80-byte state at %rdi into STATE0..STATE4 (unaligned loads) */
411 movdqu (0 * 16)(%rdi), STATE0
412 movdqu (1 * 16)(%rdi), STATE1
413 movdqu (2 * 16)(%rdi), STATE2
414 movdqu (3 * 16)(%rdi), STATE3
415 movdqu (4 * 16)(%rdi), STATE4
417 /* encrypt message: */
/*
 * Same lane reorder as in crypto_morus640_sse2_enc: lane k of T1 is
 * lane (k + 1) mod 4 of STATE1. Here the shuffle and the state update
 * each occur once.
 */
422 pshufd $MASK3, STATE1, T1
430 call __morus640_update
432 /* store the state: */
/* write STATE0..STATE4 back to the five 16-byte slots of *state */
433 movdqu STATE0, (0 * 16)(%rdi)
434 movdqu STATE1, (1 * 16)(%rdi)
435 movdqu STATE2, (2 * 16)(%rdi)
436 movdqu STATE3, (3 * 16)(%rdi)
437 movdqu STATE4, (4 * 16)(%rdi)
441 ENDPROC(crypto_morus640_sse2_enc_tail)
444 * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
445 * unsigned int length);
447 ENTRY(crypto_morus640_sse2_dec)
453 /* load the state: */
/* bring the 80-byte state at %rdi into STATE0..STATE4 (unaligned loads) */
454 movdqu (0 * 16)(%rdi), STATE0
455 movdqu (1 * 16)(%rdi), STATE1
456 movdqu (2 * 16)(%rdi), STATE2
457 movdqu (3 * 16)(%rdi), STATE3
458 movdqu (4 * 16)(%rdi), STATE4
/*
 * T0 <- STATE1 with its 32-bit lanes reordered by MASK3: lane k of T0 is
 * lane (k + 1) mod 4 of STATE1. The decrypt routines put this value in
 * T0, whereas the encrypt routines use T1. The shuffle-then-update
 * pairing appears twice in this function.
 */
469 pshufd $MASK3, STATE1, T0
476 call __morus640_update
488 pshufd $MASK3, STATE1, T0
495 call __morus640_update
503 /* store the state: */
/* write STATE0..STATE4 back to the five 16-byte slots of *state */
504 movdqu STATE0, (0 * 16)(%rdi)
505 movdqu STATE1, (1 * 16)(%rdi)
506 movdqu STATE2, (2 * 16)(%rdi)
507 movdqu STATE3, (3 * 16)(%rdi)
508 movdqu STATE4, (4 * 16)(%rdi)
513 ENDPROC(crypto_morus640_sse2_dec)
516 * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
517 * unsigned int length);
519 ENTRY(crypto_morus640_sse2_dec_tail)
522 /* load the state: */
/* bring the 80-byte state at %rdi into STATE0..STATE4 (unaligned loads) */
523 movdqu (0 * 16)(%rdi), STATE0
524 movdqu (1 * 16)(%rdi), STATE1
525 movdqu (2 * 16)(%rdi), STATE2
526 movdqu (3 * 16)(%rdi), STATE3
527 movdqu (4 * 16)(%rdi), STATE4
529 /* decrypt message: */
/* T0 <- STATE1 lanes reordered by MASK3 (lane k takes lane (k + 1) mod 4) */
533 pshufd $MASK3, STATE1, T0
542 /* mask with byte count: */
/*
 * T1 <- the 16 byte indices 0..15 from .Lmorus640_counter (aligned load).
 * NOTE(review): presumably these indices are compared against the byte
 * count to build the mask named in the comment above - confirm against
 * the neighbouring instructions.
 */
548 movdqa .Lmorus640_counter, T1
552 call __morus640_update
554 /* store the state: */
/* write STATE0..STATE4 back to the five 16-byte slots of *state */
555 movdqu STATE0, (0 * 16)(%rdi)
556 movdqu STATE1, (1 * 16)(%rdi)
557 movdqu STATE2, (2 * 16)(%rdi)
558 movdqu STATE3, (3 * 16)(%rdi)
559 movdqu STATE4, (4 * 16)(%rdi)
563 ENDPROC(crypto_morus640_sse2_dec_tail)
566 * void crypto_morus640_sse2_final(void *state, void *tag_xor,
567 * u64 assoclen, u64 cryptlen);
569 ENTRY(crypto_morus640_sse2_final)
572 /* load the state: */
/* bring the 80-byte state at %rdi into STATE0..STATE4 (unaligned loads) */
573 movdqu (0 * 16)(%rdi), STATE0
574 movdqu (1 * 16)(%rdi), STATE1
575 movdqu (2 * 16)(%rdi), STATE2
576 movdqu (3 * 16)(%rdi), STATE3
577 movdqu (4 * 16)(%rdi), STATE4
579 /* xor state[0] into state[4]: */
582 /* prepare length block: */
/*
 * psllq shifts each 64-bit lane of MSG independently, so both lanes are
 * scaled by 8 in one instruction.
 * NOTE(review): presumably the two lanes hold assoclen and cryptlen from
 * the signature - confirm against the instructions that fill MSG.
 */
587 psllq $3, MSG /* multiply by 8 (to get bit count) */
/* ten consecutive state updates, fully unrolled */
590 call __morus640_update
591 call __morus640_update
592 call __morus640_update
593 call __morus640_update
594 call __morus640_update
595 call __morus640_update
596 call __morus640_update
597 call __morus640_update
598 call __morus640_update
599 call __morus640_update
/* T0 <- STATE1 lanes reordered by MASK3 (lane k takes lane (k + 1) mod 4) */
605 pshufd $MASK3, STATE1, T0
615 ENDPROC(crypto_morus640_sse2_final)