Merge tag 'platform-drivers-x86-v4.9-2' of git://git.infradead.org/users/dvhart/linux...
[linux/fpc-iii.git] / arch / arm64 / crypto / ghash-ce-core.S
blobdc457015884e04345ea814d0aa1d274793ad7bc4
1 /*
2  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
3  *
4  * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation.
9  */
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
14         SHASH   .req    v0      // hash key, loaded from [x3]
15         SHASH2  .req    v1      // SHASH.d[0] ^ SHASH.d[1], replicated in both lanes
16         T1      .req    v2      // scratch; also receives each input block
17         T2      .req    v3      // scratch
18         MASK    .req    v4      // 0xc200000000000000 in each 64-bit lane (0xe1 bytes << 57)
19         XL      .req    v5      // running digest; also the low product a0 * b0
20         XM      .req    v6      // middle product (a1 + a0)(b1 + b0)
21         XH      .req    v7      // high product a1 * b1
22         IN1     .req    v7      // shares v7 with XH: its last read precedes the write to XH
24         .text
25         .arch           armv8-a+crypto  // needed for the 64-bit pmull/pmull2 forms used below
/*
 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
 *                         struct ghash_key const *k, const char *head)
 *
 * Fold input into the 128-bit GHASH digest dg[], 16 bytes at a time: each
 * block is XORed into the digest and the result is multiplied by the hash
 * key in GF(2^128) using PMULL/PMULL2 (one Karatsuba multiply followed by
 * a two-step reduction).
 *
 * In:    w0 = blocks  number of 16-byte blocks to consume from src
 *        x1 = dg      digest; read on entry, written back on exit
 *        x2 = src     input blocks; post-incremented by 16 per block
 *        x3 = k       hash key; 16 bytes are loaded from this address
 *        x4 = head    optional block to process before src, or NULL
 * Out:   dg[] updated in place, no return value
 * Clobb: w0, x2, v0-v7 (condition flags are not modified)
 *
 * If head is NULL, blocks must be >= 1: the first block is then taken from
 * src and the counter is decremented before it is first tested, so a count
 * of 0 would wrap.  If head is non-NULL, blocks may be 0.
 *
 * The key and the digest are accessed as two 64-bit elements (.2d), the
 * same way the input block is, so that each lane holds the native 64-bit
 * value on big-endian as well as little-endian kernels.  A .16b access
 * would place bytes in memory order and leave the lanes byte-reversed on
 * big-endian.
 */
ENTRY(pmull_ghash_update)
        ld1             {SHASH.2d}, [x3]                // key, as 2 x u64
        ld1             {XL.2d}, [x1]                   // digest, as 2 x u64
        movi            MASK.16b, #0xe1
        ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8    // swap key halves
        shl             MASK.2d, MASK.2d, #57           // each lane = 0xc2 << 56
        eor             SHASH2.16b, SHASH2.16b, SHASH.16b       // a1 + a0 in both lanes

        /* do the head block first, if supplied */
        cbz             x4, 0f
        ld1             {T1.2d}, [x4]                   // head does not consume a count
        b               1f

0:      ld1             {T1.2d}, [x2], #16              // next block from src
        sub             w0, w0, #1

1:      /* multiply XL by SHASH in GF(2^128) */
        /* only emitted for little-endian builds: byte-reverse each lane */
CPU_LE( rev64           T1.16b, T1.16b  )

        /* XL ^= block (64-bit halves swapped); T1 = XL.hi ^ XL.lo in both lanes */
        ext             T2.16b, XL.16b, XL.16b, #8
        ext             IN1.16b, T1.16b, T1.16b, #8
        eor             T1.16b, T1.16b, T2.16b
        eor             XL.16b, XL.16b, IN1.16b         // last use of IN1 (v7)

        pmull2          XH.1q, SHASH.2d, XL.2d          // a1 * b1
        eor             T1.16b, T1.16b, XL.16b
        pmull           XL.1q, SHASH.1d, XL.1d          // a0 * b0
        pmull           XM.1q, SHASH2.1d, T1.1d         // (a1 + a0)(b1 + b0)

        /* fold the low and high products into the middle term */
        ext             T1.16b, XL.16b, XH.16b, #8      // { XL.d[1], XH.d[0] }
        eor             T2.16b, XL.16b, XH.16b
        eor             XM.16b, XM.16b, T1.16b
        eor             XM.16b, XM.16b, T2.16b
        pmull           T2.1q, XL.1d, MASK.1d           // reduction, first multiply

        mov             XH.d[0], XM.d[1]
        mov             XM.d[1], XL.d[0]

        eor             XL.16b, XM.16b, T2.16b
        ext             T2.16b, XL.16b, XL.16b, #8
        pmull           XL.1q, XL.1d, MASK.1d           // reduction, second multiply
        eor             T2.16b, T2.16b, XH.16b
        eor             XL.16b, XL.16b, T2.16b          // XL = reduced 128-bit digest

        cbnz            w0, 0b                          // more blocks in src?

        st1             {XL.2d}, [x1]                   // write digest back as 2 x u64
        ret
ENDPROC(pmull_ghash_update)