Merge tag 'xtensa-20180225' of git://github.com/jcmvbkbc/linux-xtensa
[cris-mirror.git] / arch / arm64 / crypto / sha3-ce-core.S
blob 332ad75306903fea7cae56339b672f33e2be6299
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
4  *
5  * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
12 #include <linux/linkage.h>
13 #include <asm/assembler.h>
15         .irp    regno,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
           /*
            * For every vector register number N (0..31) define the local symbols
            * .LvN.16b and .LvN.2d with the value N.  The instruction macros in this
            * file prepend ".L" to a register operand such as "v7.16b" and use the
            * resulting symbol as the 5-bit register field of a raw .inst word.
            */
16         .set    .Lv\regno\().16b, \regno
17         .set    .Lv\regno\().2d, \regno
18         .endr
20         /*
21          * ARMv8.2 Crypto Extensions instructions
22          */
23         .macro  eor3, rd, rn, rm, ra
           /*
            * EOR3 (three-way exclusive OR): rd = rn ^ rm ^ ra.
            * Emitted as a raw opcode; the operand register numbers come from the
            * .Lv<N>.<arrangement> symbols and are placed, high to low, in
            * Rm [20:16], Ra [14:10], Rn [9:5] and Rd [4:0].
            */
24         .inst   0xce000000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd
25         .endm
27         .macro  rax1, rd, rn, rm
           /*
            * RAX1 (rotate and exclusive OR): rd = rn ^ rol(rm, 1) on each 64-bit
            * element.  Emitted as a raw opcode with Rm in [20:16], Rn in [9:5]
            * and Rd in [4:0].
            */
28         .inst   0xce608c00 | (.L\rm << 16) | (.L\rn << 5) | .L\rd
29         .endm
31         .macro  bcax, rd, rn, rm, ra
           /*
            * BCAX (bit clear and exclusive OR): rd = rn ^ (rm & ~ra).
            * Emitted as a raw opcode with Rm in [20:16], Ra in [14:10],
            * Rn in [9:5] and Rd in [4:0].
            */
32         .inst   0xce200000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd
33         .endm
35         .macro  xar, rd, rn, rm, imm6
           /*
            * XAR (exclusive OR and rotate): rd = ror(rn ^ rm, imm6) on each 64-bit
            * element.  Callers in this file pass (64 - r) to obtain a left rotate
            * by r.  Emitted as a raw opcode with Rm in [20:16], imm6 in [15:10],
            * Rn in [9:5] and Rd in [4:0].
            */
36         .inst   0xce800000 | (.L\rm << 16) | ((\imm6) << 10) | (.L\rn << 5) | .L\rd
37         .endm
39         /*
40          * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
            *
            * Absorb 'blocks' consecutive input blocks from 'data' into the
            * 25 x 64-bit state at 'st', running 24 permutation rounds after each
            * block, and write the updated state back to 'st' at the end.
            *
            * Register use (arguments in the order of the prototype above):
            *   x0       st; post-incremented by the final stores
            *   x1       data; post-incremented as input is consumed
            *   w2       number of blocks still to process
            *   x3       dg_size; only bits 6, 4 and 2 are examined, to select how
            *            many 64-bit lanes of each block are absorbed (9, 13, 17
            *            or 18).  This matches dg_size being the digest length in
            *            bytes (64, 48, 32, 28).
            *   x8 / w8  pointer used while loading the state, then round counter
            *   x9       cursor into .Lsha3_rcon
            *   v0-v24   state lanes 0..24, one lane in the low 64 bits of each
            *   v25-v31  scratch
            *
            * 'blocks' must be at least 1: the counter is decremented before it is
            * first tested, so a value of 0 wraps around instead of returning.
            *
            * NOTE(review): v8-v15 are overwritten and never restored, although
            * AAPCS64 makes their low 64 bits callee-saved.  Presumably every
            * caller brackets this routine with kernel_neon_begin() and
            * kernel_neon_end() - confirm against the glue code.
41          */
42         .text
43 ENTRY(sha3_ce_transform)
44         /* load state: lanes 0..3 via x0 (left unmodified), lanes 4..24 via x8 */
45         add     x8, x0, #32
46         ld1     { v0.1d- v3.1d}, [x0]
47         ld1     { v4.1d- v7.1d}, [x8], #32
48         ld1     { v8.1d-v11.1d}, [x8], #32
49         ld1     {v12.1d-v15.1d}, [x8], #32
50         ld1     {v16.1d-v19.1d}, [x8], #32
51         ld1     {v20.1d-v23.1d}, [x8], #32
52         ld1     {v24.1d}, [x8]
           // Per-block loop: w2 counts blocks, w8 counts the 24 rounds of this
           // block, and x9 is reset to the first round constant for every block.
54 0:      sub     w2, w2, #1
55         mov     w8, #24
56         adr_l   x9, .Lsha3_rcon
58         /* load input: lanes 0..6 (56 bytes) are absorbed for every digest size */
59         ld1     {v25.8b-v28.8b}, [x1], #32
60         ld1     {v29.8b-v31.8b}, [x1], #24
61         eor     v0.8b, v0.8b, v25.8b
62         eor     v1.8b, v1.8b, v26.8b
63         eor     v2.8b, v2.8b, v27.8b
64         eor     v3.8b, v3.8b, v28.8b
65         eor     v4.8b, v4.8b, v29.8b
66         eor     v5.8b, v5.8b, v30.8b
67         eor     v6.8b, v6.8b, v31.8b
69         tbnz    x3, #6, 2f              // SHA3-512
           // lanes 7..12 (48 more bytes): all sizes except SHA3-512
71         ld1     {v25.8b-v28.8b}, [x1], #32
72         ld1     {v29.8b-v30.8b}, [x1], #16
73         eor      v7.8b,  v7.8b, v25.8b
74         eor      v8.8b,  v8.8b, v26.8b
75         eor      v9.8b,  v9.8b, v27.8b
76         eor     v10.8b, v10.8b, v28.8b
77         eor     v11.8b, v11.8b, v29.8b
78         eor     v12.8b, v12.8b, v30.8b
80         tbnz    x3, #4, 1f              // SHA3-384 or SHA3-224
82         // SHA3-256: lanes 13..16 complete a 17-lane (136-byte) block
83         ld1     {v25.8b-v28.8b}, [x1], #32
84         eor     v13.8b, v13.8b, v25.8b
85         eor     v14.8b, v14.8b, v26.8b
86         eor     v15.8b, v15.8b, v27.8b
87         eor     v16.8b, v16.8b, v28.8b
88         b       3f
           // SHA3-384 stops here with 13 lanes (104 bytes) absorbed
90 1:      tbz     x3, #2, 3f              // bit 2 cleared? SHA-384
92         // SHA3-224: lanes 13..17 complete an 18-lane (144-byte) block
93         ld1     {v25.8b-v28.8b}, [x1], #32
94         ld1     {v29.8b}, [x1], #8
95         eor     v13.8b, v13.8b, v25.8b
96         eor     v14.8b, v14.8b, v26.8b
97         eor     v15.8b, v15.8b, v27.8b
98         eor     v16.8b, v16.8b, v28.8b
99         eor     v17.8b, v17.8b, v29.8b
100         b       3f
102         // SHA3-512: lanes 7..8 complete a 9-lane (72-byte) block
103 2:      ld1     {v25.8b-v26.8b}, [x1], #16
104         eor      v7.8b,  v7.8b, v25.8b
105         eor      v8.8b,  v8.8b, v26.8b
            // ---- one permutation round; executed 24 times per block ----
107 3:      sub     w8, w8, #1
            // theta, part 1: column parities.  After these ten eor3 operations
            // v25..v29 hold the XOR of the five lanes of columns 0..4.
109         eor3    v29.16b,  v4.16b,  v9.16b, v14.16b
110         eor3    v26.16b,  v1.16b,  v6.16b, v11.16b
111         eor3    v28.16b,  v3.16b,  v8.16b, v13.16b
112         eor3    v25.16b,  v0.16b,  v5.16b, v10.16b
113         eor3    v27.16b,  v2.16b,  v7.16b, v12.16b
114         eor3    v29.16b, v29.16b, v19.16b, v24.16b
115         eor3    v26.16b, v26.16b, v16.16b, v21.16b
116         eor3    v28.16b, v28.16b, v18.16b, v23.16b
117         eor3    v25.16b, v25.16b, v15.16b, v20.16b
118         eor3    v27.16b, v27.16b, v17.16b, v22.16b
            // theta, part 2: per-column term = parity[x-1] ^ rol(parity[x+1], 1).
            // The order matters: each parity is used as the rotated operand
            // before its own register is overwritten with a result.
120         rax1    v30.2d, v29.2d, v26.2d  // bc[0]
121         rax1    v26.2d, v26.2d, v28.2d  // bc[2]
122         rax1    v28.2d, v28.2d, v25.2d  // bc[4]
123         rax1    v25.2d, v25.2d, v27.2d  // bc[1]
124         rax1    v27.2d, v27.2d, v29.2d  // bc[3]
            // Lane 0 only receives its theta term (it is neither rotated nor moved).
            // rho + pi for the other 24 lanes: each xar XORs a lane with its
            // column's theta term, rotates it left by the amount written after
            // "64 -", and stores it in its new position.  The sequence is a chain:
            // every destination has already been read by an earlier line.
            // v29 and v31 hold displaced lanes, and v25-v28/v30 are recycled as
            // lane storage by the last xar that consumes each theta term.
126         eor      v0.16b,  v0.16b, v30.16b
127         xar      v29.2d,   v1.2d,  v25.2d, (64 - 1)
128         xar       v1.2d,   v6.2d,  v25.2d, (64 - 44)
129         xar       v6.2d,   v9.2d,  v28.2d, (64 - 20)
130         xar       v9.2d,  v22.2d,  v26.2d, (64 - 61)
131         xar      v22.2d,  v14.2d,  v28.2d, (64 - 39)
132         xar      v14.2d,  v20.2d,  v30.2d, (64 - 18)
133         xar      v31.2d,   v2.2d,  v26.2d, (64 - 62)
134         xar       v2.2d,  v12.2d,  v26.2d, (64 - 43)
135         xar      v12.2d,  v13.2d,  v27.2d, (64 - 25)
136         xar      v13.2d,  v19.2d,  v28.2d, (64 - 8)
137         xar      v19.2d,  v23.2d,  v27.2d, (64 - 56)
138         xar      v23.2d,  v15.2d,  v30.2d, (64 - 41)
139         xar      v15.2d,   v4.2d,  v28.2d, (64 - 27)
140         xar      v28.2d,  v24.2d,  v28.2d, (64 - 14)
141         xar      v24.2d,  v21.2d,  v25.2d, (64 - 2)
142         xar       v8.2d,   v8.2d,  v27.2d, (64 - 55)
143         xar       v4.2d,  v16.2d,  v25.2d, (64 - 45)
144         xar      v16.2d,   v5.2d,  v30.2d, (64 - 36)
145         xar       v5.2d,   v3.2d,  v27.2d, (64 - 28)
146         xar      v27.2d,  v18.2d,  v27.2d, (64 - 21)
147         xar       v3.2d,  v17.2d,  v26.2d, (64 - 15)
148         xar      v25.2d,  v11.2d,  v25.2d, (64 - 10)
149         xar      v26.2d,   v7.2d,  v26.2d, (64 - 6)
150         xar      v30.2d,  v10.2d,  v30.2d, (64 - 3)
            // chi, one group of five bcax per row, written to lanes 20..24 first
            // and lanes 0..4 last.  bcax d, n, m, a computes d = n ^ (m & ~a).
            // Within each group a register is overwritten only after its last
            // read, and a later group may overwrite inputs of an earlier one.
152         bcax    v20.16b, v31.16b, v22.16b,  v8.16b
153         bcax    v21.16b,  v8.16b, v23.16b, v22.16b
154         bcax    v22.16b, v22.16b, v24.16b, v23.16b
155         bcax    v23.16b, v23.16b, v31.16b, v24.16b
156         bcax    v24.16b, v24.16b,  v8.16b, v31.16b
            // v31 is dead from here on: fetch this round's constant into both
            // 64-bit elements and advance x9 to the next table entry.
158         ld1r    {v31.2d}, [x9], #8
            // chi, lanes 15..19
160         bcax    v17.16b, v25.16b, v19.16b,  v3.16b
161         bcax    v18.16b,  v3.16b, v15.16b, v19.16b
162         bcax    v19.16b, v19.16b, v16.16b, v15.16b
163         bcax    v15.16b, v15.16b, v25.16b, v16.16b
164         bcax    v16.16b, v16.16b,  v3.16b, v25.16b
            // chi, lanes 10..14
166         bcax    v10.16b, v29.16b, v12.16b, v26.16b
167         bcax    v11.16b, v26.16b, v13.16b, v12.16b
168         bcax    v12.16b, v12.16b, v14.16b, v13.16b
169         bcax    v13.16b, v13.16b, v29.16b, v14.16b
170         bcax    v14.16b, v14.16b, v26.16b, v29.16b
            // chi, lanes 5..9
172         bcax     v7.16b, v30.16b,  v9.16b,  v4.16b
173         bcax     v8.16b,  v4.16b,  v5.16b,  v9.16b
174         bcax     v9.16b,  v9.16b,  v6.16b,  v5.16b
175         bcax     v5.16b,  v5.16b, v30.16b,  v6.16b
176         bcax     v6.16b,  v6.16b,  v4.16b, v30.16b
            // chi, lanes 0..4
178         bcax     v3.16b, v27.16b,  v0.16b, v28.16b
179         bcax     v4.16b, v28.16b,  v1.16b,  v0.16b
180         bcax     v0.16b,  v0.16b,  v2.16b,  v1.16b
181         bcax     v1.16b,  v1.16b, v27.16b,  v2.16b
182         bcax     v2.16b,  v2.16b, v28.16b, v27.16b
            // iota: fold the round constant into lane 0
184         eor      v0.16b,  v0.16b, v31.16b
186         cbnz    w8, 3b                  // rounds left for this block
187         cbnz    w2, 0b                  // blocks left to absorb
189         /* save state: all 25 lanes are written back through x0, which is advanced */
190         st1     { v0.1d- v3.1d}, [x0], #32
191         st1     { v4.1d- v7.1d}, [x0], #32
192         st1     { v8.1d-v11.1d}, [x0], #32
193         st1     {v12.1d-v15.1d}, [x0], #32
194         st1     {v16.1d-v19.1d}, [x0], #32
195         st1     {v20.1d-v23.1d}, [x0], #32
196         st1     {v24.1d}, [x0]
197         ret
198 ENDPROC(sha3_ce_transform)
200         .section        ".rodata", "a"
201         .p2align        8                       // 2^8 = 256-byte boundary
202 .Lsha3_rcon:
            /*
             * 24 x 64-bit round constants.  The round loop of sha3_ce_transform
             * reads one entry per round, in table order, starting from the first
             * entry for every block.
             */
203         .quad   0x0000000000000001, 0x0000000000008082, 0x800000000000808a      // rounds  0 -  2
204         .quad   0x8000000080008000, 0x000000000000808b, 0x0000000080000001      // rounds  3 -  5
205         .quad   0x8000000080008081, 0x8000000000008009, 0x000000000000008a      // rounds  6 -  8
206         .quad   0x0000000000000088, 0x0000000080008009, 0x000000008000000a      // rounds  9 - 11
207         .quad   0x000000008000808b, 0x800000000000008b, 0x8000000000008089      // rounds 12 - 14
208         .quad   0x8000000000008003, 0x8000000000008002, 0x8000000000000080      // rounds 15 - 17
209         .quad   0x000000000000800a, 0x800000008000000a, 0x8000000080008081      // rounds 18 - 20
210         .quad   0x8000000000008080, 0x0000000080000001, 0x8000000080008008      // rounds 21 - 23