1 /* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
12 #include <linux/linkage.h>
13 #include <asm/assembler.h>
/*
 * Register-number aliases for the hand-encoded instructions below.
 *
 * For every vector register N in 0..31 define two absolute symbols,
 * .Lv<N>.2d and .Lv<N>.16b, both equal to N.  The instruction macros paste
 * ".L" in front of their operands (e.g. "v29.16b" -> ".Lv29.16b") to turn a
 * register operand into the 5-bit register field of the opcode.
 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr
/*
 * ARMv8.2 Crypto Extensions instructions
 *
 * These are emitted as raw opcodes via .inst so the file does not depend on
 * assembler support for the SHA-3 mnemonics.  Operands must be spelled
 * vN.16b or vN.2d so that ".L<operand>" resolves to the register number.
 */

/*
 * eor3 rd, rn, rm, ra:  rd = rn ^ rm ^ ra
 * Fields: Rd bits [4:0], Rn [9:5], Ra [14:10], Rm [20:16].
 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm
/*
 * rax1 rd, rn, rm:  rd = rn ^ rol64(rm, 1), per 64-bit lane
 * Fields: Rd bits [4:0], Rn [9:5], Rm [20:16].
 */
	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm
/*
 * bcax rd, rn, rm, ra:  rd = rn ^ (rm & ~ra)
 * Fields: Rd bits [4:0], Rn [9:5], Ra [14:10], Rm [20:16].
 */
	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm
/*
 * xar rd, rn, rm, imm6:  rd = ror64(rn ^ rm, imm6), per 64-bit lane
 * Fields: Rd bits [4:0], Rn [9:5], imm6 [15:10], Rm [20:16].
 * Callers pass (64 - r) to obtain a rotate-left by r.
 */
	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm
/*
 * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
 *
 * Absorb 'blocks' input blocks from 'data' into the Keccak state at 'st',
 * applying the 24-round permutation after each block.
 *
 *   x0  st       25 x 64-bit state lanes; loaded on entry, stored on exit
 *                (x0 itself is advanced by the post-indexed stores)
 *   x1  data     input bytes; advanced by one block per iteration
 *   w2  blocks   number of blocks; must be >= 1, because the count is
 *                decremented before it is first tested
 *   x3  dg_size  digest size in bytes; only bits 6, 4 and 2 are examined
 *                to pick the block length:
 *                  bit 6 set                ->  72 bytes (SHA3-512)
 *                  bit 4 clear              -> 136 bytes (SHA3-256)
 *                  bit 4 set, bit 2 clear   -> 104 bytes (SHA3-384)
 *                  bit 4 set, bit 2 set     -> 144 bytes (SHA3-224)
 *
 * Register use:
 *   v0-v24   state lanes 0..24 (only the low 64 bits are meaningful)
 *   v25-v31  scratch: input words, column parities, round constant
 *   x8       cursor for the state load, then w8 = remaining rounds
 *   x9       pointer into the round-constant table
 *
 * NOTE(review): v8-v15 are overwritten and not restored, although AAPCS64
 * treats their low halves as callee-saved.  Presumably the caller brackets
 * this routine with kernel_neon_begin()/kernel_neon_end() - confirm.
 */
	.text
ENTRY(sha3_ce_transform)
	/* load state; x0 is left untouched so it can be reused for the store */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop */
0:	sub	w2, w2, #1
	mov	w8, #24				// rounds per permutation
	adrp	x9, .Lsha3_rcon
	add	x9, x9, :lo12:.Lsha3_rcon	// x9 = &round constant[0]

	/* load input: the first 7 lanes are absorbed for every digest size */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f			// SHA3-512

	/* lanes 7..12: common to SHA3-384, SHA3-256 and SHA3-224 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b
	eor	v9.8b, v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f			// SHA3-384 or SHA3-224

	// SHA3-256: lanes 13..16
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f			// bit 2 cleared? SHA-384

	// SHA3-224: lanes 13..17
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f				// do not fall into the SHA3-512 tail

	// SHA3-512: lanes 7..8
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b

	/* per-round loop */
3:	sub	w8, w8, #1

	/* theta, step 1: column parities C[x] -> v25..v29 for x = 0..4 */
	eor3	v29.16b, v4.16b, v9.16b, v14.16b
	eor3	v26.16b, v1.16b, v6.16b, v11.16b
	eor3	v28.16b, v3.16b, v8.16b, v13.16b
	eor3	v25.16b, v0.16b, v5.16b, v10.16b
	eor3	v27.16b, v2.16b, v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * theta, step 2: bc[x] = C[x-1] ^ rol(C[x+1], 1).  The order matters:
	 * each destination is overwritten only after its last use as a source.
	 */
	rax1	v30.2d, v29.2d, v26.2d		// bc[0]
	rax1	v26.2d, v26.2d, v28.2d		// bc[2]
	rax1	v28.2d, v28.2d, v25.2d		// bc[4]
	rax1	v25.2d, v25.2d, v27.2d		// bc[1]
	rax1	v27.2d, v27.2d, v29.2d		// bc[3]

	/*
	 * theta step 3 fused with rho and pi: each xar XORs a lane with its
	 * bc[] word and rotates it left by r (encoded as ror by 64 - r),
	 * writing the result to the lane's permuted position.  Lane 0 has
	 * rotation 0 and stays in place, so a plain eor is enough.  The
	 * sequence follows the permutation cycle so that every source lane is
	 * read before it is overwritten; v29, v31 and finally v25-v28, v30
	 * hold results that have no free home register yet.
	 */
	eor	v0.16b, v0.16b, v30.16b
	xar	v29.2d, v1.2d, v25.2d, (64 - 1)
	xar	v1.2d, v6.2d, v25.2d, (64 - 44)
	xar	v6.2d, v9.2d, v28.2d, (64 - 20)
	xar	v9.2d, v22.2d, v26.2d, (64 - 61)
	xar	v22.2d, v14.2d, v28.2d, (64 - 39)
	xar	v14.2d, v20.2d, v30.2d, (64 - 18)
	xar	v31.2d, v2.2d, v26.2d, (64 - 62)
	xar	v2.2d, v12.2d, v26.2d, (64 - 43)
	xar	v12.2d, v13.2d, v27.2d, (64 - 25)
	xar	v13.2d, v19.2d, v28.2d, (64 - 8)
	xar	v19.2d, v23.2d, v27.2d, (64 - 56)
	xar	v23.2d, v15.2d, v30.2d, (64 - 41)
	xar	v15.2d, v4.2d, v28.2d, (64 - 27)
	xar	v28.2d, v24.2d, v28.2d, (64 - 14)
	xar	v24.2d, v21.2d, v25.2d, (64 - 2)
	xar	v8.2d, v8.2d, v27.2d, (64 - 55)
	xar	v4.2d, v16.2d, v25.2d, (64 - 45)
	xar	v16.2d, v5.2d, v30.2d, (64 - 36)
	xar	v5.2d, v3.2d, v27.2d, (64 - 28)
	xar	v27.2d, v18.2d, v27.2d, (64 - 21)
	xar	v3.2d, v17.2d, v26.2d, (64 - 15)
	xar	v25.2d, v11.2d, v25.2d, (64 - 10)
	xar	v26.2d, v7.2d, v26.2d, (64 - 6)
	xar	v30.2d, v10.2d, v30.2d, (64 - 3)

	/* chi, one group of five lanes at a time: a ^= ~b & c */
	bcax	v20.16b, v31.16b, v22.16b, v8.16b
	bcax	v21.16b, v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b, v8.16b, v31.16b

	/* v31 is free from here on: fetch this round's constant, x9 += 8 */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b, v3.16b
	bcax	v18.16b, v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b, v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	v7.16b, v30.16b, v9.16b, v4.16b
	bcax	v8.16b, v4.16b, v5.16b, v9.16b
	bcax	v9.16b, v9.16b, v6.16b, v5.16b
	bcax	v5.16b, v5.16b, v30.16b, v6.16b
	bcax	v6.16b, v6.16b, v4.16b, v30.16b

	bcax	v3.16b, v27.16b, v0.16b, v28.16b
	bcax	v4.16b, v28.16b, v1.16b, v0.16b
	bcax	v0.16b, v0.16b, v2.16b, v1.16b
	bcax	v1.16b, v1.16b, v27.16b, v2.16b
	bcax	v2.16b, v2.16b, v28.16b, v27.16b

	/* iota: fold the round constant into lane 0 */
	eor	v0.16b, v0.16b, v31.16b

	cbnz	w8, 3b				// more rounds for this block
	cbnz	w2, 0b				// more blocks to absorb

	/* save state */
	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	ret
ENDPROC(sha3_ce_transform)

	/*
	 * Keccak-f[1600] round constants, one 64-bit word per round, read
	 * sequentially through x9 by the round loop above.
	 */
	.section	".rodata", "a"
	.balign		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008