Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / nettle / arm / neon / sha3-permute.asm
blobbeee09f757350e2e8bcf3d7bb03f287fa274a099
1 C nettle, low-level cryptographics library
2 C
3 C Copyright (C) 2013 Niels Möller
4 C
5 C The nettle library is free software; you can redistribute it and/or modify
6 C it under the terms of the GNU Lesser General Public License as published by
7 C the Free Software Foundation; either version 2.1 of the License, or (at your
8 C option) any later version.
9 C
10 C The nettle library is distributed in the hope that it will be useful, but
11 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 C License for more details.
15 C You should have received a copy of the GNU Lesser General Public License
16 C along with the nettle library; see the file COPYING.LIB. If not, write to
17 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 C MA 02111-1301, USA.
C Assembler setup: the code below uses NEON instructions.
	.file "sha3-permute.asm"
	.fpu	neon

C Core registers.
C   CTX    address of the state being loaded/stored
C   COUNT  loop counter
C   RC     pointer into the .Lrc constant table
define(<CTX>,   <r0>)
define(<COUNT>, <r1>)
define(<RC>,    <r2>)

C State words A0..A24, one 64-bit d register each.
C
C First column.  A0 stands alone in d0; A5/A10 and A15/A20 sit in
C adjacent d registers (d2/d3 and d4/d5) and are operated on pairwise
C through QREG in the round code.
define(<A0>,  <d0>)
define(<A5>,  <d2>)
define(<A10>, <d3>)
define(<A15>, <d4>)
define(<A20>, <d5>)

C Remaining four words of each row, placed in consecutive d registers
C so that (A1,A2), (A3,A4), (A6,A7), ... can be handled as pairs.
define(<A1>,  <d6>)
define(<A2>,  <d7>)
define(<A3>,  <d8>)
define(<A4>,  <d9>)

define(<A6>,  <d16>)
define(<A7>,  <d17>)
define(<A8>,  <d18>)
define(<A9>,  <d19>)

define(<A11>, <d20>)
define(<A12>, <d21>)
define(<A13>, <d22>)
define(<A14>, <d23>)

define(<A16>, <d24>)
define(<A17>, <d25>)
define(<A18>, <d26>)
define(<A19>, <d27>)

define(<A21>, <d28>)
define(<A22>, <d29>)
define(<A23>, <d30>)
define(<A24>, <d31>)

C Temporaries (adjacent: d10/d11).
define(<T0>, <d10>)
define(<T1>, <d11>)

C Per-column scratch values.  C0 is on its own in d1; C1/C2 and C3/C4
C are adjacent (d12/d13 and d14/d15).
define(<C0>, <d1>)
define(<C1>, <d12>)
define(<C2>, <d13>)
define(<C3>, <d14>)
define(<C4>, <d15>)

C ROL(DST, SRC, COUNT)
C Rotate each 64-bit element of SRC left by COUNT bits, leaving the
C result in DST: DST = (SRC >> (64 - COUNT)) | (SRC << COUNT).
C Must have SRC != DST, because DST is written by the first
C instruction and SRC is read again by the second.
C COUNT must be a literal constant; it is evaluated by m4 and emitted
C as an immediate.  All uses in this file pass values from 1 to 62.
define(<ROL>, <
	vshr.u64	$1, $2, #eval(64-$3)
	vsli.i64	$1, $2, #$3
	>)

C sha3_permute(struct sha3_ctx *ctx)

	.text
	.align	3

C Table of 24 64-bit constants, 8-byte aligned.  nettle_sha3_permute
C takes its address with adr and reads one entry per loop iteration,
C in the order listed, using a post-incremented pointer.
.Lrc:
	C Entries 0-5
	.quad	0x0000000000000001
	.quad	0x0000000000008082
	.quad	0x800000000000808A
	.quad	0x8000000080008000
	.quad	0x000000000000808B
	.quad	0x0000000080000001
	C Entries 6-11
	.quad	0x8000000080008081
	.quad	0x8000000000008009
	.quad	0x000000000000008A
	.quad	0x0000000000000088
	.quad	0x0000000080008009
	.quad	0x000000008000000A
	C Entries 12-17
	.quad	0x000000008000808B
	.quad	0x800000000000008B
	.quad	0x8000000000008089
	.quad	0x8000000000008003
	.quad	0x8000000000008002
	.quad	0x8000000000000080
	C Entries 18-23
	.quad	0x000000000000800A
	.quad	0x800000008000000A
	.quad	0x8000000080008081
	.quad	0x8000000000008080
	.quad	0x0000000080000001
	.quad	0x8000000080008008

C nettle_sha3_permute
C
C In:      CTX (r0) = address of 25 consecutive 64-bit words A0..A24
C Effect:  the 25 words are loaded, run through 24 iterations of the
C          round loop below, and written back to the same location.
C Uses:    r0 (left pointing at the A21 slot on return), r1, r2, the
C          condition flags, and d0-d31.  d8-d15 are pushed on entry
C          and popped before returning.
PROLOGUE(nettle_sha3_permute)
	vpush	{d8-d15}

	C Load the state.  A0, A5, A10, A15, A20 are loaded singly; the
	C other four words of each row come in with one vldm.
	vld1.64	{A0}, [CTX]!
	vldm	CTX!, {A1,A2,A3,A4}
	vld1.64	{A5}, [CTX]!
	vldm	CTX!, {A6,A7,A8,A9}
	vld1.64	{A10}, [CTX]!
	vldm	CTX!, {A11,A12,A13,A14}
	vld1.64	{A15}, [CTX]!
	vldm	CTX!, {A16,A17,A18,A19}
	vld1.64	{A20}, [CTX]!
	vldm	CTX, {A21,A22,A23,A24}
	C The writeback forms above advanced CTX by 21 words = 168 bytes
	C (the last vldm has no writeback); rewind to the start.
	sub	CTX, CTX, #168

	C 24 iterations, one .Lrc entry consumed per iteration.
	mov	COUNT, #24
	adr	RC, .Lrc

	.align 3
.Loop:
	C Column parities.
	C   (T0,T1) = (A5 ^ A15, A10 ^ A20)
	C   C0      = A0 ^ A5 ^ A10 ^ A15 ^ A20
	veor	QREG(T0), QREG(A5), QREG(A15)
	veor	C0, A0, T0
	veor	C0, C0, T1
	C   (C1,C2) = xor of the (A1,A2), (A6,A7), ..., (A21,A22) pairs
	veor	QREG(C1), QREG(A1), QREG(A6)
	veor	QREG(C1), QREG(C1), QREG(A11)
	veor	QREG(C1), QREG(C1), QREG(A16)
	veor	QREG(C1), QREG(C1), QREG(A21)

	C   (C3,C4) = xor of the (A3,A4), (A8,A9), ..., (A23,A24) pairs
	veor	QREG(C3), QREG(A3), QREG(A8)
	veor	QREG(C3), QREG(C3), QREG(A13)
	veor	QREG(C3), QREG(C3), QREG(A18)
	veor	QREG(C3), QREG(C3), QREG(A23)

	C D0 = C4 ^ (C1 <<< 1)
	C NOTE: Using ROL macro (and vsli) is slightly slower.
	vshl.i64	T0, C1, #1
	vshr.u64	T1, C1, #63
	veor	T0, T0, C4
	veor	T0, T0, T1
	C Duplicate D0 into T1 so that the pairwise xors below apply it
	C to both halves: A0, A5, A10, A15, A20 ^= D0.
	vmov	T1, T0
	veor	A0, A0, T0
	veor	QREG(A5), QREG(A5), QREG(T0)
	veor	QREG(A15), QREG(A15), QREG(T0)

	C D1 = C0 ^ (C2 <<< 1)
	C D2 = C1 ^ (C3 <<< 1)
	C With (T0,T1) = (D1,D2), each pairwise xor updates one word of
	C column 1 and one word of column 2.
	ROL(T0, C2, 1)
	ROL(T1, C3, 1)
	veor	T0, T0, C0
	veor	T1, T1, C1
	veor	QREG(A1), QREG(A1), QREG(T0)
	veor	QREG(A6), QREG(A6), QREG(T0)
	veor	QREG(A11), QREG(A11), QREG(T0)
	veor	QREG(A16), QREG(A16), QREG(T0)
	veor	QREG(A21), QREG(A21), QREG(T0)

	C D3 = C2 ^ (C4 <<< 1)
	C D4 = C3 ^ (C0 <<< 1)
	C Same scheme for columns 3 and 4.
	ROL(T0, C4, 1)
	ROL(T1, C0, 1)
	veor	T0, T0, C2
	veor	T1, T1, C3
	veor	QREG(A3), QREG(A3), QREG(T0)
	veor	QREG(A8), QREG(A8), QREG(T0)
	veor	QREG(A13), QREG(A13), QREG(T0)
	veor	QREG(A18), QREG(A18), QREG(T0)
	veor	QREG(A23), QREG(A23), QREG(T0)

	C Rotate and move words along a single chain.  Each ROL writes
	C the register that the previous ROL has just finished reading,
	C so only one spare register (T0) is needed; the order of these
	C lines must not change.
	ROL(T0,  A1,  1)
	ROL(A1,  A6,  44)
	ROL(A6,  A9,  20)
	ROL(A9,  A22, 61)
	ROL(A22, A14, 39)
	ROL(A14, A20, 18)
	ROL(A20, A2,  62)
	ROL(A2,  A12, 43)
	ROL(A12, A13, 25)
	ROL(A13, A19, 8)
	ROL(A19, A23, 56)
	ROL(A23, A15, 41)
	ROL(A15, A4,  27)
	ROL(A4,  A24, 14)
	ROL(A24, A21, 2)
	ROL(A21, A8,  55)
	ROL(A8,  A16, 45)
	ROL(A16, A5,  36)
	ROL(A5,  A3,  28)
	ROL(A3,  A18, 21)
	ROL(A18, A17, 15)
	ROL(A17, A11, 10)
	ROL(A11, A7,  6)
	ROL(A7,  A10, 3)
	C New A10 value left in T0

	C Row A0..A4: each word is xored with (next-but-one AND NOT next),
	C indices taken cyclically within the row.  vbic Dd, Dn, Dm
	C computes Dn AND NOT Dm.
	vbic	C0, A2, A1
	vbic	C1, A3, A2
	vbic	C2, A4, A3
	vbic	C3, A0, A4
	vbic	C4, A1, A0

	veor	A0, A0, C0
	C C0 is free again: fetch this iteration's table entry into it
	C and advance RC.  The entry is xored into A0 below.
	vld1.64	{C0}, [RC :64]!
	veor	QREG(A1), QREG(A1), QREG(C1)
	veor	QREG(A3), QREG(A3), QREG(C3)
	veor	A0, A0, C0

	C Row A5..A9, same pattern.
	vbic	C0, A7, A6
	vbic	C1, A8, A7
	vbic	C2, A9, A8
	vbic	C3, A5, A9
	vbic	C4, A6, A5

	veor	A5, A5, C0
	veor	QREG(A6), QREG(A6), QREG(C1)
	veor	QREG(A8), QREG(A8), QREG(C3)

	C Row A10..A14.  The current A10 value is still in T0, so T0 is
	C used as the source and the result is written to A10.
	vbic	C0, A12, A11
	vbic	C1, A13, A12
	vbic	C2, A14, A13
	vbic	C3, T0, A14
	vbic	C4, A11, T0

	veor	A10, T0, C0
	veor	QREG(A11), QREG(A11), QREG(C1)
	veor	QREG(A13), QREG(A13), QREG(C3)

	C Row A15..A19.
	vbic	C0, A17, A16
	vbic	C1, A18, A17
	vbic	C2, A19, A18
	vbic	C3, A15, A19
	vbic	C4, A16, A15

	veor	A15, A15, C0
	veor	QREG(A16), QREG(A16), QREG(C1)
	veor	QREG(A18), QREG(A18), QREG(C3)

	C Row A20..A24.
	vbic	C0, A22, A21
	vbic	C1, A23, A22
	vbic	C2, A24, A23
	vbic	C3, A20, A24
	vbic	C4, A21, A20

	C Decrement the counter here; the flags it sets are consumed by
	C the bne after the remaining NEON xors.
	subs	COUNT, COUNT, #1
	veor	A20, A20, C0
	veor	QREG(A21), QREG(A21), QREG(C1)
	veor	QREG(A23), QREG(A23), QREG(C3)

	bne	.Loop

	C Write the state back in the same layout it was loaded from.
	vst1.64	{A0}, [CTX]!
	vstm	CTX!, {A1,A2,A3,A4}
	vst1.64	{A5}, [CTX]!
	vstm	CTX!, {A6,A7,A8,A9}
	vst1.64	{A10}, [CTX]!
	vstm	CTX!, {A11,A12,A13,A14}
	vst1.64	{A15}, [CTX]!
	vstm	CTX!, {A16,A17,A18,A19}
	vst1.64	{A20}, [CTX]!
	vstm	CTX, {A21,A22,A23,A24}

	vpop	{d8-d15}
	bx	lr
EPILOGUE(nettle_sha3_permute)