Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / nettle / arm / neon / salsa20-core-internal.asm
blobfe26e5c50c90cd1dd363fb99faec126b4d7cb104
C nettle, low-level cryptographics library
C
C Copyright (C) 2013 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.

C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.

C Salsa20 core function for 32-bit ARM with NEON.
C Syntax: GNU as, preprocessed with m4. A line starting with C is a comment.

	.file "salsa20-core-internal.asm"
	.fpu	neon

C Core registers carrying the three function arguments.
define(<DST>, <r0>)
define(<SRC>, <r1>)
define(<ROUNDS>, <r2>)

C The four 128-bit rows of the 4x4 word state.
define(<X0>, <q0>)
define(<X1>, <q1>)
define(<X2>, <q2>)
define(<X3>, <q3>)

C Scratch registers, used by QROUND and by the row shuffles.
define(<T0>, <q8>)
define(<T1>, <q9>)

C Lane masks loaded from .Lmasks. The digits name the lanes, lane 0 first,
C and a 1 marks a lane whose bits are all set.
define(<M0101>, <q10>)
define(<M0110>, <q11>)
define(<M0011>, <q12>)

C Copies of input rows 1 to 3, added back to the state after the last round.
define(<S1>, <q13>)
define(<S2>, <q14>)
define(<S3>, <q15>)

C QROUND(a, b, c, d)
C
C Four quarter rounds in parallel, one per 32-bit lane of the row registers:
C	b ^= rotl32(a + d, 7)
C	c ^= rotl32(a + b, 9)
C	d ^= rotl32(b + c, 13)
C	a ^= rotl32(c + d, 18)
C Each rotation is formed from a left shift by n and a logical right shift
C by 32 - n of the same sum; both halves are XORed into the destination.
C Clobbers T0 and T1.
define(<QROUND>, <
	vadd.i32	T0, $1, $4
	vshl.i32	T1, T0, #7
	vshr.u32	T0, T0, #25
	veor		$2, $2, T0
	veor		$2, $2, T1

	vadd.i32	T0, $1, $2
	vshl.i32	T1, T0, #9
	vshr.u32	T0, T0, #23
	veor		$3, $3, T0
	veor		$3, $3, T1

	vadd.i32	T0, $2, $3
	vshl.i32	T1, T0, #13
	vshr.u32	T0, T0, #19
	veor		$4, $4, T0
	veor		$4, $4, T1

	vadd.i32	T0, $3, $4
	vshl.i32	T1, T0, #18
	vshr.u32	T0, T0, #14
	veor		$1, $1, T0
	veor		$1, $1, T1
>)

C Lane-select masks for the vbit row shuffles in _nettle_salsa20_core.
C A word of -1 has all bits set and selects that lane; 0 leaves it alone.
C The three rows are loaded, in this order, into M0101, M0110 and M0011.
C The table sits in .text and is addressed PC-relative with adr.
	.text
	.align 4
.Lmasks:
	.int 0,-1, 0,-1
	.int 0,-1,-1, 0
	.int 0, 0,-1,-1

C _salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
C
C In:	DST (r0)	where the 16 result words are stored
C	SRC (r1)	the 16 input words
C	ROUNDS (r2)	round count; each loop iteration performs two rounds
C Uses r12, q0-q3, q8-q15 and the condition flags. q4-q7, which the ARM
C procedure call standard requires a callee to preserve, are never touched.
C The loop test sits at the bottom, so the loop body runs at least once.
C NOTE(review): ROUNDS is presumably a positive even number; confirm
C against the callers.

PROLOGUE(_nettle_salsa20_core)
	vldm	SRC, {X0,X1,X2,X3}

	C Input rows:
	C	 0  1  2  3	X0
	C	 4  5  6  7	X1
	C	 8  9 10 11	X2
	C	12 13 14 15	X3
	C Permuted to:
	C	 0  5 10 15
	C	 4  9 14  3
	C	 8 13  2  7
	C	12  1  6 11

	C FIXME: Construct in some other way?
	adr	r12, .Lmasks
	vldm	r12, {M0101, M0110, M0011}

	C Keep rows 1 to 3 of the input for the final addition. Row 0 is
	C not kept in a register; it is loaded again from SRC at the end.
	vmov	S1, X1
	vmov	S2, X2
	vmov	S3, X3

	C Swaps in columns 1, 3:
	C	 0  5  2  7	X0 ^
	C	 4  1  6  3	T0 v
	C	 8 13 10 15	T1  ^
	C	12  9 14 11	X3  v
	vmov	T0, X1
	vmov	T1, X2
	vbit	T0, X0, M0101
	vbit	X0, X1, M0101
	vbit	T1, X3, M0101
	vbit	X3, X2, M0101

	C Swaps in columns 1, 2:
	C	 0  5  2  7	X0
	C	 4  9 14  3	X1 ^
	C	 8 13 10 15	T1 |
	C	12  1  6 11	X3 v
	vmov	X1, T0
	vbit	X1, X3, M0110
	vbit	X3, T0, M0110

	C Swaps in columns 2, 3:
	C	 0  5 10 15	X0 ^
	C	 4  9 14  3	X1 |
	C	 8 13  2  7	X2 v
	C	12  1  6 11	X3
	vmov	X2, T1
	vbit	X2, X0, M0011
	vbit	X0, T1, M0011

.Loop:
	C First round of the pair, on the permuted layout.
	QROUND(X0, X1, X2, X3)

	C Rotate rows, to get
	C	 0  5 10 15
	C	 3  4  9 14  >>> 1
	C	 2  7  8 13  >>> 2
	C	 1  6 11 12  >>> 3
	vext.32	X1, X1, X1, #3
	vext.32	X2, X2, X2, #2
	vext.32	X3, X3, X3, #1

	C Second round of the pair; X1 and X3 trade places in the argument list.
	QROUND(X0, X3, X2, X1)

	C Two rounds done. The flags set here are consumed by the bhi below;
	C the vext instructions in between do not change them.
	subs	ROUNDS, ROUNDS, #2
	C Inverse rotation
	vext.32	X1, X1, X1, #1
	vext.32	X2, X2, X2, #2
	vext.32	X3, X3, X3, #3

	C Loop again only if the subtraction did not borrow and left a
	C non-zero count.
	bhi	.Loop

	C Inverse swaps
	vmov	T1, X2
	vbit	T1, X0, M0011
	vbit	X0, X2, M0011

	vmov	T0, X1
	vbit	T0, X3, M0110
	vbit	X3, X1, M0110

	vmov	X1, T0
	vmov	X2, T1
	vbit	X1, X0, M0101
	vbit	X0, T0, M0101
	vbit	X2, X3, M0101
	vbit	X3, T1, M0101

	C Add the input words to the state: row 0 is loaded again from SRC,
	C rows 1 to 3 come from the copies in S1, S2 and S3.
	vld1.64	{T0}, [SRC]
	vadd.u32	X0, X0, T0
	vadd.u32	X1, X1, S1
	vadd.u32	X2, X2, S2
	vadd.u32	X3, X3, S3

	vstm	DST, {X0,X1,X2,X3}
	bx	lr
EPILOGUE(_nettle_salsa20_core)

C Output is switched off from here on, so nothing below reaches the
C assembler. What follows looks like a gdb helper for printing q0-q3.
divert(-1)
176 define salsastate
177 p/x $q0.u32
178 p/x $q1.u32
179 p/x $q2.u32
180 p/x $q3.u32