Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / nettle / x86_64 / umac-nh-n.asm
blobbcb994874ea87b16bbbd1fea7e8b4bebbff29b9c
1 C nettle, low-level cryptographics library
2 C
3 C Copyright (C) 2013 Niels Möller
4 C
5 C The nettle library is free software; you can redistribute it and/or modify
6 C it under the terms of the GNU Lesser General Public License as published by
7 C the Free Software Foundation; either version 2.1 of the License, or (at your
8 C option) any later version.
9 C
10 C The nettle library is distributed in the hope that it will be useful, but
11 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 C License for more details.
15 C You should have received a copy of the GNU Lesser General Public License
16 C along with the nettle library; see the file COPYING.LIB. If not, write to
17 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 C MA 02111-1301, USA.
C Argument registers, listed in the order of the C prototype
C umac_nh_n(out, n, key, length, msg).
define(<OUT>,    <%rdi>)
define(<ITERS>,  <%rsi>)
define(<KEY>,    <%rdx>)
define(<LENGTH>, <%rcx>)
define(<MSG>,    <%r8>)

C XM0-XM3: message words, two per register, duplicated into both halves.
define(<XM0>, <%xmm0>)
define(<XM1>, <%xmm1>)
define(<XM2>, <%xmm2>)
define(<XM3>, <%xmm3>)

C XK0-XK3: key words as loaded or after interleaving.
define(<XK0>, <%xmm4>)
define(<XK1>, <%xmm5>)
define(<XK2>, <%xmm6>)
define(<XK3>, <%xmm7>)

C XT0-XT3: scratch; the four-output path also keeps permuted keys in
C XT2 and XT3.
define(<XT0>, <%xmm8>)
define(<XT1>, <%xmm9>)
define(<XT2>, <%xmm10>)
define(<XT3>, <%xmm11>)

C XY0, XY1: 64-bit accumulators, two sums per register.
define(<XY0>, <%xmm12>)
define(<XY1>, <%xmm13>)

C HI2LO src, dst: copy dwords [0,1,2,3] of src to dst as [1,1,3,3], so
C that pmuludq (which reads dwords 0 and 2) can reach the odd dwords.
C The dollar sign is quoted to keep m4 from treating $0 as a macro
C argument reference.
define(<HI2LO>, <pshufd <$>0xf5,>)
C FIXME: Would be nice if we could force the key array to be 16-byte
C aligned.

	.file "umac-nh-n.asm"

	C umac_nh_n(uint64_t *out, unsigned n, const uint32_t *key,
	C	    unsigned length, const uint8_t *msg)
	C
	C Accumulates, for several key offsets at once, 64-bit sums of
	C products (m[i] + k[i]) * (m[i+4] + k[i+4]) over 32-byte message
	C blocks, using 32-bit adds (paddd) and 32x32->64 multiplies (pmuludq).
	C
	C Dispatch on n:
	C   n <  3	.Lnh2, stores 2 sums (16 bytes) at out
	C   n == 3	.Lnh3, stores 3 sums (24 bytes) at out
	C   n >  3	.Lnh4, stores 4 sums (32 bytes) at out
	C
	C Every loop consumes 32 bytes of msg and 32 bytes of key per pass,
	C always runs at least once, and repeats while length - 32 is above
	C zero. Callers are presumably expected to pass a non-zero multiple
	C of 32 as length -- TODO confirm against the C caller.
	C
	C key and msg are read with unaligned loads; out is written with
	C unaligned stores. Only xmm0-xmm13 and the argument registers are
	C modified here. W64_ENTRY/W64_EXIT are defined elsewhere (presumably
	C the Windows ABI shim: 5 arguments, 14 xmm registers -- verify); they
	C do not return, so each exit needs its own ret.
	.text
	ALIGN(16)
PROLOGUE(_nettle_umac_nh_n)
	W64_ENTRY(5, 14)
	pxor	XY0, XY0		C first accumulator = 0
	C n is a 32-bit unsigned; the upper half of its register is not
	C guaranteed to be zero, so compare the low 32 bits only.
	cmpl	$3, XREG(ITERS)
	jc	.Lnh2			C n < 3
	je	.Lnh3			C n == 3

.Lnh4:
	C Four sums. Load key words 0-11 and step KEY past them.
	movups	(KEY), XK0
	movups	16(KEY), XT2
	movups	32(KEY), XK2
	lea	48(KEY), KEY
	C Leave XK2 untouched, and put permuted keys in XK0, XK1, XT2, XT3
	movaps	XK0, XK1

	punpcklqdq XT2, XK0	C [0,1,4,5]
	punpckhqdq XT2, XK1	C [2,3,6,7]
	movaps	XT2, XT3
	punpcklqdq XK2, XT2	C [4,5, 8, 9]
	punpckhqdq XK2, XT3	C [6,7,10,11]

	movaps	XY0, XY1		C second accumulator = 0

.Loop4:
	movups	(MSG), XT0
	movups	16(MSG), XT1

	pshufd	$0xee, XT1, XM3	C [6,7,6,7]
	pshufd	$0x44, XT1, XM2	C [4,5,4,5]
	pshufd	$0xee, XT0, XM1	C [2,3,2,3]
	pshufd	$0x44, XT0, XM0	C [0,1,0,1]

	C Sums 0 and 1 (XY0). The message is added into the key registers,
	C which are rebuilt below, so the XM registers stay intact for
	C sums 2 and 3.
	paddd	XM0, XK0
	paddd	XM1, XK1
	paddd	XM2, XT2
	paddd	XM3, XT3

	HI2LO	XK0, XT0
	HI2LO	XT2, XT1
	pmuludq	XK0, XT2		C even dwords
	pmuludq	XT0, XT1		C odd dwords
	paddq	XT2, XY0
	paddq	XT1, XY0

	HI2LO	XK1, XT0
	HI2LO	XT3, XT1
	pmuludq	XK1, XT3
	pmuludq	XT0, XT1
	paddq	XT3, XY0
	paddq	XT1, XY0

	C Rebuild the permuted keys, eight key words further on. They are
	C only read below, so they are still valid as the keys for sums
	C 0 and 1 of the next pass.
	movaps	XK2, XK0
	movaps	XK2, XK1
	movups	(KEY), XT2
	movups	16(KEY), XK2
	punpcklqdq XT2, XK0	C [ 8, 9,12,13]
	punpckhqdq XT2, XK1	C [10,11,14,15]
	movaps	XT2, XT3

	punpcklqdq XK2, XT2	C [12,13,16,17]
	punpckhqdq XK2, XT3	C [14,15,18,19]

	C Sums 2 and 3 (XY1). Here the keys are added into the message
	C registers instead.
	paddd	XK0, XM0
	paddd	XK1, XM1
	paddd	XT2, XM2
	paddd	XT3, XM3

	HI2LO	XM0, XT0
	HI2LO	XM2, XT1
	pmuludq	XM0, XM2
	pmuludq	XT0, XT1
	paddq	XM2, XY1
	paddq	XT1, XY1

	HI2LO	XM1, XT0
	HI2LO	XM3, XT1
	pmuludq	XM1, XM3
	pmuludq	XT0, XT1
	paddq	XM3, XY1
	paddq	XT1, XY1

	C The pointer updates use lea so the flags from subl reach ja.
	subl	$32, XREG(LENGTH)
	lea	32(MSG), MSG
	lea	32(KEY), KEY
	ja	.Loop4

	movups	XY0, (OUT)
	movups	XY1, 16(OUT)

	W64_EXIT(5, 14)
	ret

.Lnh3:
	C Three sums. XK0, XK1 hold the first eight key words of the
	C current block.
	movups	(KEY), XK0
	movups	16(KEY), XK1
	movaps	XY0, XY1		C second accumulator = 0
.Loop3:
	lea	32(KEY), KEY
	movups	(MSG), XT0
	movups	16(MSG), XT1
	movups	(KEY), XK2		C key words 8-11 of this block
	movups	16(KEY), XK3		C key words 12-15 of this block
	pshufd	$0xee, XT1, XM3	C [6,7,6,7]
	pshufd	$0x44, XT1, XM2	C [4,5,4,5]
	pshufd	$0xee, XT0, XM1	C [2,3,2,3]
	pshufd	$0x44, XT0, XM0	C [0,1,0,1]

	C Iteration 2
	C Uses the unpermuted message against key words 8-15. XY1 collects
	C two partial sums that are folded together after the loop.
	paddd	XK2, XT0
	paddd	XK3, XT1
	HI2LO	XT0, XT2
	HI2LO	XT1, XT3
	pmuludq	XT0, XT1
	pmuludq	XT2, XT3
	paddq	XT1, XY1
	paddq	XT3, XY1

	C Iteration 0,1
	movaps	XK0, XT0
	punpcklqdq XK1, XK0	C [0,1,4,5]
	punpckhqdq XK1, XT0	C [2,3,6,7]
	paddd	XK0, XM0
	paddd	XT0, XM1
	movaps	XK2, XK0
	movaps	XK1, XT0
	punpcklqdq XK2, XK1	C [4,5,8,9]
	punpckhqdq XK2, XT0	C [6,7,10,11]
	paddd	XK1, XM2
	paddd	XT0, XM3

	HI2LO	XM0, XT0
	HI2LO	XM2, XT1
	pmuludq	XM0, XM2
	pmuludq	XT0, XT1
	paddq	XM2, XY0
	paddq	XT1, XY0

	HI2LO	XM1, XT0
	HI2LO	XM3, XT1
	pmuludq	XM1, XM3
	pmuludq	XT0, XT1
	paddq	XM3, XY0
	paddq	XT1, XY0
	C lea and movaps leave the flags from subl intact for ja.
	subl	$32, XREG(LENGTH)
	lea	32(MSG), MSG
	C Key words 8-15 of this block are words 0-7 of the next one.
	movaps	XK2, XK0
	movaps	XK3, XK1

	ja	.Loop3

	C Fold the two halves of XY1 into its low quadword.
	pshufd	$0xe, XY1, XT0
	paddq	XT0, XY1
	movups	XY0, (OUT)
	movlpd	XY1, 16(OUT)		C third sum only: 8 bytes

	W64_EXIT(5, 14)
	ret

.Lnh2:
	C Two sums; also taken for n below 2.
	C Explode message as [0,1,0,1] [2,3,2,3] [4,5,4,5] [6,7, 6, 7]
	C Interleave keys as [0,1,4,5] [2,3,6,7] [4,5,8,9] [6,7,10,11]
	movups	(KEY), XK0
	lea	16(KEY), KEY
.Loop2:
	movups	(MSG), XM0
	movups	16(MSG), XM1
	pshufd	$0xee, XM1, XM3	C [6,7,6,7]
	pshufd	$0x44, XM1, XM2	C [4,5,4,5]
	pshufd	$0xee, XM0, XM1	C [2,3,2,3]
	pshufd	$0x44, XM0, XM0	C [0,1,0,1]

	movups	(KEY), XK1		C key words 4-7 of this block
	movups	16(KEY), XK2		C key words 8-11 of this block
	movaps	XK0, XT0
	punpcklqdq XK1, XK0	C [0,1,4,5]
	punpckhqdq XK1, XT0	C [2,3,6,7]
	paddd	XK0, XM0
	paddd	XT0, XM1
	movaps	XK2, XK0		C becomes words 0-3 of the next block
	movaps	XK1, XT0
	punpcklqdq XK2, XK1	C [4,5,8,9]
	punpckhqdq XK2, XT0	C [6,7,10,11]
	paddd	XK1, XM2
	paddd	XT0, XM3

	HI2LO	XM0, XT0
	HI2LO	XM2, XT1
	pmuludq	XM0, XM2
	pmuludq	XT0, XT1
	paddq	XM2, XY0
	paddq	XT1, XY0

	HI2LO	XM1, XT0
	HI2LO	XM3, XT1
	pmuludq	XM1, XM3
	pmuludq	XT0, XT1
	paddq	XM3, XY0
	paddq	XT1, XY0
	C The pointer updates use lea so the flags from subl reach ja.
	subl	$32, XREG(LENGTH)
	lea	32(MSG), MSG
	lea	32(KEY), KEY

	ja	.Loop2

	movups	XY0, (OUT)
.Lend:
	W64_EXIT(5, 14)
	ret
EPILOGUE(_nettle_umac_nh_n)