Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / nettle / x86_64 / salsa20-core-internal.asm
blob0e0cdf6ac0eeccf32cddd5d5a30ef3bf770e8082
1 C nettle, low-level cryptographics library
2 C
3 C Copyright (C) 2012 Niels Möller
4 C
5 C The nettle library is free software; you can redistribute it and/or modify
6 C it under the terms of the GNU Lesser General Public License as published by
7 C the Free Software Foundation; either version 2.1 of the License, or (at your
8 C option) any later version.
9 C
10 C The nettle library is distributed in the hope that it will be useful, but
11 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 C License for more details.
15 C You should have received a copy of the GNU Lesser General Public License
16 C along with the nettle library; see the file COPYING.LIB. If not, write to
17 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 C MA 02111-1301, USA.
C Register assignments for _nettle_salsa20_core.

C Argument registers: destination block, source block, round count.
define(<DST>,	<%rdi>)
define(<SRC>,	<%rsi>)
define(<COUNT>,	<%rdx>)

C Working state: four 128-bit registers, four 32-bit words each.
define(<X0>,	<%xmm0>)
define(<X1>,	<%xmm1>)
define(<X2>,	<%xmm2>)
define(<X3>,	<%xmm3>)

C Temporaries.
define(<T0>,	<%xmm4>)
define(<T1>,	<%xmm5>)

C Word-select masks handed to SWAP. The digits name the 32-bit lanes that
C are set to all ones, least significant lane first.
define(<M0101>,	<%xmm6>)
define(<M0110>,	<%xmm7>)
define(<M0011>,	<%xmm8>)

C The function body uses SWAP and QROUND, which are not defined in this
C file.
C NOTE(review): presumably both come from this include - confirm.
include_src(<x86_64/salsa20.m4>)
C _salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
C
C Loads the 16 32-bit words at src, runs rounds/2 iterations of the loop
C below (two QROUND invocations per iteration), adds the original input
C words to the result with 32-bit wrapping adds, and stores the 16 words
C at dst.
C
C In:	DST = dst, SRC = src, COUNT = rounds (only the low 32 bits are used)
C Uses:	%eax, X0-X3, T0, T1, M0101, M0110, M0011 and the flags, plus
C	whatever QROUND and SWAP use internally.
C
C rounds is halved with a right shift, so an odd value is rounded down.
C If the halved count is zero (rounds < 2) the loop is skipped. Without
C that check the decl/jnz pair at the bottom of the loop would wrap the
C counter to 0xffffffff and iterate 2^32 times.
C
C src and dst are only accessed with movups, so neither has to be
C 16-byte aligned.
C
C NOTE(review): W64_ENTRY and W64_EXIT are defined outside this file;
C presumably they adapt the Windows x64 calling convention for a function
C with 3 arguments that uses 9 xmm registers - confirm against the macro
C definitions.
	.text
	ALIGN(16)
PROLOGUE(_nettle_salsa20_core)
	W64_ENTRY(3, 9)

	C Load mask registers. movd leaves all ones in the lowest 32-bit lane
	C and zeros in the other three; each pshufd then copies that lane
	C into the positions named by the mask (least significant left).
	mov	$-1, %eax
	movd	%eax, M0101
	pshufd	$0x09, M0101, M0011	C 01 01 00 00
	pshufd	$0x41, M0101, M0110	C 01 00 00 01
	pshufd	$0x22, M0101, M0101	C 01 00 01 00

	C Read the four input rows.
	movups	(SRC), X0
	movups	16(SRC), X1
	movups	32(SRC), X2
	movups	48(SRC), X3

	C The original rows are now diagonals.
	SWAP(X0, X1, M0101)
	SWAP(X2, X3, M0101)
	SWAP(X1, X3, M0110)
	SWAP(X0, X2, M0011)

	C Each loop iteration does two rounds, so iterate rounds/2 times.
	C shrl sets ZF from its result; skip the loop when nothing is left
	C to do instead of letting the counter wrap around.
	shrl	$1, XREG(COUNT)
	jz	.Lend

	ALIGN(16)
.Loop:
	QROUND(X0, X1, X2, X3)
	pshufd	$0x93, X1, X1	C 11 00 01 10 (least sign. left)
	pshufd	$0x4e, X2, X2	C 10 11 00 01
	pshufd	$0x39, X3, X3	C 01 10 11 00

	QROUND(X0, X3, X2, X1)

	C Inverse rotation of the rows
	pshufd	$0x39, X1, X1	C 01 10 11 00
	pshufd	$0x4e, X2, X2	C 10 11 00 01
	pshufd	$0x93, X3, X3	C 11 00 01 10

	decl	XREG(COUNT)
	jnz	.Loop

.Lend:
	C Apply the initial SWAP sequence in reverse order.
	SWAP(X0, X2, M0011)
	SWAP(X1, X3, M0110)
	SWAP(X0, X1, M0101)
	SWAP(X2, X3, M0101)

	C dst = state + src, one 32-bit word at a time. The input is re-read
	C from memory because X0-X3 were overwritten in place.
	movups	(SRC), T0
	movups	16(SRC), T1
	paddd	T0, X0
	paddd	T1, X1
	movups	X0,(DST)
	movups	X1,16(DST)
	movups	32(SRC), T0
	movups	48(SRC), T1
	paddd	T0, X2
	paddd	T1, X3
	movups	X2,32(DST)
	movups	X3,48(DST)

	W64_EXIT(3, 9)
	ret
EPILOGUE(_nettle_salsa20_core)