Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / nettle / sparc64 / arcfour-crypt.asm
blob a5cd942c212cfb8076e4c150e7c8aa44e00bc415
1 C -*- mode: asm; asm-comment-char: ?C; -*-
2 C nettle, low-level cryptographics library
3 C
4 C Copyright (C) 2002, 2005 Niels Möller
5 C
6 C The nettle library is free software; you can redistribute it and/or modify
7 C it under the terms of the GNU Lesser General Public License as published by
8 C the Free Software Foundation; either version 2.1 of the License, or (at your
9 C option) any later version.
11 C The nettle library is distributed in the hope that it will be useful, but
12 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 C License for more details.
16 C You should have received a copy of the GNU Lesser General Public License
17 C along with the nettle library; see the file COPYING.LIB. If not, write to
18 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 C MA 02111-1301, USA.
C Set WITH_ALIGN to YES to enable the more complex code that special-cases
C SRC and DST with compatible alignment.
C NOTE(review): nothing in the code visible in this file tests WITH_ALIGN;
C confirm whether it is still consulted anywhere before removing it.
define(<WITH_ALIGN>, <YES>)

C Register allocation
C
C Arguments of arcfour_crypt, as seen in the %i registers after the
C register-window save at function entry.
define(<CTX>,    <%i0>)
define(<LENGTH>, <%i1>)
define(<DST>,    <%i2>)
define(<SRC>,    <%i3>)

C I1 and I2 take turns holding the index for the current key-stream byte
C and the index for the following one.
define(<I1>, <%i4>)
define(<I2>, <%i5>)

C J is the second index into the state; SI and SJ receive the state bytes
C loaded from [CTX + i] and [CTX + J].
define(<J>,  <%g1>)
define(<SI>, <%g2>)
define(<SJ>, <%g3>)

C TMP receives the key-stream byte, TMP2 holds an extra source byte,
C N counts 4-byte iterations of the main loop, and DATA accumulates the
C output bytes before they are stored.
define(<TMP>,  <%o0>)
define(<TMP2>, <%o1>)
define(<N>,    <%o2>)
define(<DATA>, <%o3>)
C ARCFOUR_BYTE(i, ni, res)
C
C Produces the next byte of the key stream.
C
C   i    register that already holds the index for this access
C   ni   register that receives (i + 1) & 0xff, the index for the next access
C   res  register that receives the resulting key byte
C
C Steps, in terms of the state bytes addressed relative to CTX:
C   SI  = state[i]
C   J   = (J + SI) & 0xff
C   SJ  = state[J]
C   state[J] = SI, state[i] = SJ      (the two bytes are exchanged)
C   res = state[(SI + SJ) & 0xff]
C
C Updates J; overwrites SI and SJ.  The computation of ni is interleaved
C with the loads and stores rather than grouped together.
define(<ARCFOUR_BYTE>, <
	ldub	[CTX + $1], SI
	add	$1, 1, $2
	add	J, SI, J
	and	J, 0xff, J
	ldub	[CTX + J], SJ
	and	$2, 0xff, $2
	stb	SI, [CTX + J]
	add	SI, SJ, SI
	and	SI, 0xff, SI
	stb	SJ, [CTX + $1]
	ldub	[CTX + SI], $3
>)dnl
C Size in bytes of the stack frame allocated by the save at function entry.
define(<FRAME_SIZE>, 192)

	.file "arcfour-crypt.asm"

	C arcfour_crypt(struct arcfour_ctx *ctx,
	C               unsigned length, uint8_t *dst,
	C               const uint8_t *src)
	C
	C XORs LENGTH bytes read from SRC with successive key-stream bytes
	C and writes the result to DST.  The two stream indices are loaded
	C from [CTX + ARCFOUR_I] on entry and written back there on exit.
	C When LENGTH is zero nothing is read or written.
	C
	C Strategy: handle single bytes and byte pairs until DST is
	C 4-byte aligned, then emit one aligned 32-bit store per four
	C input bytes, then finish the remaining 0-3 bytes with a 16-bit
	C and/or an 8-bit store.  SRC is always read one byte at a time.
	C
	C Invariant at every jump to .Ldone: I2 holds the index used for
	C the most recent key-stream byte; that is the value saved as I.

	.section	".text"
	.align 16
	.proc	020

PROLOGUE(nettle_arcfour_crypt)

	save	%sp, -FRAME_SIZE, %sp
	cmp	LENGTH, 0
	be	.Lend
	nop

	C Load both I and J with one halfword load: the high byte
	C becomes I, the low byte becomes J.
	lduh	[CTX + ARCFOUR_I], I1
	and	I1, 0xff, J
	srl	I1, 8, I1

	C We want an even address for DST.
	C I1 is advanced to the index of the first byte to generate;
	C the masking is done in the branch delay slot.
	andcc	DST, 1, %g0
	add	I1, 1 ,I1
	beq	.Laligned2
	and	I1, 0xff, I1

	C DST is odd: process one byte.
	mov	I1, I2
	ldub	[SRC], DATA
	ARCFOUR_BYTE(I2, I1, TMP)
	subcc	LENGTH, 1, LENGTH
	add	SRC, 1, SRC
	xor	DATA, TMP, DATA
	stb	DATA, [DST]
	C Condition codes are still those of the subcc above.
	beq	.Ldone
	add	DST, 1, DST

.Laligned2:

	cmp	LENGTH, 2
	blu	.Lfinal1
	C Harmless delay slot instruction
	andcc	DST, 2, %g0
	beq	.Laligned4
	nop

	C DST is 2-aligned but not 4-aligned: process two bytes and
	C write them with a single halfword store.
	ldub	[SRC], DATA
	ARCFOUR_BYTE(I1, I2, TMP)
	ldub	[SRC + 1], TMP2
	add	SRC, 2, SRC
	xor	DATA, TMP, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	subcc	LENGTH, 2, LENGTH
	or	DATA, TMP, DATA

	sth	DATA, [DST]
	beq	.Ldone
	add	DST, 2, DST

.Laligned4:
	cmp	LENGTH, 4
	blu	.Lfinal2
	C Harmless delay slot instruction
	C (N = number of whole 4-byte groups)
	srl	LENGTH, 2, N

.Loop:
	C Main loop, with aligned writes

	C FIXME: Could check if SRC is aligned, and
	C use 32-bit reads in that case.

	C Four key-stream bytes are generated per iteration; each
	C encrypted byte is shifted into DATA, first byte in the most
	C significant position, and the word is stored once.
	ldub	[SRC], DATA
	ARCFOUR_BYTE(I1, I2, TMP)
	ldub	[SRC + 1], TMP2
	xor	TMP, DATA, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	ldub	[SRC + 2], TMP2
	or	TMP, DATA, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I1, I2, TMP)
	xor	TMP2, TMP, TMP
	ldub	[SRC + 3], TMP2
	or	TMP, DATA, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	or	TMP, DATA, DATA
	subcc	N, 1, N
	add	SRC, 4, SRC
	st	DATA, [DST]
	bne	.Loop
	add	DST, 4, DST

	C Keep only the 0-3 bytes left over after the whole words.
	andcc	LENGTH, 3, LENGTH
	beq	.Ldone
	nop

.Lfinal2:
	C DST address must be 2-aligned
	cmp	LENGTH, 2
	blu	.Lfinal1
	nop

	ldub	[SRC], DATA
	ARCFOUR_BYTE(I1, I2, TMP)
	ldub	[SRC + 1], TMP2
	add	SRC, 2, SRC
	xor	DATA, TMP, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	or	DATA, TMP, DATA

	sth	DATA, [DST]
	C No instruction since the cmp LENGTH, 2 above has changed the
	C condition codes: equal means exactly two bytes were left.
	beq	.Ldone
	add	DST, 2, DST

.Lfinal1:
	C One last byte.
	mov	I1, I2
	ldub	[SRC], DATA
	ARCFOUR_BYTE(I2, I1, TMP)
	xor	DATA, TMP, DATA
	stb	DATA, [DST]

.Ldone:
	C Save back I and J
	sll	I2, 8, I2
	or	I2, J, I2
	stuh	I2, [CTX + ARCFOUR_I]

.Lend:
	C Return to the caller; the register window is popped by the
	C restore in the delay slot of ret.
	ret
	restore

EPILOGUE(nettle_arcfour_crypt)
210 C Stats for AES 128 on sellafield.lysator.liu.se (UE450, 296 MHz)
212 C 1: nettle-1.13 C-code
213 C 2: New assembler code (basically the same as for sparc32)
215 C MB/s cycles/byte
216 C 1: 3.6 77.7
217 C 2: 21.8 13.0