Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / nettle / sparc64 / aes-encrypt-internal.asm
blob3aea16f645045ef8652c80f5a7d8949e7b00b8f3
1 C -*- mode: asm; asm-comment-char: ?C; -*-
2 C nettle, low-level cryptographics library
3 C
4 C Copyright (C) 2002, 2005 Niels Möller
5 C
6 C The nettle library is free software; you can redistribute it and/or modify
7 C it under the terms of the GNU Lesser General Public License as published by
8 C the Free Software Foundation; either version 2.1 of the License, or (at your
9 C option) any later version.
11 C The nettle library is distributed in the hope that it will be useful, but
12 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 C License for more details.
16 C You should have received a copy of the GNU Lesser General Public License
17 C along with the nettle library; see the file COPYING.LIB. If not, write to
18 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 C MA 02111-1301, USA.
21 C The only difference between this code and the sparc32 code is the
22 C frame offsets, and the magic BIAS when accessing the stack (which
23 C doesn't matter, since we don't access any data on the stack).
C The AES macros are shared with the sparc32 port; pull them in from there.
include_src(sparc32/aes.m4)

C Incoming arguments, in the %i registers
define(<CTX>,		<%i0>)
define(<T>,		<%i1>)
define(<LENGTH>,	<%i2>)
define(<DST>,		<%i3>)
define(<SRC>,		<%i4>)

C AES state. Two sets of four words (W and X) are kept so that the
C unrolled round loop can alternate W -> X and X -> W.
define(<W0>,		<%l0>)
define(<W1>,		<%l1>)
define(<W2>,		<%l2>)
define(<W3>,		<%l3>)

define(<X0>,		<%l4>)
define(<X1>,		<%l5>)
define(<X2>,		<%l6>)
define(<X3>,		<%l7>)

C %o0-%o3 hold the loop invariants T0-T3 (those names are not defined
C in this file); the remaining two %o registers used here are:
define(<KEY>,		<%o4>)
define(<ROUND>,		<%o5>)

C TMP1, TMP2 and TMP3 live in %g1, %g2 and %g3.
C Layout of the sparc64 stack frame, relative to %fp:
C
C   %fp -   8	OS-dependent link field
C   %fp -  16	OS-dependent link field
C   %fp - 192	OS register save area (22*8 == 176 bytes)
C
C FRAME_SIZE is the number of bytes the function's save instruction
C subtracts from %sp.
define(<FRAME_SIZE>,	192)
C Logical file name for this translation unit.
	.file "aes-encrypt-internal.asm"

C _aes_encrypt(struct aes_context *ctx,
C	       const struct aes_table *T,
C	       unsigned length, uint8_t *dst,
C	       uint8_t *src)
C
C Processes the input one 16-byte block per pass of .Lblock_loop:
C SRC and DST advance by 16 and LENGTH drops by 16 on every pass.
C The block loop only exits when LENGTH reaches exactly zero, so
C LENGTH is expected to be a multiple of 16. A LENGTH of zero returns
C immediately without touching SRC or DST.
C
C Registers: arguments arrive in %i0-%i4 (CTX, T, LENGTH, DST, SRC);
C W0-W3 / X0-X3 are the two copies of the AES state; KEY walks the
C subkeys and ROUND counts double-round iterations. T0-T3, the AES_*
C offsets and the AES_LOAD / AES_ROUND / AES_FINAL_ROUND macros come
C from sparc32/aes.m4.
C
C Note that every branch below is followed by a delay-slot instruction
C that executes whether or not the branch is taken.

	.section	".text"
	.align 16
	.proc	020

PROLOGUE(_nettle_aes_encrypt)

	save	%sp, -FRAME_SIZE, %sp
	cmp	LENGTH, 0
	be	.Lend

	C Loop invariants. The first add sits in the delay slot of the
	C branch above, so it also runs (harmlessly) when LENGTH is zero.
	add	T, AES_TABLE0, T0
	add	T, AES_TABLE1, T1
	add	T, AES_TABLE2, T2
	add	T, AES_TABLE3, T3

.Lblock_loop:
	C Read src, and add initial subkey
	add	CTX, AES_KEYS, KEY
	AES_LOAD(0, SRC, KEY, W0)
	AES_LOAD(1, SRC, KEY, W1)
	AES_LOAD(2, SRC, KEY, W2)
	AES_LOAD(3, SRC, KEY, W3)

	C Must be even, and includes the final round
	ld	[AES_NROUNDS + CTX], ROUND
	add	SRC, 16, SRC
	add	KEY, 16, KEY

	C Each pass of .Lround_loop performs two rounds, hence the halving.
	srl	ROUND, 1, ROUND
	C Last two rounds handled specially
	sub	ROUND, 1, ROUND
.Lround_loop:
	C The AES_ROUND macro uses T0,... T3
	C	Transform W -> X
	AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
	AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
	AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
	AES_ROUND(3, W3, W0, W1, W2, KEY, X3)

	C	Transform X -> W
	AES_ROUND(4, X0, X1, X2, X3, KEY, W0)
	AES_ROUND(5, X1, X2, X3, X0, KEY, W1)
	AES_ROUND(6, X2, X3, X0, X1, KEY, W2)
	AES_ROUND(7, X3, X0, X1, X2, KEY, W3)

	subcc	ROUND, 1, ROUND
	bne	.Lround_loop
	C Delay slot: step KEY past the 32 bytes of subkey used by the
	C two rounds above. Runs on every pass, including the last.
	add	KEY, 32, KEY

	C Penultimate round
	AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
	AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
	AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
	AES_ROUND(3, W3, W0, W1, W2, KEY, X3)

	add	KEY, 16, KEY
	C Final round
	AES_FINAL_ROUND(0, T, X0, X1, X2, X3, KEY, DST)
	AES_FINAL_ROUND(1, T, X1, X2, X3, X0, KEY, DST)
	AES_FINAL_ROUND(2, T, X2, X3, X0, X1, KEY, DST)
	AES_FINAL_ROUND(3, T, X3, X0, X1, X2, KEY, DST)

	subcc	LENGTH, 16, LENGTH
	bne	.Lblock_loop
	C Delay slot: advance DST to the next output block.
	add	DST, 16, DST

.Lend:
	C Return to the caller. restore runs in the delay slot of ret and
	C releases the register window opened by save in the prologue.
	C Without the ret, execution would run past the end of the function.
	ret
	restore
EPILOGUE(_nettle_aes_encrypt)
140 C Stats for AES 128 on sellafield.lysator.liu.se (UE450, 296 MHz)
142 C 1. nettle-1.13 C-code (nettle-1.13 assembler was broken for sparc64)
143 C 2. New C-code
144 C 3. New assembler code (basically the same as for sparc32)
146 C MB/s cycles/block
147 C 1 0.8 5781
148 C 2 1.8 2460
149 C 3 8.2 548