C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2002, 2005 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C Define to YES to enable the complex code that special-cases SRC
C and DST with compatible alignment.
C Build-time switch for the alignment special case described in the note
C above. The opening parenthesis must directly follow the macro name for
C m4 to collect the arguments, and the quoted name must not contain a
C newline.
define(<WITH_ALIGN>, <YES>)
C LENGTH is a symbolic name for register %i1.
C NOTE(review): presumably the incoming `length' argument of
C arcfour_crypt -- confirm against the full register list.
define(<LENGTH>,<%i1>)
C Computes the next byte of the key stream. As input, i must
C already point to the index for the current access; the index
C for the next access is stored in ni. The resulting key byte is
C stored in res.
C
C ARCFOUR_BYTE(i, ni, res)
48 define
(<ARCFOUR_BYTE
>, <
C FIXME: Consider using the caller's window
C FRAME_SIZE is the amount subtracted from %sp by the `save' instruction
C at function entry.
define(<FRAME_SIZE>, 104)

C Record the source file name in the assembler output.
	.file "arcfour-crypt.asm"
C arcfour_crypt(struct arcfour_ctx *ctx,
C               unsigned length, uint8_t *dst,
75 PROLOGUE
(nettle_arcfour_crypt
)
77 save
%sp, -FRAME_SIZE
, %sp
83 lduh
[CTX
+ ARCFOUR_I
], I1
87 C We want an even address for DST
95 ARCFOUR_BYTE
(I2
, I1
, TMP
)
96 subcc
LENGTH, 1, LENGTH
107 C Harmless delay slot instruction
113 ARCFOUR_BYTE
(I1
, I2
, TMP
)
119 ARCFOUR_BYTE
(I2
, I1
, TMP
)
121 subcc
LENGTH, 2, LENGTH
131 C Harmless delay slot instruction
C Main loop, with aligned writes
C
C FIXME: Could check if SRC is aligned, and
C use 32-bit reads in that case.
141 ARCFOUR_BYTE
(I1
, I2
, TMP
)
146 ARCFOUR_BYTE
(I2
, I1
, TMP
)
152 ARCFOUR_BYTE
(I1
, I2
, TMP
)
158 ARCFOUR_BYTE
(I2
, I1
, TMP
)
167 andcc
LENGTH, 3, LENGTH
C DST address must be 2-aligned
178 ARCFOUR_BYTE
(I1
, I2
, TMP
)
184 ARCFOUR_BYTE
(I2
, I1
, TMP
)
195 ARCFOUR_BYTE
(I2
, I1
, TMP
)
203 stuh I2
, [CTX
+ ARCFOUR_I
]
209 EPILOGUE
(nettle_arcfour_crypt
)
C Some stats from adriana.lysator.liu.se (SS1000E, 85 MHz), for AES 128
C
C 1: nettle-1.13 C-code
C 2: First working version of the assembler code
C 3: Moved load of source byte
C 4: Better instruction scheduling
C 5: Special case SRC and DST with compatible alignment
C 6: After bugfix (reorder of ld [CTX+SI+SJ] and st [CTX + SI])
C 7: Unrolled only twice, with byte-accesses
C 8: Unrolled, using 8-bit reads and aligned 32-bit writes.
C
C MB/s	cycles/byte	Code size (bytes)