1 C
-*- mode: asm
; asm-comment-char: ?C; -*-
2 C nettle
, low-level cryptographics library
4 C Copyright
(C
) 2002, 2005 Niels Möller
6 C The nettle library is free software
; you can redistribute it and/or modify
7 C it under the terms of the GNU Lesser General
Public License as published by
8 C the Free Software Foundation
; either version 2.1 of the License, or (at your
9 C option
) any later version.
11 C The nettle library is distributed
in the hope that it will be useful
, but
12 C WITHOUT ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY
13 C
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
Public
14 C License for more details.
16 C You should have received a copy of the GNU Lesser General
Public License
17 C along with the nettle library
; see the file COPYING.LIB. If not, write to
18 C the Free Software Foundation
, Inc.
, 51 Franklin Street
, Fifth Floor
, Boston
,
21 include_src
(<sparc32
/aes.m4
>)
C LENGTH is an m4 alias for register %i2. The main code subtracts 16 from
C it with subcc after the final-round macros.
26 define
(<LENGTH>,<%i2
>)
30 C AES state
, two copies for unrolling
42 C
%o0
-%o3 are used for
loop invariants T0
-T3
C ROUND is an m4 alias for register %o5. It is loaded from the context at
C offset AES_NROUNDS and later decremented by one with subcc.
44 define
(<ROUND
>, <%o5
>)
46 C
%g1
, %g2
, %g3 are TMP1
, TMP2
and TMP3
48 C I
'm still slightly confused by the frame layout, specified in
49 C "SYSTEM V APPLICATION BINARY INTERFACE SPARC Processor Supplement".
50 C However, Sun's cc generates a
104 byte stack frame for a function
51 C with no
local variables
, so that should be good enough for us too.
53 C The sparc32 stack frame looks like
55 C
%fp
- 4: OS
-dependent link field
56 C
%fp
- 8: OS
-dependent link field
57 C
%fp
- 104: OS register save area
C FRAME_SIZE is the byte count that the save instruction in the prologue
C subtracts from the stack pointer.
58 define
(<FRAME_SIZE
>, 104)
60 .file
"aes-encrypt-internal.asm"
62 C _aes_encrypt
(struct aes_context
*ctx
,
63 C const
struct aes_table
*T
,
64 C unsigned
length, uint8_t
*dst
,
C Entry point of the AES encryption routine. Only part of the original
C listing is visible here: labels, branches and their delay slots are not
C shown, so the comments below describe the visible statements only.
71 PROLOGUE
(_nettle_aes_encrypt
)
C Open a new register window and reserve FRAME_SIZE bytes of stack.
73 save
%sp, -FRAME_SIZE
, %sp
84 C Read src
, and add initial subkey
C KEY = CTX + AES_KEYS. KEY is then handed to every AES_LOAD, AES_ROUND
C and AES_FINAL_ROUND invocation below.
85 add CTX
, AES_KEYS
, KEY
C Four loads, one per state word. The first argument is the word index
C 0..3 and the last argument is W0..W3.
86 AES_LOAD
(0, SRC
, KEY
, W0
)
87 AES_LOAD
(1, SRC
, KEY
, W1
)
88 AES_LOAD
(2, SRC
, KEY
, W2
)
89 AES_LOAD
(3, SRC
, KEY
, W3
)
91 C Must be even
, and includes the final round
C Fetch the round count from the context, at offset AES_NROUNDS, into ROUND.
92 ld
[AES_NROUNDS
+ CTX
], ROUND
97 C Last two rounds handled specially
100 C The AES_ROUND
macro uses T0
,... T3
C First unrolled round: indices 0..3, inputs taken from W0..W3 with the
C word order rotated by one position per invocation, last argument X0..X3.
C NOTE(review): presumably the last macro argument is the destination
C word - confirm against sparc32/aes.m4.
102 AES_ROUND
(0, W0
, W1
, W2
, W3
, KEY
, X0
)
103 AES_ROUND
(1, W1
, W2
, W3
, W0
, KEY
, X1
)
104 AES_ROUND
(2, W2
, W3
, W0
, W1
, KEY
, X2
)
105 AES_ROUND
(3, W3
, W0
, W1
, W2
, KEY
, X3
)
C Second unrolled round: same rotation pattern with the two state copies
C swapped. Inputs are X0..X3, the last argument is W0..W3, and the index
C argument continues with 4..7.
108 AES_ROUND
(4, X0
, X1
, X2
, X3
, KEY
, W0
)
109 AES_ROUND
(5, X1
, X2
, X3
, X0
, KEY
, W1
)
110 AES_ROUND
(6, X2
, X3
, X0
, X1
, KEY
, W2
)
111 AES_ROUND
(7, X3
, X0
, X1
, X2
, KEY
, W3
)
C Decrement ROUND by one and set the condition codes. The branch that
C consumes them is not visible in this view.
113 subcc ROUND
, 1, ROUND
C One more AES_ROUND group from W0..W3 with last argument X0..X3, so that
C the final-round macros below take X0..X3 as their inputs.
118 AES_ROUND
(0, W0
, W1
, W2
, W3
, KEY
, X0
)
119 AES_ROUND
(1, W1
, W2
, W3
, W0
, KEY
, X1
)
120 AES_ROUND
(2, W2
, W3
, W0
, W1
, KEY
, X2
)
121 AES_ROUND
(3, W3
, W0
, W1
, W2
, KEY
, X3
)
C Final round: four invocations with index 0..3, each given the table
C pointer T, a rotated ordering of X0..X3, KEY and DST.
C NOTE(review): presumably each invocation writes one output word through
C DST - confirm against sparc32/aes.m4.
125 AES_FINAL_ROUND
(0, T
, X0
, X1
, X2
, X3
, KEY
, DST
)
126 AES_FINAL_ROUND
(1, T
, X1
, X2
, X3
, X0
, KEY
, DST
)
127 AES_FINAL_ROUND
(2, T
, X2
, X3
, X0
, X1
, KEY
, DST
)
128 AES_FINAL_ROUND
(3, T
, X3
, X0
, X1
, X2
, KEY
, DST
)
C Subtract 16 from LENGTH and set the condition codes. The branch that
C tests the result is not shown in this view.
130 subcc
LENGTH, 16, LENGTH
C Close the function opened by PROLOGUE above.
137 EPILOGUE
(_nettle_aes_encrypt
)
139 C Some stats from adriana.lysator.liu.se
(SS1000$
, 85 MHz
), for AES
128
141 C
1: nettle
-1.13 C
-code
142 C
2: nettle
-1.13 assembler
144 C
4: New assembler
, first correct version
145 C
5: New assembler
, with basic scheduling of AES_ROUND.
146 C
6: New assembler
, with
loop invariants T0
-T3.
147 C
7: New assembler
, with basic scheduling also of AES_FINAL_ROUND.
149 C MB
/s cycles
/block Code
size (bytes
)