1 C nettle
, low-level cryptographics library
3 C Copyright
(C
) 2013 Niels Möller
5 C The nettle library is free software
; you can redistribute it and/or modify
6 C it under the terms of the GNU Lesser General
Public License as published by
7 C the Free Software Foundation
; either version 2.1 of the License, or (at your
8 C option
) any later version.
10 C The nettle library is distributed
in the hope that it will be useful
, but
11 C WITHOUT ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY
12 C
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
Public
13 C License for more details.
15 C You should have received a copy of the GNU Lesser General
Public License
16 C along with the nettle library
; see the file COPYING.LIB. If not, write to
17 C the Free Software Foundation
, Inc.
, 51 Franklin Street
, Fifth Floor
, Boston
,
20 .file
"sha3-permute.asm"
68 C
ROL(DST
, SRC
, COUNT
)
69 C Must have SRC
!= DST
C First half of the rotate: $1 = $2 >> (64 - $3), unsigned, on 64-bit lanes.
C The shift amount is computed at macro-expansion time by eval.
C NOTE(review): the instruction that merges in ($2 << $3) to complete the
C rotate is not visible in this excerpt.
71 vshr.u64
$1, $2, #eval
(64-$3)
74 C sha3_permute
(struct sha3_ctx
*ctx
)
C Table of 24 64-bit words emitted in order with .quad. The values are the
C standard Keccak-f[1600] round constants, one per round.
C NOTE(review): the label for this table is not visible in this excerpt;
C presumably RC is pointed at it and the permutation reads one word per
C round via "vld1.64 {C0}, [RC :64]!" -- confirm.
79 .quad
0x0000000000000001
80 .quad
0x0000000000008082
81 .quad
0x800000000000808A
82 .quad
0x8000000080008000
83 .quad
0x000000000000808B
84 .quad
0x0000000080000001
85 .quad
0x8000000080008081
86 .quad
0x8000000000008009
87 .quad
0x000000000000008A
88 .quad
0x0000000000000088
89 .quad
0x0000000080008009
90 .quad
0x000000008000000A
91 .quad
0x000000008000808B
92 .quad
0x800000000000008B
93 .quad
0x8000000000008089
94 .quad
0x8000000000008003
95 .quad
0x8000000000008002
96 .quad
0x8000000000000080
97 .quad
0x000000000000800A
98 .quad
0x800000008000000A
99 .quad
0x8000000080008081
100 .quad
0x8000000000008080
101 .quad
0x0000000080000001
102 .quad
0x8000000080008008
C nettle_sha3_permute: function entry and exit are emitted by the
C PROLOGUE/EPILOGUE macros. The visible body loads state words from memory
C at CTX into the A registers, XORs them with the C and T0 temporaries,
C loads one word from RC, decrements COUNT, and stores the A registers back.
C NOTE(review): CTX, COUNT, RC, the A/C/T register names and QREG are
C defined outside this excerpt, and a number of instructions of the original
C routine are not visible here; comments below describe only what is shown.
104 PROLOGUE
(nettle_sha3_permute
)
C Load state words into A1-A4, A6-A9, A10, A11-A14, A15, A16-A19, A20 and
C A21-A24. The "CTX!" and "[CTX]!" forms advance CTX past the data just
C loaded; the final vldm has no writeback, so CTX is left pointing at the
C A21 word.
108 vldm CTX
!, {A1,A2,A3,A4}
110 vldm CTX
!, {A6,A7,A8,A9}
111 vld1.64
{A10}, [CTX
]!
112 vldm CTX
!, {A11,A12,A13,A14}
113 vld1.64
{A15}, [CTX
]!
114 vldm CTX
!, {A16,A17,A18,A19}
115 vld1.64
{A20}, [CTX
]!
116 vldm CTX
, {A21,A22,A23,A24}
C T0 = A5 ^ A15, using the QREG forms of the registers.
C NOTE(review): presumably QREG(x) names the q register containing x, so
C each veor covers two 64-bit lanes at once -- confirm against the macro.
124 veor QREG
(T0
), QREG
(A5
), QREG
(A15
)
C C1 = A1 ^ A6 ^ A11 ^ A16 ^ A21, accumulated one term per veor.
127 veor QREG
(C1
), QREG
(A1
), QREG
(A6
)
128 veor QREG
(C1
), QREG
(C1
), QREG
(A11
)
129 veor QREG
(C1
), QREG
(C1
), QREG
(A16
)
130 veor QREG
(C1
), QREG
(C1
), QREG
(A21
)
C C3 = A3 ^ A8 ^ A13 ^ A18 ^ A23, accumulated the same way.
132 veor QREG
(C3
), QREG
(A3
), QREG
(A8
)
133 veor QREG
(C3
), QREG
(C3
), QREG
(A13
)
134 veor QREG
(C3
), QREG
(C3
), QREG
(A18
)
135 veor QREG
(C3
), QREG
(C3
), QREG
(A23
)
137 C D0
= C4 ^
(C1
<<< 1)
138 C
NOTE: Using
ROL macro (and vsli
) is slightly slower.
C XOR T0 into A5 and A15.
C NOTE(review): presumably T0 holds a D value from the formula above by
C this point; the instructions that compute it are not visible here.
145 veor QREG
(A5
), QREG
(A5
), QREG
(T0
)
146 veor QREG
(A15
), QREG
(A15
), QREG
(T0
)
148 C D1
= C0 ^
(C2
<<< 1)
149 C D2
= C1 ^
(C3
<<< 1)
C XOR T0 into A1, A6, A11, A16 and A21 (the same registers that were
C folded into C1 earlier). The computation of this T0 value is not visible.
154 veor QREG
(A1
), QREG
(A1
), QREG
(T0
)
155 veor QREG
(A6
), QREG
(A6
), QREG
(T0
)
156 veor QREG
(A11
), QREG
(A11
), QREG
(T0
)
157 veor QREG
(A16
), QREG
(A16
), QREG
(T0
)
158 veor QREG
(A21
), QREG
(A21
), QREG
(T0
)
160 C D3
= C2 ^
(C4
<<< 1)
161 C D4
= C3 ^
(C0
<<< 1)
C XOR T0 into A3, A8, A13, A18 and A23 (the same registers that were
C folded into C3 earlier). The computation of this T0 value is not visible.
166 veor QREG
(A3
), QREG
(A3
), QREG
(T0
)
167 veor QREG
(A8
), QREG
(A8
), QREG
(T0
)
168 veor QREG
(A13
), QREG
(A13
), QREG
(T0
)
169 veor QREG
(A18
), QREG
(A18
), QREG
(T0
)
170 veor QREG
(A23
), QREG
(A23
), QREG
(T0
)
196 C New A10 value left
in T0
C Load one 64-bit word from the address in RC into C0 (":64" is an
C alignment qualifier on the address) and advance RC past it.
205 vld1.64
{C0}, [RC
:64]!
C From here on C1 is XORed into A1, A6, A11, A16, A21 and C3 into A3, A8,
C A13, A18, A23, one pair at a time.
C NOTE(review): the instructions that set C1/C3 before each pair are not
C visible in this excerpt.
206 veor QREG
(A1
), QREG
(A1
), QREG
(C1
)
207 veor QREG
(A3
), QREG
(A3
), QREG
(C3
)
217 veor QREG
(A6
), QREG
(A6
), QREG
(C1
)
218 veor QREG
(A8
), QREG
(A8
), QREG
(C3
)
227 veor QREG
(A11
), QREG
(A11
), QREG
(C1
)
228 veor QREG
(A13
), QREG
(A13
), QREG
(C3
)
237 veor QREG
(A16
), QREG
(A16
), QREG
(C1
)
238 veor QREG
(A18
), QREG
(A18
), QREG
(C3
)
C Decrement COUNT and set the condition flags. The veor instructions that
C follow do not touch the flags; the branch that consumes them is not
C visible in this excerpt.
246 subs COUNT
, COUNT
, #
1
248 veor QREG
(A21
), QREG
(A21
), QREG
(C1
)
249 veor QREG
(A23
), QREG
(A23
), QREG
(C3
)
C Store the A registers back through CTX in the same order and with the
C same writeback pattern as the loads at the top of the function.
254 vstm CTX
!, {A1,A2,A3,A4}
256 vstm CTX
!, {A6,A7,A8,A9}
257 vst1.64
{A10}, [CTX
]!
258 vstm CTX
!, {A11,A12,A13,A14}
259 vst1.64
{A15}, [CTX
]!
260 vstm CTX
!, {A16,A17,A18,A19}
261 vst1.64
{A20}, [CTX
]!
262 vstm CTX
, {A21,A22,A23,A24}
266 EPILOGUE
(nettle_sha3_permute
)