Merge commit 'dfc115332c94a2f62058ac7f2bce7631fbd20b3d'
[unleashed/tickless.git] / lib / libcrypto / whrlpool / wp_block.c
blob1e00a013304eb2aa04b38792461ae1ac75a6d9c0
/* $OpenBSD: wp_block.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */
/**
 * The Whirlpool hashing function.
 *
 * <P>
 * <b>References</b>
 *
 * <P>
 * The Whirlpool algorithm was developed by
 * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
 * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
 *
 * See
 *      P.S.L.M. Barreto, V. Rijmen,
 *      ``The Whirlpool hashing function,''
 *      NESSIE submission, 2000 (tweaked version, 2001),
 *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
 *
 * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
 * Vincent Rijmen. Lookup "reference implementations" on
 * <http://planeta.terra.com.br/informatica/paulobarreto/>
 *
 * =============================================================================
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdint.h>
#include <string.h>

#include <openssl/crypto.h>
#include <machine/endian.h>

#include "wp_locl.h"

/*
 * Basic types used throughout this file: u8 is a single byte and u64 is
 * the 64-bit lane type of the hash state and of the lookup table.  On
 * LP64 targets plain "unsigned long" is chosen for u64, elsewhere
 * "unsigned long long".
 */
typedef unsigned char		u8;
#if defined(_LP64)
typedef unsigned long		u64;
#else
typedef unsigned long long	u64;
#endif

/* Number of rounds run per 64-byte block; also the number of RC entries. */
#define ROUNDS	10

/*
 * SMALL_REGISTER_BANK is set for 32-bit x86 and for ARM.  When it is
 * defined, whirlpool_block() computes each 64-bit lane with a single
 * eight-term expression instead of accumulating into all eight lanes
 * at once.
 *
 * On 32-bit x86 with WHIRLPOOL_ASM, OPENSSL_SMALL_FOOTPRINT is forced on
 * for the C fallback and GO_FOR_MMX() is provided: it hands the whole
 * request to whirlpool_block_mmx() and returns from the *calling*
 * function when OPENSSL_cpu_caps() reports MMX; otherwise it falls
 * through to the C code that follows the macro invocation.
 */
#undef	SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define SMALL_REGISTER_BANK
# if defined(WHIRLPOOL_ASM)
#  ifndef OPENSSL_SMALL_FOOTPRINT
#   define OPENSSL_SMALL_FOOTPRINT	/* it appears that for older non-MMX
					   CPUs this is actually faster! */
#  endif
#include "x86_arch.h"
#  define GO_FOR_MMX(ctx,inp,num)				\
do {								\
	void whirlpool_block_mmx(void *,const void *,size_t);	\
	if ((OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) == 0)	\
		break;						\
	whirlpool_block_mmx((ctx)->H.c,(inp),(num));		\
	return;							\
} while (0)
# endif
#elif defined(__arm__)
# define SMALL_REGISTER_BANK
#endif

/*
 * ROTATE(a,n) rotates the 64-bit value a by n bits (n must be a
 * compile-time constant in the range the "J" constraint accepts for the
 * x86_64 variant).
 *
 * - GCC-compatible compilers on x86_64 get a single "rolq" instruction.
 *   __asm__ is used rather than asm so that the macro also compiles in
 *   strict ISO modes (-std=c99/-std=c11), where asm is not a keyword.
 * - With OPENSSL_SMALL_FOOTPRINT and no ROTATE yet, a portable shift/xor
 *   form is used; its direction depends on BYTE_ORDER because the table
 *   is laid out as bytes.  Both arguments are fully parenthesized so that
 *   expression arguments expand correctly.
 *
 * Whenever OPENSSL_SMALL_FOOTPRINT ends up with a ROTATE macro,
 * __STRICT_ALIGNMENT is forced on to select the smallest table layout.
 */
#undef ROTATE
#if defined(__GNUC__) && __GNUC__>=2
# if defined(__x86_64) || defined(__x86_64__)
#  define ROTATE(a,n)	({ u64 ret; __asm__ ("rolq %1,%0"	\
				   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
# endif
#endif

#if defined(OPENSSL_SMALL_FOOTPRINT)
# if !defined(ROTATE)
#  if BYTE_ORDER == LITTLE_ENDIAN	/* little-endians have to rotate left */
#   define ROTATE(i,n)	(((i)<<(n)) ^ ((i)>>(64-(n))))
#  else				/* big-endians have to rotate right */
#   define ROTATE(i,n)	(((i)>>(n)) ^ ((i)<<(64-(n))))
#  endif
# endif
# if defined(ROTATE) && !defined(__STRICT_ALIGNMENT)
#  define __STRICT_ALIGNMENT	/* ensure smallest table size */
# endif
#endif

/*
 * Table size depends on __STRICT_ALIGNMENT and on whether or not an
 * endian-specific ROTATE macro is defined. If __STRICT_ALIGNMENT is not
 * defined, which is normally the case on x86[_64] CPUs, the table is
 * 4KB large unconditionally. Otherwise, if ROTATE is defined, the
 * table is 2KB large, and otherwise it is 16KB. The 2KB table requires
 * a whole bunch of additional rotations, but I'm willing to "trade,"
 * because a 16KB table certainly thrashes the L1 cache. I wish all CPUs
 * could handle unaligned loads, as the 4KB table doesn't thrash the
 * cache, nor does it require additional rotations.
 */
/*
 * Note that every Cn macro expands as two loads: one byte load and
 * one quadword load. One can argue that that many single-byte loads
 * are excessive, as one could load a quadword and "milk" it for
 * eight 8-bit values instead. Well, yes, but in order to do so *and*
 * avoid excessive loads you have to accommodate a handful of 64-bit
 * values in the register bank and issue a bunch of shifts and masks.
 * It's a tradeoff: loads vs. shifts and masks in a big register bank[!].
 * On most CPUs eight single-byte loads are faster, and I leave it to
 * a smart compiler to fold the byte loads on the others if beneficial.
 * Hand-coded assembler would be another alternative:-)
 */
/*
 * Table layout selectors.
 *
 *   N        number of 8-byte rows each table entry occupies.
 *   LL(...)  expands one entry's eight bytes into those N rows.
 *   Ck(K,i)  looks up byte k of 64-bit lane i of state K (an object with
 *            a byte view .c) and yields the entry rotated by k bytes.
 *
 * Three variants:
 *   __STRICT_ALIGNMENT + ROTATE : N == 1, one copy per entry, rotation
 *                                 done at run time by ROTATE.
 *   __STRICT_ALIGNMENT only     : N == 8, all eight byte rotations are
 *                                 stored, Ck picks row k of the entry.
 *   otherwise                   : N == 2, each entry is stored twice
 *                                 back to back and Ck performs an
 *                                 (unaligned) 8-byte load starting
 *                                 8-k bytes into the 16-byte pair.
 *
 * The table is const, so the unaligned variant reads through
 * "const u64 *" instead of casting the qualifier away.
 */
#ifdef __STRICT_ALIGNMENT
# if defined(ROTATE)
#  define N	1
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i)	(Cx.q[(K).c[(i)*8+0]])
#  define C1(K,i)	ROTATE(Cx.q[(K).c[(i)*8+1]],8)
#  define C2(K,i)	ROTATE(Cx.q[(K).c[(i)*8+2]],16)
#  define C3(K,i)	ROTATE(Cx.q[(K).c[(i)*8+3]],24)
#  define C4(K,i)	ROTATE(Cx.q[(K).c[(i)*8+4]],32)
#  define C5(K,i)	ROTATE(Cx.q[(K).c[(i)*8+5]],40)
#  define C6(K,i)	ROTATE(Cx.q[(K).c[(i)*8+6]],48)
#  define C7(K,i)	ROTATE(Cx.q[(K).c[(i)*8+7]],56)
# else
#  define N	8
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c7,c0,c1,c2,c3,c4,c5,c6, \
					c6,c7,c0,c1,c2,c3,c4,c5, \
					c5,c6,c7,c0,c1,c2,c3,c4, \
					c4,c5,c6,c7,c0,c1,c2,c3, \
					c3,c4,c5,c6,c7,c0,c1,c2, \
					c2,c3,c4,c5,c6,c7,c0,c1, \
					c1,c2,c3,c4,c5,c6,c7,c0
#  define C0(K,i)	(Cx.q[0+8*(K).c[(i)*8+0]])
#  define C1(K,i)	(Cx.q[1+8*(K).c[(i)*8+1]])
#  define C2(K,i)	(Cx.q[2+8*(K).c[(i)*8+2]])
#  define C3(K,i)	(Cx.q[3+8*(K).c[(i)*8+3]])
#  define C4(K,i)	(Cx.q[4+8*(K).c[(i)*8+4]])
#  define C5(K,i)	(Cx.q[5+8*(K).c[(i)*8+5]])
#  define C6(K,i)	(Cx.q[6+8*(K).c[(i)*8+6]])
#  define C7(K,i)	(Cx.q[7+8*(K).c[(i)*8+7]])
# endif
#else
# define N	2
# define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c0,c1,c2,c3,c4,c5,c6,c7
# define C0(K,i)	(((const u64*)(Cx.c+0))[2*(K).c[(i)*8+0]])
# define C1(K,i)	(((const u64*)(Cx.c+7))[2*(K).c[(i)*8+1]])
# define C2(K,i)	(((const u64*)(Cx.c+6))[2*(K).c[(i)*8+2]])
# define C3(K,i)	(((const u64*)(Cx.c+5))[2*(K).c[(i)*8+3]])
# define C4(K,i)	(((const u64*)(Cx.c+4))[2*(K).c[(i)*8+4]])
# define C5(K,i)	(((const u64*)(Cx.c+3))[2*(K).c[(i)*8+5]])
# define C6(K,i)	(((const u64*)(Cx.c+2))[2*(K).c[(i)*8+6]])
# define C7(K,i)	(((const u64*)(Cx.c+1))[2*(K).c[(i)*8+7]])
#endif

165 static const
166 union {
167 u8 c[(256*N+ROUNDS)*sizeof(u64)];
168 u64 q[(256*N+ROUNDS)];
169 } Cx = { {
170 /* Note endian-neutral representation:-) */
171 LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
172 LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
173 LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
174 LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
175 LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
176 LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
177 LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
178 LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
179 LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
180 LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
181 LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
182 LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
183 LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
184 LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
185 LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
186 LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
187 LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
188 LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
189 LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
190 LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
191 LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
192 LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
193 LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
194 LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
195 LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
196 LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
197 LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
198 LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
199 LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
200 LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
201 LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
202 LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
203 LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
204 LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
205 LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
206 LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
207 LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
208 LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
209 LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
210 LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
211 LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
212 LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
213 LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
214 LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
215 LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
216 LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
217 LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
218 LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
219 LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
220 LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
221 LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
222 LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
223 LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
224 LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
225 LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
226 LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
227 LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
228 LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
229 LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
230 LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
231 LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
232 LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
233 LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
234 LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
235 LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
236 LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
237 LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
238 LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
239 LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
240 LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
241 LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
242 LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
243 LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
244 LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
245 LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
246 LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
247 LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
248 LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
249 LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
250 LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
251 LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
252 LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
253 LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
254 LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
255 LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
256 LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
257 LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
258 LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
259 LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
260 LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
261 LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
262 LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
263 LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
264 LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
265 LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
266 LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
267 LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
268 LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
269 LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
270 LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
271 LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
272 LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
273 LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
274 LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
275 LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
276 LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
277 LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
278 LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
279 LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
280 LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
281 LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
282 LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
283 LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
284 LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
285 LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
286 LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
287 LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
288 LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
289 LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
290 LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
291 LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
292 LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
293 LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
294 LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
295 LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
296 LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
297 LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
298 LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
299 LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
300 LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
301 LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
302 LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
303 LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
304 LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
305 LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
306 LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
307 LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
308 LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
309 LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
310 LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
311 LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
312 LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
313 LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
314 LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
315 LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
316 LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
317 LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
318 LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
319 LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
320 LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
321 LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
322 LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
323 LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
324 LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
325 LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
326 LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
327 LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
328 LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
329 LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
330 LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
331 LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
332 LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
333 LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
334 LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
335 LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
336 LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
337 LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
338 LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
339 LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
340 LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
341 LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
342 LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
343 LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
344 LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
345 LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
346 LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
347 LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
348 LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
349 LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
350 LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
351 LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
352 LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
353 LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
354 LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
355 LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
356 LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
357 LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
358 LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
359 LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
360 LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
361 LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
362 LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
363 LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
364 LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
365 LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
366 LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
367 LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
368 LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
369 LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
370 LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
371 LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
372 LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
373 LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
374 LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
375 LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
376 LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
377 LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
378 LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
379 LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
380 LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
381 LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
382 LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
383 LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
384 LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
385 LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
386 LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
387 LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
388 LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
389 LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
390 LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
391 LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
392 LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
393 LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
394 LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
395 LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
396 LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
397 LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
398 LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
399 LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
400 LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
401 LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
402 LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
403 LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
404 LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
405 LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
406 LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
407 LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
408 LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
409 LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
410 LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
411 LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
412 LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
413 LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
414 LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
415 LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
416 LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
417 LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
418 LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
419 LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
420 LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
421 LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
422 LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
423 LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
424 LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
425 LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
426 LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
427 #define RC (&(Cx.q[256*N]))
428 0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f, /* rc[ROUNDS] */
429 0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
430 0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
431 0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
432 0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
433 0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
434 0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
435 0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
436 0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
437 0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
441 void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
443 int r;
444 const u8 *p=inp;
445 union { u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;
447 #ifdef GO_FOR_MMX
448 GO_FOR_MMX(ctx,inp,n);
449 #endif
450 do {
451 #ifdef OPENSSL_SMALL_FOOTPRINT
452 u64 L[8];
453 int i;
455 for (i=0;i<64;i++) S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
456 for (r=0;r<ROUNDS;r++)
458 for (i=0;i<8;i++)
460 L[i] = i ? 0 : RC[r];
461 L[i] ^= C0(K,i) ^ C1(K,(i-1)&7) ^
462 C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
463 C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
464 C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
466 memcpy (K.q,L,64);
467 for (i=0;i<8;i++)
469 L[i] ^= C0(S,i) ^ C1(S,(i-1)&7) ^
470 C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
471 C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
472 C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
474 memcpy (S.q,L,64);
476 for (i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i];
477 #else
478 u64 L0,L1,L2,L3,L4,L5,L6,L7;
480 #ifdef __STRICT_ALIGNMENT
481 if ((size_t)p & 7)
483 memcpy (S.c,p,64);
484 S.q[0] ^= (K.q[0] = H->q[0]);
485 S.q[1] ^= (K.q[1] = H->q[1]);
486 S.q[2] ^= (K.q[2] = H->q[2]);
487 S.q[3] ^= (K.q[3] = H->q[3]);
488 S.q[4] ^= (K.q[4] = H->q[4]);
489 S.q[5] ^= (K.q[5] = H->q[5]);
490 S.q[6] ^= (K.q[6] = H->q[6]);
491 S.q[7] ^= (K.q[7] = H->q[7]);
493 else
494 #endif
496 const u64 *pa = (const u64*)p;
497 S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
498 S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
499 S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
500 S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
501 S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
502 S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
503 S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
504 S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
507 for(r=0;r<ROUNDS;r++)
509 #ifdef SMALL_REGISTER_BANK
510 L0 = C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
511 C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
512 L1 = C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
513 C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
514 L2 = C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
515 C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
516 L3 = C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
517 C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
518 L4 = C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
519 C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
520 L5 = C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
521 C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
522 L6 = C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
523 C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
524 L7 = C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
525 C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);
527 K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
528 K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
530 L0 ^= C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
531 C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
532 L1 ^= C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
533 C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
534 L2 ^= C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
535 C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
536 L3 ^= C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
537 C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
538 L4 ^= C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
539 C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
540 L5 ^= C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
541 C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
542 L6 ^= C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
543 C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
544 L7 ^= C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
545 C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);
547 S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
548 S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
549 #else
550 L0 = C0(K,0); L1 = C1(K,0); L2 = C2(K,0); L3 = C3(K,0);
551 L4 = C4(K,0); L5 = C5(K,0); L6 = C6(K,0); L7 = C7(K,0);
552 L0 ^= RC[r];
554 L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
555 L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);
557 L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
558 L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);
560 L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
561 L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);
563 L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
564 L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);
566 L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
567 L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);
569 L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
570 L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);
572 L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
573 L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);
575 K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
576 K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
578 L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
579 L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);
581 L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
582 L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);
584 L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
585 L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);
587 L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
588 L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);
590 L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
591 L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);
593 L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
594 L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);
596 L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
597 L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);
599 L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
600 L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);
602 S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
603 S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
604 #endif
607 #ifdef __STRICT_ALIGNMENT
608 if ((size_t)p & 7)
610 int i;
611 for(i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i];
613 else
614 #endif
616 const u64 *pa=(const u64 *)p;
617 H->q[0] ^= S.q[0] ^ pa[0];
618 H->q[1] ^= S.q[1] ^ pa[1];
619 H->q[2] ^= S.q[2] ^ pa[2];
620 H->q[3] ^= S.q[3] ^ pa[3];
621 H->q[4] ^= S.q[4] ^ pa[4];
622 H->q[5] ^= S.q[5] ^ pa[5];
623 H->q[6] ^= S.q[6] ^ pa[6];
624 H->q[7] ^= S.q[7] ^ pa[7];
626 #endif
627 p += 64;
628 } while(--n);