/* serpent-encrypt.c
 *
 * The serpent block cipher.
 *
 * For more details on this algorithm, see the Serpent website at
 * http://www.cl.cam.ac.uk/~rja14/serpent.html
 */

/* nettle, low-level cryptographics library
 *
 * Copyright (C) 2011 Niels Möller
 * Copyright (C) 2010, 2011 Simon Josefsson
 * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
 *
 * The nettle library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * The nettle library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the nettle library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02111-1301, USA.
 */

/* This file is derived from cipher/serpent.c in Libgcrypt v1.4.6.
   The adaption to Nettle was made by Simon Josefsson on 2010-12-07
   with final touches on 2011-05-30.  Changes include replacing
   libgcrypt with nettle in the license template, renaming
   serpent_context to serpent_ctx, renaming u32 to uint32_t, removing
   libgcrypt stubs and selftests, modifying entry function prototypes,
   using FOR_BLOCKS to iterate through data in encrypt/decrypt, using
   LE_READ_UINT32 and LE_WRITE_UINT32 to access data in
   encrypt/decrypt, and running indent on the code. */

#if HAVE_CONFIG_H
#include "config.h"
#endif

#include <assert.h>
#include <limits.h>

#include "serpent.h"

#include "macros.h"
#include "serpent-internal.h"

/* These are the S-Boxes of Serpent.  They are copied from Serpents
   reference implementation (the optimized one, contained in
   `floppy2') and are therefore:

     Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.

   To quote the Serpent homepage
   (http://www.cl.cam.ac.uk/~rja14/serpent.html):

   "Serpent is now completely in the public domain, and we impose no
   restrictions on its use.  This was announced on the 21st August at
   the First AES Candidate Conference.  The optimised implementations
   in the submission package are now under the GNU PUBLIC LICENSE
   (GPL), although some comments in the code still say otherwise.  You
   are welcome to use Serpent for any application."  */

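/* Each SBOX<n> macro below is a bitsliced form of the corresponding
   4-bit S-box: bit i of x0..x3 together forms one 4-bit S-box input,
   so a single macro invocation evaluates the S-box on 32 nibbles (or
   64 in the 64-bit path) in parallel using only bitwise operations. */
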
/* S0: 3 8 15 1 10 6 5 11 14 13 4 2 7 0 9 12 */
/* Could easily let y0, y1 overlap with x0, x1, and possibly also x2 and y2 */
#define SBOX0(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y3 = x1 ^ x2; \
    y0 = x0 | x3; \
    y1 = x0 ^ x1; \
    y3 ^= y0; \
    y2 = x2 | y3; \
    x0 ^= x3; \
    y2 &= x3; \
    x3 ^= x2; \
    x2 |= x1; \
    y0 = y1 & x2; \
    y2 ^= y0; \
    y0 &= y2; \
    y0 ^= x2; \
    x1 &= x0; \
    y0 ^= x0; \
    y0 = ~ y0; \
    y1 = y0 ^ x1; \
    y1 ^= x3; \
  } while (0)

/* FIXME: Arrange for some overlap between inputs and outputs? */
/* S1: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4 */
/* Original single-assignment form:

     t01 = x0  | x3;
     t02 = x2  ^ x3;
     t03 =     ~ x1;
     t04 = x0  ^ x2;
     t05 = x0  | t03;
     t06 = x3  & t04;
     t07 = t01 & t02;
     t08 = x1  | t06;
     y2  = t02 ^ t05;
     t10 = t07 ^ t08;
     t11 = t01 ^ t10;
     t12 = y2  ^ t11;
     t13 = x1  & x3;
     y3  =     ~ t10;
     y1  = t13 ^ t12;
     t16 = t10 | y1;
     t17 = t05 & t16;
     y0  = x2  ^ t17;
*/
#define SBOX1(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y1 = x0 | x3; \
    y2 = x2 ^ x3; \
    y0 = ~ x1; \
    y3 = x0 ^ x2; \
    y0 |= x0; \
    y3 &= x3; \
    x0 = y1 & y2; \
    y3 |= x1; \
    y2 ^= y0; \
    y3 ^= x0; \
    x0 = y1 ^ y3; \
    x0 ^= y2; \
    y1 = x1 & x3; \
    y1 ^= x0; \
    x3 = y1 | y3; \
    y3 = ~ y3; \
    y0 &= x3; \
    y0 ^= x2; \
  } while (0)

/* FIXME: Arrange for some overlap between inputs and outputs? */
/* S2: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2 */
#define SBOX2(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y2 = x0 | x2; \
    y1 = x0 ^ x1; \
    y3 = x3 ^ y2; \
    y0 = y1 ^ y3; \
    x3 |= x0; \
    x2 ^= y0; \
    x0 = x1 ^ x2; \
    x2 |= x1; \
    x0 &= y2; \
    y3 ^= x2; \
    y1 |= y3; \
    y1 ^= x0; \
    y2 = y3 ^ y1; \
    y2 ^= x1; \
    y3 = ~ y3; \
    y2 ^= x3; \
  } while (0)

/* S3: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14 */
/* Original single-assignment form:

     t01 = x0  ^ x2;
     t02 = x0  | x3;
     t03 = x0  & x3;
     t04 = t01 & t02;
     t05 = x1  | t03;
     t06 = x0  & x1;
     t07 = x3  ^ t04;
     t08 = x2  | t06;
     t09 = x1  ^ t07;
     t10 = x3  & t05;
     t11 = t02 ^ t10;
     y3  = t08 ^ t09;
     t13 = x3  | y3;
     t14 = x0  | t07;
     t15 = x1  & t13;
     y2  = t08 ^ t11;
     y0  = t14 ^ t15;
     y1  = t05 ^ t04;
*/
#define SBOX3(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y1 = x0 ^ x2; \
    y0 = x0 | x3; \
    y3 = x0 & x3; \
    y1 &= y0; \
    y3 |= x1; \
    y2 = x0 & x1; \
    y2 |= x2; \
    x2 = x3 ^ y1; \
    y1 ^= y3; \
    x0 |= x2; \
    x2 ^= x1; \
    y3 &= x3; \
    y0 ^= y3; \
    y3 = y2 ^ x2; \
    y2 ^= y0; \
    x3 |= y3; \
    x1 &= x3; \
    y0 = x0 ^ x1; \
  } while (0)

/* S4: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13 */
/* Original single-assignment form:

     t01 = x0  | x1;
     t02 = x1  | x2;
     t03 = x0  ^ t02;
     t04 = x1  ^ x3;
     t05 = x3  | t03;
     t06 = x3  & t01;
     y3  = t03 ^ t06;
     t08 = y3  & t04;
     t09 = t04 & t05;
     t10 = x2  ^ t06;
     t11 = x1  & x2;
     t12 = t04 ^ t08;
     t13 = t11 | t03;
     t14 = t10 ^ t09;
     t15 = x0  & t05;
     t16 = t11 | t12;
     y2  = t13 ^ t08;
     y1  = t15 ^ t16;
     y0  =     ~ t14;
*/
#define SBOX4(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y3 = x0 | x1; \
    y2 = x1 | x2; \
    y2 ^= x0; \
    y3 &= x3; \
    y0 = x1 ^ x3; \
    x3 |= y2; \
    x0 &= x3; \
    x1 &= x2; \
    x2 ^= y3; \
    y3 ^= y2; \
    y2 |= x1; \
    y1 = y3 & y0; \
    y2 ^= y1; \
    y1 ^= y0; \
    y1 |= x1; \
    y1 ^= x0; \
    y0 &= x3; \
    y0 ^= x2; \
    y0 = ~y0; \
  } while (0)

/* S5: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1 */
/* Original single-assignment form:

     t01 = x1  ^ x3;
     t02 = x1  | x3;
     t03 = x0  & t01;
     t04 = x2  ^ t02;
     t05 = t03 ^ t04;
     y0  =     ~ t05;
     t07 = x0  ^ t01;
     t08 = x3  | y0;
     t09 = x1  | t05;
     t10 = x3  ^ t08;
     t11 = x1  | t07;
     t12 = t03 | y0;
     t13 = t07 | t10;
     t14 = t01 ^ t11;
     y2  = t09 ^ t13;
     y1  = t07 ^ t08;
     y3  = t12 ^ t14;
*/
#define SBOX5(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y0 = x1 | x3; \
    y0 ^= x2; \
    x2 = x1 ^ x3; \
    y2 = x0 ^ x2; \
    x0 &= x2; \
    y0 ^= x0; \
    y3 = x1 | y2; \
    x1 |= y0; \
    y0 = ~y0; \
    x0 |= y0; \
    y3 ^= x2; \
    y3 ^= x0; \
    y1 = x3 | y0; \
    x3 ^= y1; \
    y1 ^= y2; \
    y2 |= x3; \
    y2 ^= x1; \
  } while (0)

/* S6: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0 */
/* Original single-assignment form:

     t01 = x0  & x3;
     t02 = x1  ^ x2;
     t03 = x0  ^ x3;
     t04 = t01 ^ t02;
     t05 = x1  | x2;
     y1  =     ~ t04;
     t07 = t03 & t05;
     t08 = x1  & y1;
     t09 = x0  | x2;
     t10 = t07 ^ t08;
     t11 = x1  | x3;
     t12 = x2  ^ t11;
     t13 = t09 ^ t10;
     y2  =     ~ t13;
     t15 = y1  & t03;
     y3  = t12 ^ t07;
     t17 = x0  ^ x1;
     t18 = y2  ^ t15;
     y0  = t17 ^ t18;
*/
#define SBOX6(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y0 = x0 ^ x3; \
    y1 = x0 & x3; \
    y2 = x0 | x2; \
    x3 |= x1; \
    x3 ^= x2; \
    x0 ^= x1; \
    y3 = x1 | x2; \
    x2 ^= x1; \
    y3 &= y0; \
    y1 ^= x2; \
    y1 = ~y1; \
    y0 &= y1; \
    x1 &= y1; \
    x1 ^= y3; \
    y3 ^= x3; \
    y2 ^= x1; \
    y2 = ~y2; \
    y0 ^= y2; \
    y0 ^= x0; \
  } while (0)

/* S7: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6 */
/* Original single-assignment form:

     t01 = x0  & x2;
     t02 =     ~ x3;
     t03 = x0  & t02;
     t04 = x1  | t01;
     t05 = x0  & x1;
     t06 = x2  ^ t04;
     y3  = t03 ^ t06;
     t08 = x2  | y3;
     t09 = x3  | t05;
     t10 = x0  ^ t08;
     t11 = t04 & y3;
     y1  = t09 ^ t10;
     t13 = x1  ^ y1;
     t14 = t01 ^ y1;
     t15 = x2  ^ t05;
     t16 = t11 | t13;
     t17 = t02 | t14;
     y0  = t15 ^ t17;
     y2  = x0  ^ t16;
*/
/* It appears impossible to do this with only 8 registers.  We
   recompute t02, and t04 (if we have spare registers, hopefully the
   compiler can recognize them as common subexpressions). */
#define SBOX7(x0, x1, x2, x3, y0, y1, y2, y3) \
  do { \
    y0 = x0 & x2; \
    y3 = x1 | y0; /* t04 */ \
    y3 ^= x2; \
    y1 = ~x3; /* t02 */ \
    y1 &= x0; \
    y3 ^= y1; \
    y1 = x2 | y3; \
    y1 ^= x0; \
    y2 = x0 & x1; \
    x2 ^= y2; \
    y2 |= x3; \
    y1 ^= y2; \
    y2 = x1 | y0; /* t04 */ \
    y2 &= y3; \
    x1 ^= y1; \
    y2 |= x1; \
    y2 ^= x0; \
    y0 ^= y1; \
    x3 = ~x3; /* t02 */ \
    y0 |= x3; \
    y0 ^= x2; \
  } while (0)

/* In-place linear transformation. */
#define LINEAR_TRANSFORMATION(x0,x1,x2,x3) \
  do { \
    x0 = ROTL32 (13, x0); \
    x2 = ROTL32 (3, x2); \
    x1 = x1 ^ x0 ^ x2; \
    x3 = x3 ^ x2 ^ (x0 << 3); \
    x1 = ROTL32 (1, x1); \
    x3 = ROTL32 (7, x3); \
    x0 = x0 ^ x1 ^ x3; \
    x2 = x2 ^ x3 ^ (x1 << 7); \
    x0 = ROTL32 (5, x0); \
    x2 = ROTL32 (22, x2); \
  } while (0)

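/* The macro above is Serpent's linear transformation: the four 32-bit
   words are mixed with fixed rotations, shifts and XORs, diffusing the
   output of one round's S-box layer across all four words before the
   next round. */
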
/* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
   y0,y1,y2,y3. */
#define ROUND(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
  do { \
    KEYXOR(x0,x1,x2,x3, subkey); \
    SBOX##which(x0,x1,x2,x3, y0,y1,y2,y3); \
    LINEAR_TRANSFORMATION(y0,y1,y2,y3); \
  } while (0)

#if HAVE_NATIVE_64_BIT

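/* With native 64-bit operations, two consecutive blocks are processed
   together: each 64-bit variable holds the corresponding 32-bit word
   of both blocks, and the DROTL32/DRSHIFT32 helpers rotate and shift
   the two 32-bit halves independently so the halves never mix. */
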
#define LINEAR_TRANSFORMATION64(x0,x1,x2,x3) \
  do { \
    x0 = DROTL32 (13, x0); \
    x2 = DROTL32 (3, x2); \
    x1 = x1 ^ x0 ^ x2; \
    x3 = x3 ^ x2 ^ DRSHIFT32(3, x0); \
    x1 = DROTL32 (1, x1); \
    x3 = DROTL32 (7, x3); \
    x0 = x0 ^ x1 ^ x3; \
    x2 = x2 ^ x3 ^ DRSHIFT32(7, x1); \
    x0 = DROTL32 (5, x0); \
    x2 = DROTL32 (22, x2); \
  } while (0)

#define ROUND64(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
  do { \
    KEYXOR64(x0,x1,x2,x3, subkey); \
    SBOX##which(x0,x1,x2,x3, y0,y1,y2,y3); \
    LINEAR_TRANSFORMATION64(y0,y1,y2,y3); \
  } while (0)

#endif /* HAVE_NATIVE_64_BIT */

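/* Encrypts LENGTH bytes (which must be a multiple of SERPENT_BLOCK_SIZE)
   from SRC to DST, one independent 16-byte block at a time.  On targets
   with native 64-bit arithmetic, a single leftover block is handled
   first with 32-bit operations, and the remaining blocks are then
   processed two at a time. */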
void
serpent_encrypt (const struct serpent_ctx *ctx,
                 unsigned length, uint8_t * dst, const uint8_t * src)
{
  assert( !(length % SERPENT_BLOCK_SIZE));

#if HAVE_NATIVE_64_BIT
  if (length & SERPENT_BLOCK_SIZE)
#else
  while (length >= SERPENT_BLOCK_SIZE)
#endif
    {
      uint32_t x0,x1,x2,x3, y0,y1,y2,y3;
      unsigned k;

      x0 = LE_READ_UINT32 (src);
      x1 = LE_READ_UINT32 (src + 4);
      x2 = LE_READ_UINT32 (src + 8);
      x3 = LE_READ_UINT32 (src + 12);

      for (k = 0; ; k += 8)
        {
          ROUND (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
          ROUND (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
          ROUND (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
          ROUND (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
          ROUND (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
          ROUND (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
          ROUND (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
          if (k == 24)
            break;
          ROUND (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
        }

      /* Special final round, using two subkeys. */
      KEYXOR (y0,y1,y2,y3, ctx->keys[31]);
      SBOX7 (y0,y1,y2,y3, x0,x1,x2,x3);
      KEYXOR (x0,x1,x2,x3, ctx->keys[32]);

      LE_WRITE_UINT32 (dst, x0);
      LE_WRITE_UINT32 (dst + 4, x1);
      LE_WRITE_UINT32 (dst + 8, x2);
      LE_WRITE_UINT32 (dst + 12, x3);

      src += SERPENT_BLOCK_SIZE;
      dst += SERPENT_BLOCK_SIZE;
      length -= SERPENT_BLOCK_SIZE;
    }

#if HAVE_NATIVE_64_BIT
  FOR_BLOCKS(length, dst, src, 2*SERPENT_BLOCK_SIZE)
    {
      uint64_t x0,x1,x2,x3, y0,y1,y2,y3;
      unsigned k;

      x0 = LE_READ_UINT32 (src);
      x1 = LE_READ_UINT32 (src + 4);
      x2 = LE_READ_UINT32 (src + 8);
      x3 = LE_READ_UINT32 (src + 12);

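      /* Pack two consecutive blocks into each 64-bit word: the word
         read from the first block ends up in the high 32 bits, the
         word from the second block in the low 32 bits. */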
      x0 <<= 32; x0 |= LE_READ_UINT32 (src + 16);
      x1 <<= 32; x1 |= LE_READ_UINT32 (src + 20);
      x2 <<= 32; x2 |= LE_READ_UINT32 (src + 24);
      x3 <<= 32; x3 |= LE_READ_UINT32 (src + 28);

      for (k = 0; ; k += 8)
        {
          ROUND64 (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
          ROUND64 (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
          ROUND64 (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
          ROUND64 (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
          ROUND64 (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
          ROUND64 (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
          ROUND64 (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
          if (k == 24)
            break;
          ROUND64 (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
        }

      /* Special final round, using two subkeys. */
      KEYXOR64 (y0,y1,y2,y3, ctx->keys[31]);
      SBOX7 (y0,y1,y2,y3, x0,x1,x2,x3);
      KEYXOR64 (x0,x1,x2,x3, ctx->keys[32]);

      LE_WRITE_UINT32 (dst + 16, x0);
      LE_WRITE_UINT32 (dst + 20, x1);
      LE_WRITE_UINT32 (dst + 24, x2);
      LE_WRITE_UINT32 (dst + 28, x3);
      x0 >>= 32; LE_WRITE_UINT32 (dst, x0);
      x1 >>= 32; LE_WRITE_UINT32 (dst + 4, x1);
      x2 >>= 32; LE_WRITE_UINT32 (dst + 8, x2);
      x3 >>= 32; LE_WRITE_UINT32 (dst + 12, x3);
    }
#endif /* HAVE_NATIVE_64_BIT */
}
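
/* Minimal usage sketch (illustrative only, not part of the original
   source; assumes the public API declared in serpent.h, i.e.
   serpent_set_key and serpent_encrypt with the constants
   SERPENT_KEY_SIZE = 32 and SERPENT_BLOCK_SIZE = 16):

     #include <nettle/serpent.h>

     void encrypt_one_block (void)
     {
       struct serpent_ctx ctx;
       uint8_t key[SERPENT_KEY_SIZE] = {0};    // use a real 32-byte key
       uint8_t src[SERPENT_BLOCK_SIZE] = {0};  // one 16-byte plaintext block
       uint8_t dst[SERPENT_BLOCK_SIZE];

       serpent_set_key (&ctx, SERPENT_KEY_SIZE, key);
       serpent_encrypt (&ctx, SERPENT_BLOCK_SIZE, dst, src);
     }
*/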