3 * The twofish block cipher.
6 /* twofish - An implementation of the twofish cipher.
7 * Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>
9 * Modifications for lsh, integrated testing
10 * Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>
12 * Integrated with the nettle library,
13 * Copyright (C) 2001 Niels Möller
16 /* nettle, low-level cryptographics library
18 * The nettle library is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU Lesser General Public License as published by
20 * the Free Software Foundation; either version 2.1 of the License, or (at your
21 * option) any later version.
23 * The nettle Library is distributed in the hope that it will be useful, but
24 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
26 * License for more details.
28 * You should have received a copy of the GNU Lesser General Public License
29 * along with the nettle library; see the file COPYING.LIB. If not, write to
30 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
45 /* Bitwise rotations on 32-bit words. These are defined as macros that
46 * evaluate their argument twice, so do not apply to any expressions with
50 #define rol1(x) (((x) << 1) | (((x) & 0x80000000) >> 31))
51 #define rol8(x) (((x) << 8) | (((x) & 0xFF000000) >> 24))
52 #define rol9(x) (((x) << 9) | (((x) & 0xFF800000) >> 23))
53 #define ror1(x) (((x) >> 1) | (((x) & 0x00000001) << 31))
55 /* ------------------------------------------------------------------------- */
57 /* The permutations q0 and q1. These are fixed permutations on 8-bit values.
58 * The permutations have been computed using the program twofish-data,
59 * which is distributed along with this file.
62 static const uint8_t q0
[256] = {
63 0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
64 0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
65 0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
66 0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
67 0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
68 0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
69 0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
70 0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
71 0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
72 0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
73 0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
74 0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
75 0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
76 0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
77 0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
78 0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
79 0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
80 0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
81 0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
82 0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
83 0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
84 0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
85 0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
86 0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
87 0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
88 0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
89 0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
90 0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
91 0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
92 0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
93 0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
94 0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
97 static const uint8_t q1
[256] = {
98 0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
99 0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
100 0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
101 0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
102 0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
103 0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
104 0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
105 0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
106 0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
107 0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
108 0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
109 0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
110 0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
111 0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
112 0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
113 0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
114 0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
115 0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
116 0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
117 0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
118 0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
119 0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
120 0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
121 0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
122 0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
123 0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
124 0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
125 0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
126 0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
127 0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
128 0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
129 0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
132 /* ------------------------------------------------------------------------- */
134 /* uint8_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
136 * Multiplication in GF(2^8).
138 * This function multiplies a times b in the Galois Field GF(2^8) with
139 * primitive polynomial p.
140 * The representation of the polynomials a, b, and p uses bits with
141 * values 2^i to represent the terms x^i. The polynomial p contains an
144 * Note that addition and subtraction in GF(2^8) is simply the XOR
149 gf_multiply(uint8_t p
, uint8_t a
, uint8_t b
)
155 if (a
& 1) result
^= shift
;
158 if (shift
& 0x100) shift
^= p
;
163 /* ------------------------------------------------------------------------- */
165 /* The matrix RS as specified in section 4.3 the twofish paper. */
167 static const uint8_t rs_matrix
[4][8] = {
168 { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
169 { 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
170 { 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
171 { 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };
173 /* uint32_t compute_s(uint32_t m1, uint32_t m2);
175 * Computes the value RS * M, where M is a byte vector composed of the
176 * bytes of m1 and m2. Arithmetic is done in GF(2^8) with primitive
177 * polynomial x^8 + x^6 + x^3 + x^2 + 1.
179 * This function is used to compute the sub-keys S which are in turn used
180 * to generate the S-boxes.
184 compute_s(uint32_t m1
, uint32_t m2
)
188 for (i
= 0; i
< 4; i
++)
189 s
|= (( gf_multiply(0x4D, m1
, rs_matrix
[i
][0])
190 ^ gf_multiply(0x4D, m1
>> 8, rs_matrix
[i
][1])
191 ^ gf_multiply(0x4D, m1
>> 16, rs_matrix
[i
][2])
192 ^ gf_multiply(0x4D, m1
>> 24, rs_matrix
[i
][3])
193 ^ gf_multiply(0x4D, m2
, rs_matrix
[i
][4])
194 ^ gf_multiply(0x4D, m2
>> 8, rs_matrix
[i
][5])
195 ^ gf_multiply(0x4D, m2
>> 16, rs_matrix
[i
][6])
196 ^ gf_multiply(0x4D, m2
>> 24, rs_matrix
[i
][7])) << (i
*8));
200 /* ------------------------------------------------------------------------- */
202 /* This table describes which q S-boxes are used for each byte in each stage
203 * of the function h, cf. figure 2 of the twofish paper.
206 static const uint8_t * const q_table
[4][5] =
207 { { q1
, q1
, q0
, q0
, q1
},
208 { q0
, q1
, q1
, q0
, q0
},
209 { q0
, q0
, q0
, q1
, q1
},
210 { q1
, q0
, q1
, q1
, q0
} };
212 /* The matrix MDS as specified in section 4.3.2 of the twofish paper. */
214 static const uint8_t mds_matrix
[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
215 { 0x5B, 0xEF, 0xEF, 0x01 },
216 { 0xEF, 0x5B, 0x01, 0xEF },
217 { 0xEF, 0x01, 0xEF, 0x5B } };
219 /* uint32_t h_uint8_t(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
221 * Perform the h function (section 4.3.2) on one byte. It consists of
222 * repeated applications of the q permutation, followed by a XOR with
223 * part of a sub-key. Finally, the value is multiplied by one column of
224 * the MDS matrix. To obtain the result for a full word, the results of
225 * h for the individual bytes are XORed.
227 * k is the key size (/ 64 bits), i is the byte number (0 = LSB), x is the
228 * actual byte to apply the function to; l0, l1, l2, and l3 are the
229 * appropriate bytes from the subkey. Note that only l0..l(k-1) are used.
233 h_byte(int k
, int i
, uint8_t x
, uint8_t l0
, uint8_t l1
, uint8_t l2
, uint8_t l3
)
235 uint8_t y
= q_table
[i
][4][l0
^
237 q_table
[i
][2][k
== 2 ? x
: l2
^
238 q_table
[i
][1][k
== 3 ? x
: l3
^ q_table
[i
][0][x
]]]]];
240 return ( ((uint32_t)gf_multiply(0x69, mds_matrix
[0][i
], y
))
241 | ((uint32_t)gf_multiply(0x69, mds_matrix
[1][i
], y
) << 8)
242 | ((uint32_t)gf_multiply(0x69, mds_matrix
[2][i
], y
) << 16)
243 | ((uint32_t)gf_multiply(0x69, mds_matrix
[3][i
], y
) << 24) );
246 /* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
248 * Perform the function h on a word. See the description of h_byte() above.
252 h(int k
, uint8_t x
, uint32_t l0
, uint32_t l1
, uint32_t l2
, uint32_t l3
)
254 return ( h_byte(k
, 0, x
, l0
, l1
, l2
, l3
)
255 ^ h_byte(k
, 1, x
, l0
>> 8, l1
>> 8, l2
>> 8, l3
>> 8)
256 ^ h_byte(k
, 2, x
, l0
>> 16, l1
>> 16, l2
>> 16, l3
>> 16)
257 ^ h_byte(k
, 3, x
, l0
>> 24, l1
>> 24, l2
>> 24, l3
>> 24) );
261 /* ------------------------------------------------------------------------- */
265 /* Structure which contains the tables containing the subkeys and the
266 * key-dependent s-boxes.
270 /* Set up internal tables required for twofish encryption and decryption.
272 * The key size is specified in bytes. Key sizes up to 32 bytes are
273 * supported. Larger key sizes are silently truncated.
277 twofish_set_key(struct twofish_ctx
*context
,
278 unsigned keysize
, const uint8_t *key
)
280 uint8_t key_copy
[32];
281 uint32_t m
[8], s
[4], t
;
284 /* Extend key as necessary */
286 assert(keysize
<= 32);
288 /* We do a little more copying than necessary, but that doesn't
290 memset(key_copy
, 0, 32);
291 memcpy(key_copy
, key
, keysize
);
293 for (i
= 0; i
<8; i
++)
294 m
[i
] = LE_READ_UINT32(key_copy
+ i
*4);
298 else if (keysize
<= 24)
303 /* Compute sub-keys */
305 for (i
= 0; i
< 20; i
++)
307 t
= h(k
, 2*i
+1, m
[1], m
[3], m
[5], m
[7]);
309 t
+= (context
->keys
[2*i
] =
310 t
+ h(k
, 2*i
, m
[0], m
[2], m
[4], m
[6]));
312 context
->keys
[2*i
+1] = t
;
315 /* Compute key-dependent S-boxes */
317 for (i
= 0; i
< k
; i
++)
318 s
[k
-1-i
] = compute_s(m
[2*i
], m
[2*i
+1]);
320 for (i
= 0; i
< 4; i
++)
321 for (j
= 0; j
< 256; j
++)
322 context
->s_box
[i
][j
] = h_byte(k
, i
, j
,
329 /* Encrypt blocks of 16 bytes of data with the twofish algorithm.
331 * Before this function can be used, twofish_set_key() must be used in order to
332 * set up various tables required for the encryption algorithm.
334 * This function always encrypts 16 bytes of plaintext to 16 bytes of
335 * ciphertext. The memory areas of the plaintext and the ciphertext can
340 twofish_encrypt(const struct twofish_ctx
*context
,
343 const uint8_t *plaintext
)
345 const uint32_t * keys
= context
->keys
;
346 const uint32_t (*s_box
)[256] = context
->s_box
;
348 assert( !(length
% TWOFISH_BLOCK_SIZE
) );
349 for ( ; length
; length
-= TWOFISH_BLOCK_SIZE
)
352 uint32_t r0
, r1
, r2
, r3
, t0
, t1
;
355 for (i
= 0; i
<4; i
++, plaintext
+= 4)
356 words
[i
] = LE_READ_UINT32(plaintext
);
358 r0
= words
[0] ^ keys
[0];
359 r1
= words
[1] ^ keys
[1];
360 r2
= words
[2] ^ keys
[2];
361 r3
= words
[3] ^ keys
[3];
363 for (i
= 0; i
< 8; i
++) {
364 t1
= ( s_box
[1][r1
& 0xFF]
365 ^ s_box
[2][(r1
>> 8) & 0xFF]
366 ^ s_box
[3][(r1
>> 16) & 0xFF]
367 ^ s_box
[0][(r1
>> 24) & 0xFF]);
368 t0
= ( s_box
[0][r0
& 0xFF]
369 ^ s_box
[1][(r0
>> 8) & 0xFF]
370 ^ s_box
[2][(r0
>> 16) & 0xFF]
371 ^ s_box
[3][(r0
>> 24) & 0xFF]) + t1
;
372 r3
= (t1
+ t0
+ keys
[4*i
+9]) ^ rol1(r3
);
373 r2
= (t0
+ keys
[4*i
+8]) ^ r2
;
376 t1
= ( s_box
[1][r3
& 0xFF]
377 ^ s_box
[2][(r3
>> 8) & 0xFF]
378 ^ s_box
[3][(r3
>> 16) & 0xFF]
379 ^ s_box
[0][(r3
>> 24) & 0xFF]);
380 t0
= ( s_box
[0][r2
& 0xFF]
381 ^ s_box
[1][(r2
>> 8) & 0xFF]
382 ^ s_box
[2][(r2
>> 16) & 0xFF]
383 ^ s_box
[3][(r2
>> 24) & 0xFF]) + t1
;
384 r1
= (t1
+ t0
+ keys
[4*i
+11]) ^ rol1(r1
);
385 r0
= (t0
+ keys
[4*i
+10]) ^ r0
;
389 words
[0] = r2
^ keys
[4];
390 words
[1] = r3
^ keys
[5];
391 words
[2] = r0
^ keys
[6];
392 words
[3] = r1
^ keys
[7];
394 for (i
= 0; i
<4; i
++, ciphertext
+= 4)
395 LE_WRITE_UINT32(ciphertext
, words
[i
]);
399 /* Decrypt blocks of 16 bytes of data with the twofish algorithm.
401 * Before this function can be used, twofish_set_key() must be used in order to
402 * set up various tables required for the decryption algorithm.
404 * This function always decrypts 16 bytes of ciphertext to 16 bytes of
405 * plaintext. The memory areas of the plaintext and the ciphertext can
410 twofish_decrypt(const struct twofish_ctx
*context
,
413 const uint8_t *ciphertext
)
416 const uint32_t *keys
= context
->keys
;
417 const uint32_t (*s_box
)[256] = context
->s_box
;
419 assert( !(length
% TWOFISH_BLOCK_SIZE
) );
420 for ( ; length
; length
-= TWOFISH_BLOCK_SIZE
)
423 uint32_t r0
, r1
, r2
, r3
, t0
, t1
;
426 for (i
= 0; i
<4; i
++, ciphertext
+= 4)
427 words
[i
] = LE_READ_UINT32(ciphertext
);
429 r0
= words
[2] ^ keys
[6];
430 r1
= words
[3] ^ keys
[7];
431 r2
= words
[0] ^ keys
[4];
432 r3
= words
[1] ^ keys
[5];
434 for (i
= 0; i
< 8; i
++) {
435 t1
= ( s_box
[1][r3
& 0xFF]
436 ^ s_box
[2][(r3
>> 8) & 0xFF]
437 ^ s_box
[3][(r3
>> 16) & 0xFF]
438 ^ s_box
[0][(r3
>> 24) & 0xFF]);
439 t0
= ( s_box
[0][r2
& 0xFF]
440 ^ s_box
[1][(r2
>> 8) & 0xFF]
441 ^ s_box
[2][(r2
>> 16) & 0xFF]
442 ^ s_box
[3][(r2
>> 24) & 0xFF]) + t1
;
443 r1
= (t1
+ t0
+ keys
[39-4*i
]) ^ r1
;
445 r0
= (t0
+ keys
[38-4*i
]) ^ rol1(r0
);
447 t1
= ( s_box
[1][r1
& 0xFF]
448 ^ s_box
[2][(r1
>> 8) & 0xFF]
449 ^ s_box
[3][(r1
>> 16) & 0xFF]
450 ^ s_box
[0][(r1
>> 24) & 0xFF]);
451 t0
= ( s_box
[0][r0
& 0xFF]
452 ^ s_box
[1][(r0
>> 8) & 0xFF]
453 ^ s_box
[2][(r0
>> 16) & 0xFF]
454 ^ s_box
[3][(r0
>> 24) & 0xFF]) + t1
;
455 r3
= (t1
+ t0
+ keys
[37-4*i
]) ^ r3
;
457 r2
= (t0
+ keys
[36-4*i
]) ^ rol1(r2
);
460 words
[0] = r0
^ keys
[0];
461 words
[1] = r1
^ keys
[1];
462 words
[2] = r2
^ keys
[2];
463 words
[3] = r3
^ keys
[3];
465 for (i
= 0; i
<4; i
++, plaintext
+= 4)
466 LE_WRITE_UINT32(plaintext
, words
[i
]);