[tcp] Store local port in host byte order
[gpxe.git] / src / crypto / axtls / aes.c
blob0c0d7247aacf4be9702a418c46ecd1008bbe9877
/*
 *  Copyright(C) 2006 Cameron Rich
 *
 *  This library is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/**
 * AES implementation - this is a small code version. There are much faster
 * versions around but they are much larger in size (i.e. they use large
 * submix tables).
 */
25 #include <string.h>
26 #include "crypto.h"
28 /* all commented out in skeleton mode */
29 #ifndef CONFIG_SSL_SKELETON_MODE
/*
 * Rotate a 32-bit word right by one, two or three whole bytes.
 * The argument is evaluated twice; pass an unsigned 32-bit value so that
 * the two shifts together cover exactly the full word.
 */
#define rot1(x) (((x) >> 8) | ((x) << 24))
#define rot2(x) (((x) >> 16) | ((x) << 16))
#define rot3(x) (((x) >> 24) | ((x) << 8))
/*
 * This cute trick does 4 'mul by two' at once.  Stolen from
 * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
 * a standard graphics trick.
 *
 * The key to this is that we need to xor with 0x1b if the top bit is set.
 * Shown for two of the four bytes in the word:
 *
 *  a   1xxx xxxx 0xxx 0xxx     First we mask the 7bit,
 *  b   1000 0000 0000 0000     then we shift right by 7 putting the 7bit
 *                              in 0bit,
 *  c   0000 0001 0000 0000     we then subtract (c) from (b)
 *  d   0111 1111 0000 0000     and now we and with our mask
 *  e   0001 1011 0000 0000
 *
 * mul2(x,t) yields the doubled word; t is a caller-supplied scratch lvalue
 * that is overwritten.  x is evaluated more than once.
 */
#define mt  0x80808080      /* top bit of each byte */
#define ml  0x7f7f7f7f      /* low seven bits of each byte */
#define mh  0xfefefefe      /* all but the bottom bit of each byte */
#define mm  0x1b1b1b1b      /* 0x1b replicated into each byte */
#define mul2(x,t)   ((t) = ((x) & mt), \
                     ((((t) - ((t) >> 7)) & mm) ^ (((x) + (x)) & mh)))
/*
 * Inverse MixColumn on one packed column word x (first column byte in the
 * most significant position).  f2, f4, f8 and f9 are caller-supplied scratch
 * lvalues that are overwritten; the value of the whole expression is the
 * transformed word.  x is evaluated more than once, so it must be free of
 * side effects.  The steps are order-dependent: each one consumes the
 * scratch values left by the previous ones.
 */
#define inv_mix_col(x,f2,f4,f8,f9) ( \
    (f2) = mul2(x, f2),             /* 2*x */ \
    (f4) = mul2(f2, f4),            /* 4*x */ \
    (f8) = mul2(f4, f8),            /* 8*x */ \
    (f9) = (x) ^ (f8),              /* 9*x */ \
    (f8) = ((f2) ^ (f4) ^ (f8)),    /* 14*x */ \
    (f2) ^= (f9),                   /* 11*x */ \
    (f4) ^= (f9),                   /* 13*x */ \
    (f8) ^= rot3(f2),               /* fold in 11 * next byte */ \
    (f8) ^= rot2(f4),               /* fold in 13 * byte after that */ \
    (f8) ^ rot1(f9))                /* fold in 9 * last byte */
/*
 * Some macros to do endian independent byte extraction.
 *
 * n2l(c,l): load the network-order 32-bit word at *c into l as a host-order
 *           value, then advance c by one word.  Wrapped in do/while(0) so it
 *           behaves as a single statement (safe under an unbraced if/else).
 * l2n(l,c): store host-order l at *c in network order, then advance c by
 *           one word.  Remains a single expression.
 *
 * c must be a modifiable pointer to a 32-bit word; it is evaluated more
 * than once by n2l.
 */
#define n2l(c,l)    do { (l) = ntohl(*(c)); (c)++; } while (0)
#define l2n(l,c)    (*(c)++ = htonl(l))
/**
 * AES S-box: forward byte-substitution table, indexed by the input byte.
 * Each row below holds sixteen consecutive entries.
 */
static const uint8_t aes_sbox[256] =
{
    /* 0x00 */ 0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
    /* 0x10 */ 0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
    /* 0x20 */ 0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
    /* 0x30 */ 0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
    /* 0x40 */ 0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
    /* 0x50 */ 0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
    /* 0x60 */ 0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
    /* 0x70 */ 0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
    /* 0x80 */ 0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
    /* 0x90 */ 0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
    /* 0xA0 */ 0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
    /* 0xB0 */ 0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
    /* 0xC0 */ 0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
    /* 0xD0 */ 0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
    /* 0xE0 */ 0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
    /* 0xF0 */ 0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};
/**
 * AES is-box: inverse byte-substitution table, indexed by the input byte.
 * Each row below holds sixteen consecutive entries.
 */
static const uint8_t aes_isbox[256] =
{
    /* 0x00 */ 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
    /* 0x10 */ 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
    /* 0x20 */ 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
    /* 0x30 */ 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
    /* 0x40 */ 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
    /* 0x50 */ 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
    /* 0x60 */ 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
    /* 0x70 */ 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
    /* 0x80 */ 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
    /* 0x90 */ 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
    /* 0xa0 */ 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
    /* 0xb0 */ 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
    /* 0xc0 */ 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
    /* 0xd0 */ 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
    /* 0xe0 */ 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
    /* 0xf0 */ 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};
/* Round-constant table: each entry is the GF(2^8) double of the one before
   it, starting from 0x01. */
static const unsigned char Rcon[30]=
{
    0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,
    0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,
    0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};
/* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
   x^8+x^4+x^3+x+1.  Only the low eight bits of the result are returned. */
static unsigned char AES_xtime(uint32_t x)
{
    uint32_t doubled = x << 1;

    /* A set top bit overflows the field; reduce by the polynomial. */
    if (x & 0x80)
    {
        doubled ^= 0x1b;
    }

    return (unsigned char)doubled;
}
163 * Set up AES with the key/iv and cipher size.
165 void AES_set_key(AES_CTX *ctx, const uint8_t *key,
166 const uint8_t *iv, AES_MODE mode)
168 int i, ii;
169 uint32_t *W, tmp, tmp2;
170 const unsigned char *ip;
171 int words;
173 switch (mode)
175 case AES_MODE_128:
176 i = 10;
177 words = 4;
178 break;
180 case AES_MODE_256:
181 i = 14;
182 words = 8;
183 break;
185 default: /* fail silently */
186 return;
189 ctx->rounds = i;
190 ctx->key_size = words;
191 W = ctx->ks;
192 for (i = 0; i < words; i+=2)
194 W[i+0]= ((uint32_t)key[ 0]<<24)|
195 ((uint32_t)key[ 1]<<16)|
196 ((uint32_t)key[ 2]<< 8)|
197 ((uint32_t)key[ 3] );
198 W[i+1]= ((uint32_t)key[ 4]<<24)|
199 ((uint32_t)key[ 5]<<16)|
200 ((uint32_t)key[ 6]<< 8)|
201 ((uint32_t)key[ 7] );
202 key += 8;
205 ip = Rcon;
206 ii = 4 * (ctx->rounds+1);
207 for (i = words; i<ii; i++)
209 tmp = W[i-1];
211 if ((i % words) == 0)
213 tmp2 =(uint32_t)aes_sbox[(tmp )&0xff]<< 8;
214 tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
215 tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
216 tmp2|=(uint32_t)aes_sbox[(tmp>>24) ];
217 tmp=tmp2^(((unsigned int)*ip)<<24);
218 ip++;
221 if ((words == 8) && ((i % words) == 4))
223 tmp2 =(uint32_t)aes_sbox[(tmp )&0xff] ;
224 tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
225 tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
226 tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]<<24;
227 tmp=tmp2;
230 W[i]=W[i-words]^tmp;
233 /* copy the iv across */
234 memcpy(ctx->iv, iv, 16);
238 * Change a key for decryption.
240 void AES_convert_key(AES_CTX *ctx)
242 int i;
243 uint32_t *k,w,t1,t2,t3,t4;
245 k = ctx->ks;
246 k += 4;
248 for (i=ctx->rounds*4; i>4; i--)
250 w= *k;
251 w = inv_mix_col(w,t1,t2,t3,t4);
252 *k++ =w;
256 #if 0
258 * Encrypt a byte sequence (with a block size 16) using the AES cipher.
260 void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
262 uint32_t tin0, tin1, tin2, tin3;
263 uint32_t tout0, tout1, tout2, tout3;
264 uint32_t tin[4];
265 uint32_t *iv = (uint32_t *)ctx->iv;
266 uint32_t *msg_32 = (uint32_t *)msg;
267 uint32_t *out_32 = (uint32_t *)out;
269 n2l(iv, tout0);
270 n2l(iv, tout1);
271 n2l(iv, tout2);
272 n2l(iv, tout3);
273 iv -= 4;
275 for (length -= 16; length >= 0; length -= 16)
277 n2l(msg_32, tin0);
278 n2l(msg_32, tin1);
279 n2l(msg_32, tin2);
280 n2l(msg_32, tin3);
281 tin[0] = tin0^tout0;
282 tin[1] = tin1^tout1;
283 tin[2] = tin2^tout2;
284 tin[3] = tin3^tout3;
286 AES_encrypt(ctx, tin);
288 tout0 = tin[0];
289 l2n(tout0, out_32);
290 tout1 = tin[1];
291 l2n(tout1, out_32);
292 tout2 = tin[2];
293 l2n(tout2, out_32);
294 tout3 = tin[3];
295 l2n(tout3, out_32);
298 l2n(tout0, iv);
299 l2n(tout1, iv);
300 l2n(tout2, iv);
301 l2n(tout3, iv);
305 * Decrypt a byte sequence (with a block size 16) using the AES cipher.
307 void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
309 uint32_t tin0, tin1, tin2, tin3;
310 uint32_t xor0,xor1,xor2,xor3;
311 uint32_t tout0,tout1,tout2,tout3;
312 uint32_t data[4];
313 uint32_t *iv = (uint32_t *)ctx->iv;
314 uint32_t *msg_32 = (uint32_t *)msg;
315 uint32_t *out_32 = (uint32_t *)out;
317 n2l(iv ,xor0);
318 n2l(iv, xor1);
319 n2l(iv, xor2);
320 n2l(iv, xor3);
321 iv -= 4;
323 for (length-=16; length >= 0; length -= 16)
325 n2l(msg_32, tin0);
326 n2l(msg_32, tin1);
327 n2l(msg_32, tin2);
328 n2l(msg_32, tin3);
330 data[0] = tin0;
331 data[1] = tin1;
332 data[2] = tin2;
333 data[3] = tin3;
335 AES_decrypt(ctx, data);
337 tout0 = data[0]^xor0;
338 tout1 = data[1]^xor1;
339 tout2 = data[2]^xor2;
340 tout3 = data[3]^xor3;
342 xor0 = tin0;
343 xor1 = tin1;
344 xor2 = tin2;
345 xor3 = tin3;
347 l2n(tout0, out_32);
348 l2n(tout1, out_32);
349 l2n(tout2, out_32);
350 l2n(tout3, out_32);
353 l2n(xor0, iv);
354 l2n(xor1, iv);
355 l2n(xor2, iv);
356 l2n(xor3, iv);
358 #endif
361 * Encrypt a single block (16 bytes) of data
363 void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
365 /* To make this code smaller, generate the sbox entries on the fly.
366 * This will have a really heavy effect upon performance.
368 uint32_t tmp[4];
369 uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
370 int curr_rnd;
371 int rounds = ctx->rounds;
372 const uint32_t *k = ctx->ks;
374 /* Pre-round key addition */
375 for (row = 0; row < 4; row++)
377 data[row] ^= *(k++);
380 /* Encrypt one block. */
381 for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
383 /* Perform ByteSub and ShiftRow operations together */
384 for (row = 0; row < 4; row++)
386 a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
387 a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
388 a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
389 a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
391 /* Perform MixColumn iff not last round */
392 if (curr_rnd < (rounds - 1))
394 tmp1 = a0 ^ a1 ^ a2 ^ a3;
395 old_a0 = a0;
397 a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
398 a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
399 a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
400 a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
404 tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
407 /* KeyAddition - note that it is vital that this loop is separate from
408 the MixColumn operation, which must be atomic...*/
409 for (row = 0; row < 4; row++)
411 data[row] = tmp[row] ^ *(k++);
417 * Decrypt a single block (16 bytes) of data
419 void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
421 uint32_t tmp[4];
422 uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
423 uint32_t a0, a1, a2, a3, row;
424 int curr_rnd;
425 int rounds = ctx->rounds;
426 uint32_t *k = (uint32_t*)ctx->ks + ((rounds+1)*4);
428 /* pre-round key addition */
429 for (row=4; row > 0;row--)
431 data[row-1] ^= *(--k);
434 /* Decrypt one block */
435 for (curr_rnd=0; curr_rnd < rounds; curr_rnd++)
437 /* Perform ByteSub and ShiftRow operations together */
438 for (row = 4; row > 0; row--)
440 a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
441 a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
442 a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
443 a3 = aes_isbox[(data[row%4])&0xFF];
445 /* Perform MixColumn iff not last round */
446 if (curr_rnd<(rounds-1))
448 /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
449 are quite large compared to encryption; this
450 operation slows decryption down noticeably. */
451 xt0 = AES_xtime(a0^a1);
452 xt1 = AES_xtime(a1^a2);
453 xt2 = AES_xtime(a2^a3);
454 xt3 = AES_xtime(a3^a0);
455 xt4 = AES_xtime(xt0^xt1);
456 xt5 = AES_xtime(xt1^xt2);
457 xt6 = AES_xtime(xt4^xt5);
459 xt0 ^= a1^a2^a3^xt4^xt6;
460 xt1 ^= a0^a2^a3^xt5^xt6;
461 xt2 ^= a0^a1^a3^xt4^xt6;
462 xt3 ^= a0^a1^a2^xt5^xt6;
463 tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
465 else
466 tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
469 for (row = 4; row > 0; row--)
471 data[row-1] = tmp[row-1] ^ *(--k);
476 #endif