src/crypto/axtls/aes.c

   1 /*
   2  *  Copyright(C) 2006 Cameron Rich
   3  *
   4  *  This library is free software; you can redistribute it and/or modify
   5  *  it under the terms of the GNU Lesser General Public License as published by
   6  *  the Free Software Foundation; either version 2 of the License, or
   7  *  (at your option) any later version.
   8  *
   9  *  This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  *  GNU Lesser General Public License for more details.
  13  *
  14  *  You should have received a copy of the GNU Lesser General Public License
  15  *  along with this library; if not, write to the Free Software
  16  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  17  */
  18
  19 /**
  20  * AES implementation - this is a small code version. There are much faster
  21  * versions around but they are much larger in size (i.e. they use large
  22  * submix tables).
  23  */
  24
  25 #include <string.h>
  26 #include "crypto.h"
  27
  28 /* all commented out in skeleton mode */
  29 #ifndef CONFIG_SSL_SKELETON_MODE
  30
/*
 * Rotate a 32-bit word right by 8 (rot1), 16 (rot2) or 24 (rot3) bits.
 * The argument is expanded twice, so it must be free of side effects.
 * NOTE(review): the rotation is only exact when x is an unsigned 32-bit
 * value (the callers visible in this file pass uint32_t) - confirm for any
 * new use.
 */
#define rot1(x) (((x) << 24) | ((x) >> 8))
#define rot2(x) (((x) << 16) | ((x) >> 16))
#define rot3(x) (((x) <<  8) | ((x) >> 24))
  34
/*
 * mul2(x,t): double all four bytes packed in the 32-bit word x in GF(2^8)
 * in one go.  t is a scratch lvalue that gets overwritten, and x is expanded
 * more than once, so it must be free of side effects.  The trick is credited
 * to Dr B. R. Gladman <brg@gladman.uk.net>; the u-(u>>7) step is probably a
 * standard graphics trick.
 *
 * Doubling a byte means shifting it left by one and xoring in 0x1b if its
 * top bit was set.  Worked per byte, for a byte with the top bit set (left
 * column) and a byte without it (right column):
 *
 *   a 1xxx xxxx   0xxx xxxx   the input byte
 *   b 1000 0000   0000 0000   mask bit 7:             t = x & mt
 *   c 0000 0001   0000 0000   move bit 7 to bit 0:    t >> 7
 *   d 0111 1111   0000 0000   subtract (c) from (b):  t - (t >> 7)
 *   e 0001 1011   0000 0000   and with the 0x1b mask: ... & mm
 *
 * (e) is finally xored into the shifted bytes ((x + x) & mh); the mh mask
 * clears the bit that each byte's top bit carried into its neighbour.
 */
#define mt  0x80808080  /* bit 7 of every byte */
#define ml  0x7f7f7f7f  /* low seven bits of every byte (not used by mul2) */
#define mh  0xfefefefe  /* everything but bit 0 of every byte */
#define mm  0x1b1b1b1b  /* reduction constant 0x1b in every byte */
#define mul2(x,t)       ((t)=((x)&mt), \
                        ((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))
  52
/*
 * inv_mix_col(x,f2,f4,f8,f9): apply the inverse MixColumn transform to the
 * four bytes packed in the 32-bit word x; the transformed word is the value
 * of the whole expression.
 *
 * f2, f4, f8 and f9 are scratch lvalues and are overwritten.  x is expanded
 * several times, so it must be free of side effects and must not be one of
 * the scratch variables.
 *
 * Steps: build 2x, 4x and 8x with mul2, combine them into the multiples
 * 9x (f9), 11x (f2), 13x (f4) and 14x (f8), then xor 14x with the rotated
 * 11x, 13x and 9x words so each output byte sees the four coefficients.
 */
#define inv_mix_col(x,f2,f4,f8,f9) (\
                        (f2)=mul2(x,f2), \
                        (f4)=mul2(f2,f4), \
                        (f8)=mul2(f4,f8), \
                        (f9)=(x)^(f8), \
                        (f8)=((f2)^(f4)^(f8)), \
                        (f2)^=(f9), \
                        (f4)^=(f9), \
                        (f8)^=rot3(f2), \
                        (f8)^=rot2(f4), \
                        (f8)^rot1(f9))
  64
/*
 * Some macros to do endian independent word extraction.
 *
 * n2l(c,l): load the word c points at, convert it with ntohl() into l and
 *           advance c by one element.
 * l2n(l,c): store htonl(l) through c and advance c by one element.
 *
 * NOTE: n2l expands to two statements without enclosing braces, so it must
 * not be used as the sole body of an unbraced if/for/while.  Both macros
 * modify their pointer argument.
 */
#define n2l(c,l) l=ntohl(*c); c++
#define l2n(l,c) *c++=htonl(l)
  68
/*
 * AES S-box: the forward byte substitution table, indexed by the byte to
 * substitute.  Within this file it is read by the key expansion in
 * AES_set_key() and by AES_encrypt().
 */
static const uint8_t aes_sbox[256] =
{
        0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
        0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
        0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
        0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
        0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
        0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
        0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
        0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
        0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
        0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
        0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
        0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
        0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
        0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
        0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
        0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
        0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
        0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
        0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
        0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
        0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
        0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
        0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
        0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
        0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
        0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
        0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
        0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
        0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
        0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
        0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
        0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};
 107
/*
 * AES inverse S-box: the byte substitution table used for decryption,
 * indexed by the byte to substitute.  Within this file it is read only by
 * AES_decrypt().
 */
static const uint8_t aes_isbox[256] =
{
    0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
    0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
    0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
    0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
    0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
    0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
    0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
    0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
    0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
    0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
    0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
    0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
    0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
    0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
    0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
    0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
    0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
    0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
    0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
    0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
    0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
    0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
    0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
    0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
    0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
    0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
    0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
    0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
    0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
    0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
    0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
    0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};
 146
/*
 * Key-schedule round constants: each entry is the previous one doubled in
 * GF(2^8), starting from 0x01.  AES_set_key() walks this table from the
 * start and xors one entry into the top byte of a schedule word each time
 * it begins a new group of key-size words.
 */
static const unsigned char Rcon[30]=
{
        0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
        0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
        0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
        0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};
 154
 155 /* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
 156    x^8+x^4+x^3+x+1 */
 157 static unsigned char AES_xtime(uint32_t x)
 158 {
 159         return x = (x&0x80) ? (x<<1)^0x1b : x<<1;
 160 }
 161
 162 /**
 163  * Set up AES with the key/iv and cipher size.
 164  */
 165 void AES_set_key(AES_CTX *ctx, const uint8_t *key,
 166         const uint8_t *iv, AES_MODE mode)
 167 {
 168     int i, ii;
 169     uint32_t *W, tmp, tmp2;
 170     const unsigned char *ip;
 171     int words;
 172
 173     switch (mode)
 174     {
 175         case AES_MODE_128:
 176             i = 10;
 177             words = 4;
 178             break;
 179
 180         case AES_MODE_256:
 181             i = 14;
 182             words = 8;
 183             break;
 184
 185         default:        /* fail silently */
 186             return;
 187     }
 188
 189     ctx->rounds = i;
 190     ctx->key_size = words;
 191     W = ctx->ks;
 192     for (i = 0; i < words; i+=2)
 193     {
 194         W[i+0]= ((uint32_t)key[ 0]<<24)|
 195             ((uint32_t)key[ 1]<<16)|
 196             ((uint32_t)key[ 2]<< 8)|
 197             ((uint32_t)key[ 3]    );
 198         W[i+1]= ((uint32_t)key[ 4]<<24)|
 199             ((uint32_t)key[ 5]<<16)|
 200             ((uint32_t)key[ 6]<< 8)|
 201             ((uint32_t)key[ 7]    );
 202         key += 8;
 203     }
 204
 205     ip = Rcon;
 206     ii = 4 * (ctx->rounds+1);
 207     for (i = words; i<ii; i++)
 208     {
 209         tmp = W[i-1];
 210
 211         if ((i % words) == 0)
 212         {
 213             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]<< 8;
 214             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
 215             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
 216             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ];
 217             tmp=tmp2^(((unsigned int)*ip)<<24);
 218             ip++;
 219         }
 220
 221         if ((words == 8) && ((i % words) == 4))
 222         {
 223             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]    ;
 224             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
 225             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
 226             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ]<<24;
 227             tmp=tmp2;
 228         }
 229
 230         W[i]=W[i-words]^tmp;
 231     }
 232
 233     /* copy the iv across */
 234     memcpy(ctx->iv, iv, 16);
 235 }
 236
 237 /**
 238  * Change a key for decryption.
 239  */
 240 void AES_convert_key(AES_CTX *ctx)
 241 {
 242     int i;
 243     uint32_t *k,w,t1,t2,t3,t4;
 244
 245     k = ctx->ks;
 246     k += 4;
 247
 248     for (i=ctx->rounds*4; i>4; i--)
 249     {
 250         w= *k;
 251         w = inv_mix_col(w,t1,t2,t3,t4);
 252         *k++ =w;
 253     }
 254 }
 255
 256 #if 0
 257 /**
 258  * Encrypt a byte sequence (with a block size 16) using the AES cipher.
 259  */
 260 void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
 261 {
 262     uint32_t tin0, tin1, tin2, tin3;
 263     uint32_t tout0, tout1, tout2, tout3;
 264     uint32_t tin[4];
 265     uint32_t *iv = (uint32_t *)ctx->iv;
 266     uint32_t *msg_32 = (uint32_t *)msg;
 267     uint32_t *out_32 = (uint32_t *)out;
 268
 269     n2l(iv, tout0);
 270     n2l(iv, tout1);
 271     n2l(iv, tout2);
 272     n2l(iv, tout3);
 273     iv -= 4;
 274
 275     for (length -= 16; length >= 0; length -= 16)
 276     {
 277         n2l(msg_32, tin0);
 278         n2l(msg_32, tin1);
 279         n2l(msg_32, tin2);
 280         n2l(msg_32, tin3);
 281         tin[0] = tin0^tout0;
 282         tin[1] = tin1^tout1;
 283         tin[2] = tin2^tout2;
 284         tin[3] = tin3^tout3;
 285
 286         AES_encrypt(ctx, tin);
 287
 288         tout0 = tin[0];
 289         l2n(tout0, out_32);
 290         tout1 = tin[1];
 291         l2n(tout1, out_32);
 292         tout2 = tin[2];
 293         l2n(tout2, out_32);
 294         tout3 = tin[3];
 295         l2n(tout3, out_32);
 296     }
 297
 298     l2n(tout0, iv);
 299     l2n(tout1, iv);
 300     l2n(tout2, iv);
 301     l2n(tout3, iv);
 302 }
 303
 304 /**
 305  * Decrypt a byte sequence (with a block size 16) using the AES cipher.
 306  */
 307 void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
 308 {
 309     uint32_t tin0, tin1, tin2, tin3;
 310     uint32_t xor0,xor1,xor2,xor3;
 311     uint32_t tout0,tout1,tout2,tout3;
 312     uint32_t data[4];
 313     uint32_t *iv = (uint32_t *)ctx->iv;
 314     uint32_t *msg_32 = (uint32_t *)msg;
 315     uint32_t *out_32 = (uint32_t *)out;
 316
 317     n2l(iv ,xor0);
 318     n2l(iv, xor1);
 319     n2l(iv, xor2);
 320     n2l(iv, xor3);
 321     iv -= 4;
 322
 323     for (length-=16; length >= 0; length -= 16)
 324     {
 325         n2l(msg_32, tin0);
 326         n2l(msg_32, tin1);
 327         n2l(msg_32, tin2);
 328         n2l(msg_32, tin3);
 329
 330         data[0] = tin0;
 331         data[1] = tin1;
 332         data[2] = tin2;
 333         data[3] = tin3;
 334
 335         AES_decrypt(ctx, data);
 336
 337         tout0 = data[0]^xor0;
 338         tout1 = data[1]^xor1;
 339         tout2 = data[2]^xor2;
 340         tout3 = data[3]^xor3;
 341
 342         xor0 = tin0;
 343         xor1 = tin1;
 344         xor2 = tin2;
 345         xor3 = tin3;
 346
 347         l2n(tout0, out_32);
 348         l2n(tout1, out_32);
 349         l2n(tout2, out_32);
 350         l2n(tout3, out_32);
 351     }
 352
 353     l2n(xor0, iv);
 354     l2n(xor1, iv);
 355     l2n(xor2, iv);
 356     l2n(xor3, iv);
 357 }
 358 #endif
 359
 360 /**
 361  * Encrypt a single block (16 bytes) of data
 362  */
 363 void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
 364 {
 365     /* To make this code smaller, generate the sbox entries on the fly.
 366      * This will have a really heavy effect upon performance.
 367      */
 368     uint32_t tmp[4];
 369     uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
 370     int curr_rnd;
 371     int rounds = ctx->rounds;
 372     const uint32_t *k = ctx->ks;
 373
 374     /* Pre-round key addition */
 375     for (row = 0; row < 4; row++)
 376     {
 377         data[row] ^= *(k++);
 378     }
 379
 380     /* Encrypt one block. */
 381     for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
 382     {
 383         /* Perform ByteSub and ShiftRow operations together */
 384         for (row = 0; row < 4; row++)
 385         {
 386             a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
 387             a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
 388             a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
 389             a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
 390
 391             /* Perform MixColumn iff not last round */
 392             if (curr_rnd < (rounds - 1))
 393             {
 394                 tmp1 = a0 ^ a1 ^ a2 ^ a3;
 395                 old_a0 = a0;
 396
 397                 a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
 398                 a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
 399                 a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
 400                 a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
 401
 402             }
 403
 404             tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
 405         }
 406
 407         /* KeyAddition - note that it is vital that this loop is separate from
 408            the MixColumn operation, which must be atomic...*/
 409         for (row = 0; row < 4; row++)
 410         {
 411             data[row] = tmp[row] ^ *(k++);
 412         }
 413     }
 414 }
 415
 416 /**
 417  * Decrypt a single block (16 bytes) of data
 418  */
 419 void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
 420 {
 421     uint32_t tmp[4];
 422     uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
 423     uint32_t a0, a1, a2, a3, row;
 424     int curr_rnd;
 425     int rounds = ctx->rounds;
 426     uint32_t *k = (uint32_t*)ctx->ks + ((rounds+1)*4);
 427
 428     /* pre-round key addition */
 429     for (row=4; row > 0;row--)
 430     {
 431         data[row-1] ^= *(--k);
 432     }
 433
 434     /* Decrypt one block */
 435     for (curr_rnd=0; curr_rnd < rounds; curr_rnd++)
 436     {
 437         /* Perform ByteSub and ShiftRow operations together */
 438         for (row = 4; row > 0; row--)
 439         {
 440             a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
 441             a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
 442             a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
 443             a3 = aes_isbox[(data[row%4])&0xFF];
 444
 445             /* Perform MixColumn iff not last round */
 446             if (curr_rnd<(rounds-1))
 447             {
 448                 /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
 449                    are quite large compared to encryption; this
 450                    operation slows decryption down noticeably. */
 451                 xt0 = AES_xtime(a0^a1);
 452                 xt1 = AES_xtime(a1^a2);
 453                 xt2 = AES_xtime(a2^a3);
 454                 xt3 = AES_xtime(a3^a0);
 455                 xt4 = AES_xtime(xt0^xt1);
 456                 xt5 = AES_xtime(xt1^xt2);
 457                 xt6 = AES_xtime(xt4^xt5);
 458
 459                 xt0 ^= a1^a2^a3^xt4^xt6;
 460                 xt1 ^= a0^a2^a3^xt5^xt6;
 461                 xt2 ^= a0^a1^a3^xt4^xt6;
 462                 xt3 ^= a0^a1^a2^xt5^xt6;
 463                 tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
 464             }
 465             else
 466                 tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
 467         }
 468
 469         for (row = 4; row > 0; row--)
 470         {
 471             data[row-1] = tmp[row-1] ^ *(--k);
 472         }
 473     }
 474 }
 475
 476 #endif