crypto/gf128mul.c

   1 /* gf128mul.c - GF(2^128) multiplication functions
   2  *
   3  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
   4  * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
   5  *
   6  * Based on Dr Brian Gladman's (GPL'd) work published at
   7  * http://fp.gladman.plus.com/cryptography_technology/index.htm
   8  * See the original copyright notice below.
   9  *
  10  * This program is free software; you can redistribute it and/or modify it
  11  * under the terms of the GNU General Public License as published by the Free
  12  * Software Foundation; either version 2 of the License, or (at your option)
  13  * any later version.
  14  */
  15
  16 /*
  17  ---------------------------------------------------------------------------
  18  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
  19
  20  LICENSE TERMS
  21
  22  The free distribution and use of this software in both source and binary
  23  form is allowed (with or without changes) provided that:
  24
  25    1. distributions of this source code include the above copyright
  26       notice, this list of conditions and the following disclaimer;
  27
  28    2. distributions in binary form include the above copyright
  29       notice, this list of conditions and the following disclaimer
  30       in the documentation and/or other associated materials;
  31
  32    3. the copyright holder's name is not used to endorse products
  33       built using this software without specific written permission.
  34
  35  ALTERNATIVELY, provided that this notice is retained in full, this product
  36  may be distributed under the terms of the GNU General Public License (GPL),
  37  in which case the provisions of the GPL apply INSTEAD OF those given above.
  38
  39  DISCLAIMER
  40
  41  This software is provided 'as is' with no explicit or implied warranties
  42  in respect of its properties, including, but not limited to, correctness
  43  and/or fitness for purpose.
  44  ---------------------------------------------------------------------------
  45  Issue 31/01/2006
  46
  47  This file provides fast multiplication in GF(128) as required by several
  48  cryptographic authentication modes
  49 */
  50
  51 #include <crypto/gf128mul.h>
  52 #include <linux/kernel.h>
  53 #include <linux/module.h>
  54 #include <linux/slab.h>
  55
  56 #define gf128mul_dat(q) { \
  57         q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
  58         q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
  59         q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
  60         q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
  61         q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
  62         q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
  63         q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
  64         q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
  65         q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
  66         q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
  67         q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
  68         q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
  69         q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
  70         q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
  71         q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
  72         q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
  73         q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
  74         q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
  75         q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
  76         q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
  77         q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
  78         q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
  79         q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
  80         q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
  81         q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
  82         q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
  83         q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
  84         q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
  85         q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
  86         q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
  87         q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
  88         q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
  89 }
  90
  91 /*      Given the value i in 0..255 as the byte overflow when a field element
  92     in GHASH is multipled by x^8, this function will return the values that
  93     are generated in the lo 16-bit word of the field value by applying the
  94     modular polynomial. The values lo_byte and hi_byte are returned via the
  95     macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
  96     memory as required by a suitable definition of this macro operating on
  97     the table above
  98 */
  99
 100 #define xx(p, q)        0x##p##q
 101
 102 #define xda_bbe(i) ( \
 103         (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
 104         (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
 105         (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
 106         (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
 107 )
 108
 109 #define xda_lle(i) ( \
 110         (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
 111         (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
 112         (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
 113         (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
 114 )
 115
 116 static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
 117 static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
 118
 119 /* These functions multiply a field element by x, by x^4 and by x^8
 120  * in the polynomial field representation. It uses 32-bit word operations
 121  * to gain speed but compensates for machine endianess and hence works
 122  * correctly on both styles of machine.
 123  */
 124
 125 static void gf128mul_x_lle(be128 *r, const be128 *x)
 126 {
 127         u64 a = be64_to_cpu(x->a);
 128         u64 b = be64_to_cpu(x->b);
 129         u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
 130
 131         r->b = cpu_to_be64((b >> 1) | (a << 63));
 132         r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
 133 }
 134
 135 static void gf128mul_x_bbe(be128 *r, const be128 *x)
 136 {
 137         u64 a = be64_to_cpu(x->a);
 138         u64 b = be64_to_cpu(x->b);
 139         u64 _tt = gf128mul_table_bbe[a >> 63];
 140
 141         r->a = cpu_to_be64((a << 1) | (b >> 63));
 142         r->b = cpu_to_be64((b << 1) ^ _tt);
 143 }
 144
 145 static void gf128mul_x8_lle(be128 *x)
 146 {
 147         u64 a = be64_to_cpu(x->a);
 148         u64 b = be64_to_cpu(x->b);
 149         u64 _tt = gf128mul_table_lle[b & 0xff];
 150
 151         x->b = cpu_to_be64((b >> 8) | (a << 56));
 152         x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
 153 }
 154
 155 static void gf128mul_x8_bbe(be128 *x)
 156 {
 157         u64 a = be64_to_cpu(x->a);
 158         u64 b = be64_to_cpu(x->b);
 159         u64 _tt = gf128mul_table_bbe[a >> 56];
 160
 161         x->a = cpu_to_be64((a << 8) | (b >> 56));
 162         x->b = cpu_to_be64((b << 8) ^ _tt);
 163 }
 164
 165 void gf128mul_lle(be128 *r, const be128 *b)
 166 {
 167         be128 p[8];
 168         int i;
 169
 170         p[0] = *r;
 171         for (i = 0; i < 7; ++i)
 172                 gf128mul_x_lle(&p[i + 1], &p[i]);
 173
 174         memset(r, 0, sizeof(r));
 175         for (i = 0;;) {
 176                 u8 ch = ((u8 *)b)[15 - i];
 177
 178                 if (ch & 0x80)
 179                         be128_xor(r, r, &p[0]);
 180                 if (ch & 0x40)
 181                         be128_xor(r, r, &p[1]);
 182                 if (ch & 0x20)
 183                         be128_xor(r, r, &p[2]);
 184                 if (ch & 0x10)
 185                         be128_xor(r, r, &p[3]);
 186                 if (ch & 0x08)
 187                         be128_xor(r, r, &p[4]);
 188                 if (ch & 0x04)
 189                         be128_xor(r, r, &p[5]);
 190                 if (ch & 0x02)
 191                         be128_xor(r, r, &p[6]);
 192                 if (ch & 0x01)
 193                         be128_xor(r, r, &p[7]);
 194
 195                 if (++i >= 16)
 196                         break;
 197
 198                 gf128mul_x8_lle(r);
 199         }
 200 }
 201 EXPORT_SYMBOL(gf128mul_lle);
 202
 203 void gf128mul_bbe(be128 *r, const be128 *b)
 204 {
 205         be128 p[8];
 206         int i;
 207
 208         p[0] = *r;
 209         for (i = 0; i < 7; ++i)
 210                 gf128mul_x_bbe(&p[i + 1], &p[i]);
 211
 212         memset(r, 0, sizeof(r));
 213         for (i = 0;;) {
 214                 u8 ch = ((u8 *)b)[i];
 215
 216                 if (ch & 0x80)
 217                         be128_xor(r, r, &p[7]);
 218                 if (ch & 0x40)
 219                         be128_xor(r, r, &p[6]);
 220                 if (ch & 0x20)
 221                         be128_xor(r, r, &p[5]);
 222                 if (ch & 0x10)
 223                         be128_xor(r, r, &p[4]);
 224                 if (ch & 0x08)
 225                         be128_xor(r, r, &p[3]);
 226                 if (ch & 0x04)
 227                         be128_xor(r, r, &p[2]);
 228                 if (ch & 0x02)
 229                         be128_xor(r, r, &p[1]);
 230                 if (ch & 0x01)
 231                         be128_xor(r, r, &p[0]);
 232
 233                 if (++i >= 16)
 234                         break;
 235
 236                 gf128mul_x8_bbe(r);
 237         }
 238 }
 239 EXPORT_SYMBOL(gf128mul_bbe);
 240
 241 /*      This version uses 64k bytes of table space.
 242     A 16 byte buffer has to be multiplied by a 16 byte key
 243     value in GF(128).  If we consider a GF(128) value in
 244     the buffer's lowest byte, we can construct a table of
 245     the 256 16 byte values that result from the 256 values
 246     of this byte.  This requires 4096 bytes. But we also
 247     need tables for each of the 16 higher bytes in the
 248     buffer as well, which makes 64 kbytes in total.
 249 */
 250 /* additional explanation
 251  * t[0][BYTE] contains g*BYTE
 252  * t[1][BYTE] contains g*x^8*BYTE
 253  *  ..
 254  * t[15][BYTE] contains g*x^120*BYTE */
 255 struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g)
 256 {
 257         struct gf128mul_64k *t;
 258         int i, j, k;
 259
 260         t = kzalloc(sizeof(*t), GFP_KERNEL);
 261         if (!t)
 262                 goto out;
 263
 264         for (i = 0; i < 16; i++) {
 265                 t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
 266                 if (!t->t[i]) {
 267                         gf128mul_free_64k(t);
 268                         t = NULL;
 269                         goto out;
 270                 }
 271         }
 272
 273         t->t[0]->t[128] = *g;
 274         for (j = 64; j > 0; j >>= 1)
 275                 gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
 276
 277         for (i = 0;;) {
 278                 for (j = 2; j < 256; j += j)
 279                         for (k = 1; k < j; ++k)
 280                                 be128_xor(&t->t[i]->t[j + k],
 281                                           &t->t[i]->t[j], &t->t[i]->t[k]);
 282
 283                 if (++i >= 16)
 284                         break;
 285
 286                 for (j = 128; j > 0; j >>= 1) {
 287                         t->t[i]->t[j] = t->t[i - 1]->t[j];
 288                         gf128mul_x8_lle(&t->t[i]->t[j]);
 289                 }
 290         }
 291
 292 out:
 293         return t;
 294 }
 295 EXPORT_SYMBOL(gf128mul_init_64k_lle);
 296
 297 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
 298 {
 299         struct gf128mul_64k *t;
 300         int i, j, k;
 301
 302         t = kzalloc(sizeof(*t), GFP_KERNEL);
 303         if (!t)
 304                 goto out;
 305
 306         for (i = 0; i < 16; i++) {
 307                 t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
 308                 if (!t->t[i]) {
 309                         gf128mul_free_64k(t);
 310                         t = NULL;
 311                         goto out;
 312                 }
 313         }
 314
 315         t->t[0]->t[1] = *g;
 316         for (j = 1; j <= 64; j <<= 1)
 317                 gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
 318
 319         for (i = 0;;) {
 320                 for (j = 2; j < 256; j += j)
 321                         for (k = 1; k < j; ++k)
 322                                 be128_xor(&t->t[i]->t[j + k],
 323                                           &t->t[i]->t[j], &t->t[i]->t[k]);
 324
 325                 if (++i >= 16)
 326                         break;
 327
 328                 for (j = 128; j > 0; j >>= 1) {
 329                         t->t[i]->t[j] = t->t[i - 1]->t[j];
 330                         gf128mul_x8_bbe(&t->t[i]->t[j]);
 331                 }
 332         }
 333
 334 out:
 335         return t;
 336 }
 337 EXPORT_SYMBOL(gf128mul_init_64k_bbe);
 338
 339 void gf128mul_free_64k(struct gf128mul_64k *t)
 340 {
 341         int i;
 342
 343         for (i = 0; i < 16; i++)
 344                 kfree(t->t[i]);
 345         kfree(t);
 346 }
 347 EXPORT_SYMBOL(gf128mul_free_64k);
 348
 349 void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t)
 350 {
 351         u8 *ap = (u8 *)a;
 352         be128 r[1];
 353         int i;
 354
 355         *r = t->t[0]->t[ap[0]];
 356         for (i = 1; i < 16; ++i)
 357                 be128_xor(r, r, &t->t[i]->t[ap[i]]);
 358         *a = *r;
 359 }
 360 EXPORT_SYMBOL(gf128mul_64k_lle);
 361
 362 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
 363 {
 364         u8 *ap = (u8 *)a;
 365         be128 r[1];
 366         int i;
 367
 368         *r = t->t[0]->t[ap[15]];
 369         for (i = 1; i < 16; ++i)
 370                 be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
 371         *a = *r;
 372 }
 373 EXPORT_SYMBOL(gf128mul_64k_bbe);
 374
 375 /*      This version uses 4k bytes of table space.
 376     A 16 byte buffer has to be multiplied by a 16 byte key
 377     value in GF(128).  If we consider a GF(128) value in a
 378     single byte, we can construct a table of the 256 16 byte
 379     values that result from the 256 values of this byte.
 380     This requires 4096 bytes. If we take the highest byte in
 381     the buffer and use this table to get the result, we then
 382     have to multiply by x^120 to get the final value. For the
 383     next highest byte the result has to be multiplied by x^112
 384     and so on. But we can do this by accumulating the result
 385     in an accumulator starting with the result for the top
 386     byte.  We repeatedly multiply the accumulator value by
 387     x^8 and then add in (i.e. xor) the 16 bytes of the next
 388     lower byte in the buffer, stopping when we reach the
 389     lowest byte. This requires a 4096 byte table.
 390 */
 391 struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
 392 {
 393         struct gf128mul_4k *t;
 394         int j, k;
 395
 396         t = kzalloc(sizeof(*t), GFP_KERNEL);
 397         if (!t)
 398                 goto out;
 399
 400         t->t[128] = *g;
 401         for (j = 64; j > 0; j >>= 1)
 402                 gf128mul_x_lle(&t->t[j], &t->t[j+j]);
 403
 404         for (j = 2; j < 256; j += j)
 405                 for (k = 1; k < j; ++k)
 406                         be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 407
 408 out:
 409         return t;
 410 }
 411 EXPORT_SYMBOL(gf128mul_init_4k_lle);
 412
 413 struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
 414 {
 415         struct gf128mul_4k *t;
 416         int j, k;
 417
 418         t = kzalloc(sizeof(*t), GFP_KERNEL);
 419         if (!t)
 420                 goto out;
 421
 422         t->t[1] = *g;
 423         for (j = 1; j <= 64; j <<= 1)
 424                 gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
 425
 426         for (j = 2; j < 256; j += j)
 427                 for (k = 1; k < j; ++k)
 428                         be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 429
 430 out:
 431         return t;
 432 }
 433 EXPORT_SYMBOL(gf128mul_init_4k_bbe);
 434
 435 void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
 436 {
 437         u8 *ap = (u8 *)a;
 438         be128 r[1];
 439         int i = 15;
 440
 441         *r = t->t[ap[15]];
 442         while (i--) {
 443                 gf128mul_x8_lle(r);
 444                 be128_xor(r, r, &t->t[ap[i]]);
 445         }
 446         *a = *r;
 447 }
 448 EXPORT_SYMBOL(gf128mul_4k_lle);
 449
 450 void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
 451 {
 452         u8 *ap = (u8 *)a;
 453         be128 r[1];
 454         int i = 0;
 455
 456         *r = t->t[ap[0]];
 457         while (++i < 16) {
 458                 gf128mul_x8_bbe(r);
 459                 be128_xor(r, r, &t->t[ap[i]]);
 460         }
 461         *a = *r;
 462 }
 463 EXPORT_SYMBOL(gf128mul_4k_bbe);
 464
 465 MODULE_LICENSE("GPL");
 466 MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");