Modules/binascii.c

   1 /*
   2 ** Routines to represent binary data in ASCII and vice-versa
   3 **
   4 ** This module currently supports the following encodings:
   5 ** uuencode:
   6 **      each line encodes 45 bytes (except possibly the last)
   7 **      First char encodes (binary) length, rest data
   8 **      each char encodes 6 bits, as follows:
   9 **      binary: 01234567 abcdefgh ijklmnop
  10 **      ascii:  012345 67abcd efghij klmnop
  11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12 **      short binary data is zero-extended (so the bits are always in the
  13 **      right place), this does *not* reflect in the length.
  14 ** base64:
  15 **      Line breaks are insignificant, but lines are at most 76 chars
  16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17 **      is done via a table.
  18 **      Short binary data is filled (in ASCII) with '='.
  19 ** hqx:
  20 **      File starts with introductory text, real data starts and ends
  21 **      with colons.
  22 **      Data consists of three similar parts: info, datafork, resourcefork.
  23 **      Each part is protected (at the end) with a 16-bit crc
  24 **      The binary data is run-length encoded, and then ascii-fied:
  25 **      binary: 01234567 abcdefgh ijklmnop
  26 **      ascii:  012345 67abcd efghij klmnop
  27 **      ASCII encoding is table-driven, see the code.
  28 **      Short binary data results in the runt ascii-byte being output with
  29 **      the bits in the right place.
  30 **
  31 ** While I was reading dozens of programs that encode or decode the formats
  32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33 **
  34 **      Programs that encode binary data in ASCII are written in
  35 **      such a style that they are as unreadable as possible. Devices used
  36 **      include unnecessary global variables, burying important tables
  37 **      in unrelated sourcefiles, putting functions in include files,
  38 **      using seemingly-descriptive variable names for different purposes,
  39 **      calls to empty subroutines and a host of others.
  40 **
  41 ** I have attempted to break with this tradition, but I guess that that
  42 ** does make the performance sub-optimal. Oh well, too bad...
  43 **
  44 ** Jack Jansen, CWI, July 1995.
  45 **
  46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47 ** quoted-printable encoding specifies that non printable characters (anything
  48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49 ** of the character.  It also specifies some other behavior to enable 8bit data
  50 ** in a mail message with little difficulty (maximum line sizes, protecting
  51 ** some cases of whitespace, etc).
  52 **
  53 ** Brandon Long, September 2001.
  54 */
  55
  56
  57 #include "Python.h"
  58
/* Exception object passed to PyErr_SetString() by the routines below when
** the input is malformed (e.g. "Illegal char", "Trailing garbage").
*/
static PyObject *Error;
/* Exception object passed to PyErr_SetString() by the routines below when
** the input ends before a complete unit of data has been decoded.
*/
static PyObject *Incomplete;
  61
/*
** hqx lookup table, ascii->binary.
**
** The table is indexed by the input byte.  An entry is either the 6-bit
** value (0x00..0x3F) that the character stands for, or one of the
** sentinel values DONE, SKIP or FAIL defined below.
*/

/* Marker byte used by the run-length coding routines (rlecode_hqx and
** rledecode_hqx) further down in this file.
*/
#define RUNCHAR 0x90

/* Sentinel table entries; all of them are larger than any 6-bit value.
** DONE: the ':' character, which makes a2b_hqx stop decoding.
** SKIP: '\n' and '\r', which a2b_hqx silently ignores.
** FAIL: every other character, which a2b_hqx rejects as illegal.
*/
#define DONE 0x7F
#define SKIP 0x7E
#define FAIL 0x7D

/* Rows are labelled with their row number; each row covers 8 characters,
** and the comment line above a row names the characters in that row.
*/
static unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Rows 16..31: bytes 0x80..0xFF, none of which are legal hqx characters */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
 122
/* hqx lookup table, binary->ascii: indexed by a 6-bit value (0..63),
** yields the character that b2a_hqx emits for it.
*/
static unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";

/* base64 lookup table, ascii->binary.  It has 128 entries, so it may only
** be indexed with 7-bit character codes.  -1 marks a character that is
** not part of the base64 alphabet; the pad character '=' maps to 0.
*/
static char table_a2b_base64[] = {
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
        52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
        -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
        15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
        -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
        41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* Character used to pad the final base64 quad */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
** b2a_base64 allocates bin_len*2 + 3 bytes of output, so this bound keeps
** that size computation within INT_MAX.
*/
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)

/* base64 lookup table, binary->ascii: indexed by a 6-bit value (0..63) */
static unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 144
 145
 146
/* Lookup table for the 16-bit hqx CRC computed by binascii_crc_hqx.
** It is indexed with (high byte of the running crc) XOR (next data byte).
** NOTE(review): entry 1 is 0x1021, which looks like the CRC-CCITT
** polynomial -- confirm before relying on it.
*/
static unsigned short crctab_hqx[256] = {
        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
        0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
        0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
        0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
        0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
        0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
        0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
        0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
        0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
        0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
        0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
        0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
        0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
        0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
        0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
        0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
        0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
        0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
        0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
        0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
        0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
        0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
        0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
        0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
        0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
        0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
        0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
        0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
        0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
        0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
        0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
        0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
 181
 182 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
 183
 184 static PyObject *
 185 binascii_a2b_uu(PyObject *self, PyObject *args)
 186 {
 187         unsigned char *ascii_data, *bin_data;
 188         int leftbits = 0;
 189         unsigned char this_ch;
 190         unsigned int leftchar = 0;
 191         PyObject *rv;
 192         int ascii_len, bin_len;
 193
 194         if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
 195                 return NULL;
 196
 197         /* First byte: binary data length (in bytes) */
 198         bin_len = (*ascii_data++ - ' ') & 077;
 199         ascii_len--;
 200
 201         /* Allocate the buffer */
 202         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 203                 return NULL;
 204         bin_data = (unsigned char *)PyString_AsString(rv);
 205
 206         for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 207                 this_ch = *ascii_data;
 208                 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
 209                         /*
 210                         ** Whitespace. Assume some spaces got eaten at
 211                         ** end-of-line. (We check this later)
 212                         */
 213                         this_ch = 0;
 214                 } else {
 215                         /* Check the character for legality
 216                         ** The 64 in stead of the expected 63 is because
 217                         ** there are a few uuencodes out there that use
 218                         ** '`' as zero instead of space.
 219                         */
 220                         if ( this_ch < ' ' || this_ch > (' ' + 64)) {
 221                                 PyErr_SetString(Error, "Illegal char");
 222                                 Py_DECREF(rv);
 223                                 return NULL;
 224                         }
 225                         this_ch = (this_ch - ' ') & 077;
 226                 }
 227                 /*
 228                 ** Shift it in on the low end, and see if there's
 229                 ** a byte ready for output.
 230                 */
 231                 leftchar = (leftchar << 6) | (this_ch);
 232                 leftbits += 6;
 233                 if ( leftbits >= 8 ) {
 234                         leftbits -= 8;
 235                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 236                         leftchar &= ((1 << leftbits) - 1);
 237                         bin_len--;
 238                 }
 239         }
 240         /*
 241         ** Finally, check that if there's anything left on the line
 242         ** that it's whitespace only.
 243         */
 244         while( ascii_len-- > 0 ) {
 245                 this_ch = *ascii_data++;
 246                 /* Extra '`' may be written as padding in some cases */
 247                 if ( this_ch != ' ' && this_ch != ' '+64 &&
 248                      this_ch != '\n' && this_ch != '\r' ) {
 249                         PyErr_SetString(Error, "Trailing garbage");
 250                         Py_DECREF(rv);
 251                         return NULL;
 252                 }
 253         }
 254         return rv;
 255 }
 256
 257 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
 258
 259 static PyObject *
 260 binascii_b2a_uu(PyObject *self, PyObject *args)
 261 {
 262         unsigned char *ascii_data, *bin_data;
 263         int leftbits = 0;
 264         unsigned char this_ch;
 265         unsigned int leftchar = 0;
 266         PyObject *rv;
 267         int bin_len;
 268
 269         if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
 270                 return NULL;
 271         if ( bin_len > 45 ) {
 272                 /* The 45 is a limit that appears in all uuencode's */
 273                 PyErr_SetString(Error, "At most 45 bytes at once");
 274                 return NULL;
 275         }
 276
 277         /* We're lazy and allocate to much (fixed up later) */
 278         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
 279                 return NULL;
 280         ascii_data = (unsigned char *)PyString_AsString(rv);
 281
 282         /* Store the length */
 283         *ascii_data++ = ' ' + (bin_len & 077);
 284
 285         for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 286                 /* Shift the data (or padding) into our buffer */
 287                 if ( bin_len > 0 )      /* Data */
 288                         leftchar = (leftchar << 8) | *bin_data;
 289                 else                    /* Padding */
 290                         leftchar <<= 8;
 291                 leftbits += 8;
 292
 293                 /* See if there are 6-bit groups ready */
 294                 while ( leftbits >= 6 ) {
 295                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 296                         leftbits -= 6;
 297                         *ascii_data++ = this_ch + ' ';
 298                 }
 299         }
 300         *ascii_data++ = '\n';   /* Append a courtesy newline */
 301
 302         _PyString_Resize(&rv, (ascii_data -
 303                                (unsigned char *)PyString_AsString(rv)));
 304         return rv;
 305 }
 306
 307
 308 static int
 309 binascii_find_valid(unsigned char *s, int slen, int num)
 310 {
 311         /* Finds & returns the (num+1)th
 312         ** valid character for base64, or -1 if none.
 313         */
 314
 315         int ret = -1;
 316         unsigned char c, b64val;
 317
 318         while ((slen > 0) && (ret == -1)) {
 319                 c = *s;
 320                 b64val = table_a2b_base64[c & 0x7f];
 321                 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
 322                         if (num == 0)
 323                                 ret = *s;
 324                         num--;
 325                 }
 326
 327                 s++;
 328                 slen--;
 329         }
 330         return ret;
 331 }
 332
 333 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
 334
 335 static PyObject *
 336 binascii_a2b_base64(PyObject *self, PyObject *args)
 337 {
 338         unsigned char *ascii_data, *bin_data;
 339         int leftbits = 0;
 340         unsigned char this_ch;
 341         unsigned int leftchar = 0;
 342         PyObject *rv;
 343         int ascii_len, bin_len;
 344         int quad_pos = 0;
 345
 346         if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
 347                 return NULL;
 348
 349         bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 350
 351         /* Allocate the buffer */
 352         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 353                 return NULL;
 354         bin_data = (unsigned char *)PyString_AsString(rv);
 355         bin_len = 0;
 356
 357         for( ; ascii_len > 0; ascii_len--, ascii_data++) {
 358                 this_ch = *ascii_data;
 359
 360                 if (this_ch > 0x7f ||
 361                     this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
 362                         continue;
 363
 364                 /* Check for pad sequences and ignore
 365                 ** the invalid ones.
 366                 */
 367                 if (this_ch == BASE64_PAD) {
 368                         if ( (quad_pos < 2) ||
 369                              ((quad_pos == 2) &&
 370                               (binascii_find_valid(ascii_data, ascii_len, 1)
 371                                != BASE64_PAD)) )
 372                         {
 373                                 continue;
 374                         }
 375                         else {
 376                                 /* A pad sequence means no more input.
 377                                 ** We've already interpreted the data
 378                                 ** from the quad at this point.
 379                                 */
 380                                 leftbits = 0;
 381                                 break;
 382                         }
 383                 }
 384
 385                 this_ch = table_a2b_base64[*ascii_data];
 386                 if ( this_ch == (unsigned char) -1 )
 387                         continue;
 388
 389                 /*
 390                 ** Shift it in on the low end, and see if there's
 391                 ** a byte ready for output.
 392                 */
 393                 quad_pos = (quad_pos + 1) & 0x03;
 394                 leftchar = (leftchar << 6) | (this_ch);
 395                 leftbits += 6;
 396
 397                 if ( leftbits >= 8 ) {
 398                         leftbits -= 8;
 399                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 400                         bin_len++;
 401                         leftchar &= ((1 << leftbits) - 1);
 402                 }
 403         }
 404
 405         if (leftbits != 0) {
 406                 PyErr_SetString(Error, "Incorrect padding");
 407                 Py_DECREF(rv);
 408                 return NULL;
 409         }
 410
 411         /* and set string size correctly */
 412         if (bin_len > 0)
 413                 _PyString_Resize(&rv, bin_len);
 414         return rv;
 415 }
 416
 417 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
 418
 419 static PyObject *
 420 binascii_b2a_base64(PyObject *self, PyObject *args)
 421 {
 422         unsigned char *ascii_data, *bin_data;
 423         int leftbits = 0;
 424         unsigned char this_ch;
 425         unsigned int leftchar = 0;
 426         PyObject *rv;
 427         int bin_len;
 428
 429         if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
 430                 return NULL;
 431         if ( bin_len > BASE64_MAXBIN ) {
 432                 PyErr_SetString(Error, "Too much data for base64 line");
 433                 return NULL;
 434         }
 435
 436         /* We're lazy and allocate too much (fixed up later).
 437            "+3" leaves room for up to two pad characters and a trailing
 438            newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
 439         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
 440                 return NULL;
 441         ascii_data = (unsigned char *)PyString_AsString(rv);
 442
 443         for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
 444                 /* Shift the data into our buffer */
 445                 leftchar = (leftchar << 8) | *bin_data;
 446                 leftbits += 8;
 447
 448                 /* See if there are 6-bit groups ready */
 449                 while ( leftbits >= 6 ) {
 450                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 451                         leftbits -= 6;
 452                         *ascii_data++ = table_b2a_base64[this_ch];
 453                 }
 454         }
 455         if ( leftbits == 2 ) {
 456                 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
 457                 *ascii_data++ = BASE64_PAD;
 458                 *ascii_data++ = BASE64_PAD;
 459         } else if ( leftbits == 4 ) {
 460                 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 461                 *ascii_data++ = BASE64_PAD;
 462         }
 463         *ascii_data++ = '\n';   /* Append a courtesy newline */
 464
 465         _PyString_Resize(&rv, (ascii_data -
 466                                (unsigned char *)PyString_AsString(rv)));
 467         return rv;
 468 }
 469
 470 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
 471
 472 static PyObject *
 473 binascii_a2b_hqx(PyObject *self, PyObject *args)
 474 {
 475         unsigned char *ascii_data, *bin_data;
 476         int leftbits = 0;
 477         unsigned char this_ch;
 478         unsigned int leftchar = 0;
 479         PyObject *rv;
 480         int len;
 481         int done = 0;
 482
 483         if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
 484                 return NULL;
 485
 486         /* Allocate a string that is too big (fixed later) */
 487         if ( (rv=PyString_FromStringAndSize(NULL, len)) == NULL )
 488                 return NULL;
 489         bin_data = (unsigned char *)PyString_AsString(rv);
 490
 491         for( ; len > 0 ; len--, ascii_data++ ) {
 492                 /* Get the byte and look it up */
 493                 this_ch = table_a2b_hqx[*ascii_data];
 494                 if ( this_ch == SKIP )
 495                         continue;
 496                 if ( this_ch == FAIL ) {
 497                         PyErr_SetString(Error, "Illegal char");
 498                         Py_DECREF(rv);
 499                         return NULL;
 500                 }
 501                 if ( this_ch == DONE ) {
 502                         /* The terminating colon */
 503                         done = 1;
 504                         break;
 505                 }
 506
 507                 /* Shift it into the buffer and see if any bytes are ready */
 508                 leftchar = (leftchar << 6) | (this_ch);
 509                 leftbits += 6;
 510                 if ( leftbits >= 8 ) {
 511                         leftbits -= 8;
 512                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 513                         leftchar &= ((1 << leftbits) - 1);
 514                 }
 515         }
 516
 517         if ( leftbits && !done ) {
 518                 PyErr_SetString(Incomplete,
 519                                 "String has incomplete number of bytes");
 520                 Py_DECREF(rv);
 521                 return NULL;
 522         }
 523         _PyString_Resize(
 524                 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
 525         if (rv) {
 526                 PyObject *rrv = Py_BuildValue("Oi", rv, done);
 527                 Py_DECREF(rv);
 528                 return rrv;
 529         }
 530
 531         return NULL;
 532 }
 533
 534 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
 535
 536 static PyObject *
 537 binascii_rlecode_hqx(PyObject *self, PyObject *args)
 538 {
 539         unsigned char *in_data, *out_data;
 540         PyObject *rv;
 541         unsigned char ch;
 542         int in, inend, len;
 543
 544         if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
 545                 return NULL;
 546
 547         /* Worst case: output is twice as big as input (fixed later) */
 548         if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
 549                 return NULL;
 550         out_data = (unsigned char *)PyString_AsString(rv);
 551
 552         for( in=0; in<len; in++) {
 553                 ch = in_data[in];
 554                 if ( ch == RUNCHAR ) {
 555                         /* RUNCHAR. Escape it. */
 556                         *out_data++ = RUNCHAR;
 557                         *out_data++ = 0;
 558                 } else {
 559                         /* Check how many following are the same */
 560                         for(inend=in+1;
 561                             inend<len && in_data[inend] == ch &&
 562                                     inend < in+255;
 563                             inend++) ;
 564                         if ( inend - in > 3 ) {
 565                                 /* More than 3 in a row. Output RLE. */
 566                                 *out_data++ = ch;
 567                                 *out_data++ = RUNCHAR;
 568                                 *out_data++ = inend-in;
 569                                 in = inend-1;
 570                         } else {
 571                                 /* Less than 3. Output the byte itself */
 572                                 *out_data++ = ch;
 573                         }
 574                 }
 575         }
 576         _PyString_Resize(&rv, (out_data -
 577                                (unsigned char *)PyString_AsString(rv)));
 578         return rv;
 579 }
 580
 581 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
 582
 583 static PyObject *
 584 binascii_b2a_hqx(PyObject *self, PyObject *args)
 585 {
 586         unsigned char *ascii_data, *bin_data;
 587         int leftbits = 0;
 588         unsigned char this_ch;
 589         unsigned int leftchar = 0;
 590         PyObject *rv;
 591         int len;
 592
 593         if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
 594                 return NULL;
 595
 596         /* Allocate a buffer that is at least large enough */
 597         if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
 598                 return NULL;
 599         ascii_data = (unsigned char *)PyString_AsString(rv);
 600
 601         for( ; len > 0 ; len--, bin_data++ ) {
 602                 /* Shift into our buffer, and output any 6bits ready */
 603                 leftchar = (leftchar << 8) | *bin_data;
 604                 leftbits += 8;
 605                 while ( leftbits >= 6 ) {
 606                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 607                         leftbits -= 6;
 608                         *ascii_data++ = table_b2a_hqx[this_ch];
 609                 }
 610         }
 611         /* Output a possible runt byte */
 612         if ( leftbits ) {
 613                 leftchar <<= (6-leftbits);
 614                 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
 615         }
 616         _PyString_Resize(&rv, (ascii_data -
 617                                (unsigned char *)PyString_AsString(rv)));
 618         return rv;
 619 }
 620
 621 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
 622
 623 static PyObject *
 624 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 625 {
 626         unsigned char *in_data, *out_data;
 627         unsigned char in_byte, in_repeat;
 628         PyObject *rv;
 629         int in_len, out_len, out_len_left;
 630
 631         if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
 632                 return NULL;
 633
 634         /* Empty string is a special case */
 635         if ( in_len == 0 )
 636                 return Py_BuildValue("s", "");
 637
 638         /* Allocate a buffer of reasonable size. Resized when needed */
 639         out_len = in_len*2;
 640         if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
 641                 return NULL;
 642         out_len_left = out_len;
 643         out_data = (unsigned char *)PyString_AsString(rv);
 644
 645         /*
 646         ** We need two macros here to get/put bytes and handle
 647         ** end-of-buffer for input and output strings.
 648         */
 649 #define INBYTE(b) \
 650         do { \
 651                  if ( --in_len < 0 ) { \
 652                            PyErr_SetString(Incomplete, ""); \
 653                            Py_DECREF(rv); \
 654                            return NULL; \
 655                  } \
 656                  b = *in_data++; \
 657         } while(0)
 658
 659 #define OUTBYTE(b) \
 660         do { \
 661                  if ( --out_len_left < 0 ) { \
 662                           _PyString_Resize(&rv, 2*out_len); \
 663                           if ( rv == NULL ) return NULL; \
 664                           out_data = (unsigned char *)PyString_AsString(rv) \
 665                                                                  + out_len; \
 666                           out_len_left = out_len-1; \
 667                           out_len = out_len * 2; \
 668                  } \
 669                  *out_data++ = b; \
 670         } while(0)
 671
 672                 /*
 673                 ** Handle first byte separately (since we have to get angry
 674                 ** in case of an orphaned RLE code).
 675                 */
 676                 INBYTE(in_byte);
 677
 678         if (in_byte == RUNCHAR) {
 679                 INBYTE(in_repeat);
 680                 if (in_repeat != 0) {
 681                         /* Note Error, not Incomplete (which is at the end
 682                         ** of the string only). This is a programmer error.
 683                         */
 684                         PyErr_SetString(Error, "Orphaned RLE code at start");
 685                         Py_DECREF(rv);
 686                         return NULL;
 687                 }
 688                 OUTBYTE(RUNCHAR);
 689         } else {
 690                 OUTBYTE(in_byte);
 691         }
 692
 693         while( in_len > 0 ) {
 694                 INBYTE(in_byte);
 695
 696                 if (in_byte == RUNCHAR) {
 697                         INBYTE(in_repeat);
 698                         if ( in_repeat == 0 ) {
 699                                 /* Just an escaped RUNCHAR value */
 700                                 OUTBYTE(RUNCHAR);
 701                         } else {
 702                                 /* Pick up value and output a sequence of it */
 703                                 in_byte = out_data[-1];
 704                                 while ( --in_repeat > 0 )
 705                                         OUTBYTE(in_byte);
 706                         }
 707                 } else {
 708                         /* Normal byte */
 709                         OUTBYTE(in_byte);
 710                 }
 711         }
 712         _PyString_Resize(&rv, (out_data -
 713                                (unsigned char *)PyString_AsString(rv)));
 714         return rv;
 715 }
 716
 717 PyDoc_STRVAR(doc_crc_hqx,
 718 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
 719
 720 static PyObject *
 721 binascii_crc_hqx(PyObject *self, PyObject *args)
 722 {
 723         unsigned char *bin_data;
 724         unsigned int crc;
 725         int len;
 726
 727         if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
 728                 return NULL;
 729
 730         while(len--) {
 731                 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
 732         }
 733
 734         return Py_BuildValue("i", crc);
 735 }
 736
/* Docstring registered for the module-level crc32() function */
PyDoc_STRVAR(doc_crc32,
"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
 739
 740 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
 741     Also known as: ISO 3307
 742 **********************************************************************|
 743 *                                                                    *|
 744 * Demonstration program to compute the 32-bit CRC used as the frame  *|
 745 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
 746 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
 747 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
 748 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
 749 * this polynomial is or will be included in CCITT V.41, which        *|
 750 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
 751 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
 752 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
 753 *                                                                    *|
 754 **********************************************************************|
 755
 756  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 757  code or tables extracted from it, as desired without restriction.
 758
 759  First, the polynomial itself and its table of feedback terms.  The
 760  polynomial is
 761  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 762  Note that we take it "backwards" and put the highest-order term in
 763  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 764  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 765  the MSB being 1.
 766
 767  Note that the usual hardware shift register implementation, which
 768  is what we're using (we're merely optimizing it by doing eight-bit
 769  chunks at a time) shifts bits into the lowest-order term.  In our
 770  implementation, that means shifting towards the right.  Why do we
 771  do it this way?  Because the calculated CRC must be transmitted in
 772  order from highest-order term to lowest-order term.  UARTs transmit
 773  characters in order from LSB to MSB.  By storing the CRC this way,
 774  we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
 776  by bit from highest- to lowest-order term without requiring any bit
 777  shuffling on our part.  Reception works similarly.
 778
 779  The feedback terms table consists of 256, 32-bit entries.  Notes:
 780
 781   1. The table can be generated at runtime if desired; code to do so
 782      is shown later.  It might not be obvious, but the feedback
 783      terms simply represent the results of eight shift/xor opera-
 784      tions for all combinations of data and CRC register values.
 785
 786   2. The CRC accumulation logic is the same for all CRC polynomials,
 787      be they sixteen or thirty-two bits wide.  You simply choose the
 788      appropriate table.  Alternatively, because the table can be
 789      generated at runtime, you can start by generating the table for
 790      the polynomial in question and use exactly the same "updcrc",
 791      if your application needn't simultaneously handle two CRC
 792      polynomials.  (Note, however, that XMODEM is strange.)
 793
 794   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
 795      of course, 32-bit entries work OK if the high 16 bits are zero.
 796
 797   4. The values must be right-shifted by eight bits by the "updcrc"
 798      logic; the shift must be unsigned (bring in zeroes).  On some
 799      hardware you could probably optimize the shift in assembler by
 800      using byte-swap instructions.
 801 ********************************************************************/
 802
 803 static unsigned long crc_32_tab[256] = {
 804 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
 805 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
 806 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
 807 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
 808 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
 809 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
 810 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
 811 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
 812 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
 813 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
 814 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
 815 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
 816 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
 817 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
 818 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
 819 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
 820 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
 821 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
 822 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
 823 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
 824 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
 825 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
 826 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
 827 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
 828 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
 829 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
 830 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
 831 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
 832 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
 833 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
 834 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
 835 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
 836 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
 837 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
 838 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
 839 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
 840 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
 841 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
 842 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
 843 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
 844 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
 845 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
 846 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
 847 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
 848 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
 849 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
 850 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
 851 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
 852 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
 853 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
 854 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
 855 0x2d02ef8dUL
 856 };
 857
 858 static PyObject *
 859 binascii_crc32(PyObject *self, PyObject *args)
 860 { /* By Jim Ahlstrom; All rights transferred to CNRI */
 861         unsigned char *bin_data;
 862         unsigned long crc = 0UL;        /* initial value of CRC */
 863         int len;
 864         long result;
 865
 866         if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
 867                 return NULL;
 868
 869         crc = ~ crc;
 870 #if SIZEOF_LONG > 4
 871         /* only want the trailing 32 bits */
 872         crc &= 0xFFFFFFFFUL;
 873 #endif
 874         while (len--)
 875                 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
 876                 /* Note:  (crc >> 8) MUST zero fill on left */
 877
 878         result = (long)(crc ^ 0xFFFFFFFFUL);
 879 #if SIZEOF_LONG > 4
 880         /* Extend the sign bit.  This is one way to ensure the result is the
 881          * same across platforms.  The other way would be to return an
 882          * unbounded unsigned long, but the evidence suggests that lots of
 883          * code outside this treats the result as if it were a signed 4-byte
 884          * integer.
 885          */
 886         result |= -(result & (1L << 31));
 887 #endif
 888         return PyInt_FromLong(result);
 889 }
 890
 891
 892 static PyObject *
 893 binascii_hexlify(PyObject *self, PyObject *args)
 894 {
 895         char* argbuf;
 896         int arglen;
 897         PyObject *retval;
 898         char* retbuf;
 899         int i, j;
 900
 901         if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
 902                 return NULL;
 903
 904         retval = PyString_FromStringAndSize(NULL, arglen*2);
 905         if (!retval)
 906                 return NULL;
 907         retbuf = PyString_AsString(retval);
 908         if (!retbuf)
 909                 goto finally;
 910
 911         /* make hex version of string, taken from shamodule.c */
 912         for (i=j=0; i < arglen; i++) {
 913                 char c;
 914                 c = (argbuf[i] >> 4) & 0xf;
 915                 c = (c>9) ? c+'a'-10 : c + '0';
 916                 retbuf[j++] = c;
 917                 c = argbuf[i] & 0xf;
 918                 c = (c>9) ? c+'a'-10 : c + '0';
 919                 retbuf[j++] = c;
 920         }
 921         return retval;
 922
 923   finally:
 924         Py_DECREF(retval);
 925         return NULL;
 926 }
 927
 928 PyDoc_STRVAR(doc_hexlify,
 929 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
 930 \n\
 931 This function is also available as \"hexlify()\".");
 932
 933
 934 static int
 935 to_int(int c)
 936 {
 937         if (isdigit(c))
 938                 return c - '0';
 939         else {
 940                 if (isupper(c))
 941                         c = tolower(c);
 942                 if (c >= 'a' && c <= 'f')
 943                         return c - 'a' + 10;
 944         }
 945         return -1;
 946 }
 947
 948
 949 static PyObject *
 950 binascii_unhexlify(PyObject *self, PyObject *args)
 951 {
 952         char* argbuf;
 953         int arglen;
 954         PyObject *retval;
 955         char* retbuf;
 956         int i, j;
 957
 958         if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
 959                 return NULL;
 960
 961         /* XXX What should we do about strings with an odd length?  Should
 962          * we add an implicit leading zero, or a trailing zero?  For now,
 963          * raise an exception.
 964          */
 965         if (arglen % 2) {
 966                 PyErr_SetString(PyExc_TypeError, "Odd-length string");
 967                 return NULL;
 968         }
 969
 970         retval = PyString_FromStringAndSize(NULL, (arglen/2));
 971         if (!retval)
 972                 return NULL;
 973         retbuf = PyString_AsString(retval);
 974         if (!retbuf)
 975                 goto finally;
 976
 977         for (i=j=0; i < arglen; i += 2) {
 978                 int top = to_int(Py_CHARMASK(argbuf[i]));
 979                 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
 980                 if (top == -1 || bot == -1) {
 981                         PyErr_SetString(PyExc_TypeError,
 982                                         "Non-hexadecimal digit found");
 983                         goto finally;
 984                 }
 985                 retbuf[j++] = (top << 4) + bot;
 986         }
 987         return retval;
 988
 989   finally:
 990         Py_DECREF(retval);
 991         return NULL;
 992 }
 993
 994 PyDoc_STRVAR(doc_unhexlify,
 995 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
 996 \n\
 997 hexstr must contain an even number of hex digits (upper or lower case).\n\
 998 This function is also available as \"unhexlify()\"");
 999
/* Value of each 7-bit ASCII character when read as a hexadecimal digit,
 * or -1 when the character is not one of 0-9, A-F, a-f.  The table is
 * lookup-only, hence const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Hex-digit value of c.  The table has 128 entries, so c must be in the
 * range 0..127; callers are expected to validate the character first. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit applied by the quoted-printable encoder */
#define MAXLINESIZE 76
1014
1015 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1016
1017 static PyObject*
1018 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1019 {
1020         unsigned int in, out;
1021         char ch;
1022         unsigned char *data, *odata;
1023         unsigned int datalen = 0;
1024         PyObject *rv;
1025         static char *kwlist[] = {"data", "header", NULL};
1026         int header = 0;
1027
1028         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1029               &datalen, &header))
1030                 return NULL;
1031
1032         /* We allocate the output same size as input, this is overkill */
1033         odata = (unsigned char *) calloc(1, datalen);
1034
1035         if (odata == NULL) {
1036                 PyErr_NoMemory();
1037                 return NULL;
1038         }
1039
1040         in = out = 0;
1041         while (in < datalen) {
1042                 if (data[in] == '=') {
1043                         in++;
1044                         if (in >= datalen) break;
1045                         /* Soft line breaks */
1046                         if ((data[in] == '\n') || (data[in] == '\r') ||
1047                             (data[in] == ' ') || (data[in] == '\t')) {
1048                                 if (data[in] != '\n') {
1049                                         while (in < datalen && data[in] != '\n') in++;
1050                                 }
1051                                 if (in < datalen) in++;
1052                         }
1053                         else if (data[in] == '=') {
1054                                 /* broken case from broken python qp */
1055                                 odata[out++] = '=';
1056                                 in++;
1057                         }
1058                         else if (((data[in] >= 'A' && data[in] <= 'F') ||
1059                                   (data[in] >= 'a' && data[in] <= 'f') ||
1060                                   (data[in] >= '0' && data[in] <= '9')) &&
1061                                  ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1062                                   (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1063                                   (data[in+1] >= '0' && data[in+1] <= '9'))) {
1064                                 /* hexval */
1065                                 ch = hexval(data[in]) << 4;
1066                                 in++;
1067                                 ch |= hexval(data[in]);
1068                                 in++;
1069                                 odata[out++] = ch;
1070                         }
1071                         else {
1072                           odata[out++] = '=';
1073                         }
1074                 }
1075                 else if (header && data[in] == '_') {
1076                         odata[out++] = ' ';
1077                         in++;
1078                 }
1079                 else {
1080                         odata[out] = data[in];
1081                         in++;
1082                         out++;
1083                 }
1084         }
1085         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1086                 free (odata);
1087                 return NULL;
1088         }
1089         free (odata);
1090         return rv;
1091 }
1092
/* Write the two uppercase hexadecimal digits of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
        static const char digits[] = "0123456789ABCDEF";

        s[0] = digits[(ch >> 4) & 0x0F];
        s[1] = digits[ch & 0x0F];
        return 0;
}
1103
1104 PyDoc_STRVAR(doc_b2a_qp,
1105 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1106  Encode a string using quoted-printable encoding. \n\
1107 \n\
1108 On encoding, when istext is set, newlines are not encoded, and white \n\
1109 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1110 both encoded.  When quotetabs is set, space and tabs are encoded.");
1111
1112 /* XXX: This is ridiculously complicated to be backward compatible
1113  * (mostly) with the quopri module.  It doesn't re-create the quopri
1114  * module bug where text ending in CRLF has the CR encoded */
1115 static PyObject*
1116 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1117 {
1118         unsigned int in, out;
1119         unsigned char *data, *odata;
1120         unsigned int datalen = 0, odatalen = 0;
1121         PyObject *rv;
1122         unsigned int linelen = 0;
1123         static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
1124         int istext = 1;
1125         int quotetabs = 0;
1126         int header = 0;
1127         unsigned char ch;
1128         int crlf = 0;
1129         unsigned char *p;
1130
1131         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1132               &datalen, &quotetabs, &istext, &header))
1133                 return NULL;
1134
1135         /* See if this string is using CRLF line ends */
1136         /* XXX: this function has the side effect of converting all of
1137          * the end of lines to be the same depending on this detection
1138          * here */
1139         p = (unsigned char *) strchr((char *)data, '\n');
1140         if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1141                 crlf = 1;
1142
1143         /* First, scan to see how many characters need to be encoded */
1144         in = 0;
1145         while (in < datalen) {
1146                 if ((data[in] > 126) ||
1147                     (data[in] == '=') ||
1148                     (header && data[in] == '_') ||
1149                     ((data[in] == '.') && (linelen == 1)) ||
1150                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1151                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1152                     ((data[in] < 33) &&
1153                      (data[in] != '\r') && (data[in] != '\n') &&
1154                      (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1155                 {
1156                         if ((linelen + 3) >= MAXLINESIZE) {
1157                                 linelen = 0;
1158                                 if (crlf)
1159                                         odatalen += 3;
1160                                 else
1161                                         odatalen += 2;
1162                         }
1163                         linelen += 3;
1164                         odatalen += 3;
1165                         in++;
1166                 }
1167                 else {
1168                         if (istext &&
1169                             ((data[in] == '\n') ||
1170                              ((in+1 < datalen) && (data[in] == '\r') &&
1171                              (data[in+1] == '\n'))))
1172                         {
1173                                 linelen = 0;
1174                                 /* Protect against whitespace on end of line */
1175                                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1176                                         odatalen += 2;
1177                                 if (crlf)
1178                                         odatalen += 2;
1179                                 else
1180                                         odatalen += 1;
1181                                 if (data[in] == '\r')
1182                                         in += 2;
1183                                 else
1184                                         in++;
1185                         }
1186                         else {
1187                                 if ((in + 1 != datalen) &&
1188                                     (data[in+1] != '\n') &&
1189                                     (linelen + 1) >= MAXLINESIZE) {
1190                                         linelen = 0;
1191                                         if (crlf)
1192                                                 odatalen += 3;
1193                                         else
1194                                                 odatalen += 2;
1195                                 }
1196                                 linelen++;
1197                                 odatalen++;
1198                                 in++;
1199                         }
1200                 }
1201         }
1202
1203         odata = (unsigned char *) calloc(1, odatalen);
1204
1205         if (odata == NULL) {
1206                 PyErr_NoMemory();
1207                 return NULL;
1208         }
1209
1210         in = out = linelen = 0;
1211         while (in < datalen) {
1212                 if ((data[in] > 126) ||
1213                     (data[in] == '=') ||
1214                     (header && data[in] == '_') ||
1215                     ((data[in] == '.') && (linelen == 1)) ||
1216                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1217                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1218                     ((data[in] < 33) &&
1219                      (data[in] != '\r') && (data[in] != '\n') &&
1220                      (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1221                 {
1222                         if ((linelen + 3 )>= MAXLINESIZE) {
1223                                 odata[out++] = '=';
1224                                 if (crlf) odata[out++] = '\r';
1225                                 odata[out++] = '\n';
1226                                 linelen = 0;
1227                         }
1228                         odata[out++] = '=';
1229                         to_hex(data[in], &odata[out]);
1230                         out += 2;
1231                         in++;
1232                         linelen += 3;
1233                 }
1234                 else {
1235                         if (istext &&
1236                             ((data[in] == '\n') ||
1237                              ((in+1 < datalen) && (data[in] == '\r') &&
1238                              (data[in+1] == '\n'))))
1239                         {
1240                                 linelen = 0;
1241                                 /* Protect against whitespace on end of line */
1242                                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1243                                         ch = odata[out-1];
1244                                         odata[out-1] = '=';
1245                                         to_hex(ch, &odata[out]);
1246                                         out += 2;
1247                                 }
1248
1249                                 if (crlf) odata[out++] = '\r';
1250                                 odata[out++] = '\n';
1251                                 if (data[in] == '\r')
1252                                         in += 2;
1253                                 else
1254                                         in++;
1255                         }
1256                         else {
1257                                 if ((in + 1 != datalen) &&
1258                                     (data[in+1] != '\n') &&
1259                                     (linelen + 1) >= MAXLINESIZE) {
1260                                         odata[out++] = '=';
1261                                         if (crlf) odata[out++] = '\r';
1262                                         odata[out++] = '\n';
1263                                         linelen = 0;
1264                                 }
1265                                 linelen++;
1266                                 if (header && data[in] == ' ') {
1267                                         odata[out++] = '_';
1268                                         in++;
1269                                 }
1270                                 else {
1271                                         odata[out++] = data[in++];
1272                                 }
1273                         }
1274                 }
1275         }
1276         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1277                 free (odata);
1278                 return NULL;
1279         }
1280         free (odata);
1281         return rv;
1282 }
1283
1284 /* List of functions defined in the module */
1285
1286 static struct PyMethodDef binascii_module_methods[] = {
1287         {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1288         {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1289         {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1290         {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1291         {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1292         {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1293         {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1294         {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1295         {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1296         {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1297         {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1298         {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1299          doc_rledecode_hqx},
1300         {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1301         {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1302         {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1303           doc_a2b_qp},
1304         {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1305           doc_b2a_qp},
1306         {NULL, NULL}                         /* sentinel */
1307 };
1308
1309
1310 /* Initialization function for the module (*must* be called initbinascii) */
1311 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1312
1313 PyMODINIT_FUNC
1314 initbinascii(void)
1315 {
1316         PyObject *m, *d, *x;
1317
1318         /* Create the module and add the functions */
1319         m = Py_InitModule("binascii", binascii_module_methods);
1320
1321         d = PyModule_GetDict(m);
1322         x = PyString_FromString(doc_binascii);
1323         PyDict_SetItemString(d, "__doc__", x);
1324         Py_XDECREF(x);
1325
1326         Error = PyErr_NewException("binascii.Error", NULL, NULL);
1327         PyDict_SetItemString(d, "Error", Error);
1328         Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1329         PyDict_SetItemString(d, "Incomplete", Incomplete);
1330 }