Modules/binascii.c

   1 /*
   2 ** Routines to represent binary data in ASCII and vice-versa
   3 **
   4 ** This module currently supports the following encodings:
   5 ** uuencode:
   6 **      each line encodes 45 bytes (except possibly the last)
   7 **      First char encodes (binary) length, rest data
   8 **      each char encodes 6 bits, as follows:
   9 **      binary: 01234567 abcdefgh ijklmnop
  10 **      ascii:  012345 67abcd efghij klmnop
  11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12 **      short binary data is zero-extended (so the bits are always in the
  13 **      right place), this does *not* reflect in the length.
  14 ** base64:
  15 **      Line breaks are insignificant, but lines are at most 76 chars
  16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17 **      is done via a table.
  18 **      Short binary data is filled (in ASCII) with '='.
  19 ** hqx:
  20 **      File starts with introductory text, real data starts and ends
  21 **      with colons.
  22 **      Data consists of three similar parts: info, datafork, resourcefork.
  23 **      Each part is protected (at the end) with a 16-bit crc
  24 **      The binary data is run-length encoded, and then ascii-fied:
  25 **      binary: 01234567 abcdefgh ijklmnop
  26 **      ascii:  012345 67abcd efghij klmnop
  27 **      ASCII encoding is table-driven, see the code.
  28 **      Short binary data results in the runt ascii-byte being output with
  29 **      the bits in the right place.
  30 **
  31 ** While I was reading dozens of programs that encode or decode the formats
  32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33 **
  34 **      Programs that encode binary data in ASCII are written in
  35 **      such a style that they are as unreadable as possible. Devices used
  36 **      include unnecessary global variables, burying important tables
  37 **      in unrelated sourcefiles, putting functions in include files,
  38 **      using seemingly-descriptive variable names for different purposes,
  39 **      calls to empty subroutines and a host of others.
  40 **
  41 ** I have attempted to break with this tradition, but I guess that that
  42 ** does make the performance sub-optimal. Oh well, too bad...
  43 **
  44 ** Jack Jansen, CWI, July 1995.
  45 **
  46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47 ** quoted-printable encoding specifies that non printable characters (anything
  48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49 ** of the character.  It also specifies some other behavior to enable 8bit data
  50 ** in a mail message with little difficulty (maximum line sizes, protecting
  51 ** some cases of whitespace, etc).
  52 **
  53 ** Brandon Long, September 2001.
  54 */
  55
  56 #define PY_SSIZE_T_CLEAN
  57
  58 #include "Python.h"
  59 #ifdef USE_ZLIB_CRC32
  60 #include "zlib.h"
  61 #endif
  62
  63 static PyObject *Error;
  64 static PyObject *Incomplete;
  65
  66 /*
  67 ** hqx lookup table, ascii->binary.
  68 */
  69
  70 #define RUNCHAR 0x90
  71
  72 #define DONE 0x7F
  73 #define SKIP 0x7E
  74 #define FAIL 0x7D
  75
  76 static unsigned char table_a2b_hqx[256] = {
  77 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
  78 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  79 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
  80 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
  81 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
  82 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  83 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
  84 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  85 /*              !     "     #     $     %     &     '   */
  86 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  87 /*        (     )     *     +     ,     -     .     /   */
  88 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
  89 /*        0     1     2     3     4     5     6     7   */
  90 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
  91 /*        8     9     :     ;     <     =     >     ?   */
  92 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
  93 /*        @     A     B     C     D     E     F     G   */
  94 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
  95 /*        H     I     J     K     L     M     N     O   */
  96 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
  97 /*        P     Q     R     S     T     U     V     W   */
  98 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
  99 /*        X     Y     Z     [     \     ]     ^     _   */
 100 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
 101 /*        `     a     b     c     d     e     f     g   */
 102 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
 103 /*        h     i     j     k     l     m     n     o   */
 104 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
 105 /*        p     q     r     s     t     u     v     w   */
 106 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
 107 /*        x     y     z     {     |     }     ~    ^?   */
 108 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 109 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 110     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 111     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 112     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 113     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 114     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 115     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 116     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 117     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 118     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 119     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 120     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 121     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 122     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 123     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 124     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 125 };
 126
 127 static unsigned char table_b2a_hqx[] =
 128 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
 129
 130 static char table_a2b_base64[] = {
 131     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 132     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 133     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 134     52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
 135     -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 136     15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 137     -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 138     41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 139 };
 140
 141 #define BASE64_PAD '='
 142
 143 /* Max binary chunk size; limited only by available memory */
 144 #define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)
 145
 146 static unsigned char table_b2a_base64[] =
 147 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 148
 149
 150
 151 static unsigned short crctab_hqx[256] = {
 152     0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
 153     0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
 154     0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
 155     0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
 156     0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
 157     0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
 158     0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
 159     0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
 160     0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
 161     0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
 162     0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
 163     0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
 164     0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
 165     0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
 166     0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
 167     0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
 168     0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
 169     0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
 170     0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
 171     0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
 172     0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
 173     0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 174     0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
 175     0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
 176     0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
 177     0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
 178     0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
 179     0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
 180     0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
 181     0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
 182     0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
 183     0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
 184 };
 185
 186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
 187
 188 static PyObject *
 189 binascii_a2b_uu(PyObject *self, PyObject *args)
 190 {
 191     Py_buffer pascii;
 192     unsigned char *ascii_data, *bin_data;
 193     int leftbits = 0;
 194     unsigned char this_ch;
 195     unsigned int leftchar = 0;
 196     PyObject *rv;
 197     Py_ssize_t ascii_len, bin_len;
 198
 199     if ( !PyArg_ParseTuple(args, "s*:a2b_uu", &pascii) )
 200         return NULL;
 201     ascii_data = pascii.buf;
 202     ascii_len = pascii.len;
 203
 204     assert(ascii_len >= 0);
 205
 206     /* First byte: binary data length (in bytes) */
 207     bin_len = (*ascii_data++ - ' ') & 077;
 208     ascii_len--;
 209
 210     /* Allocate the buffer */
 211     if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
 212         PyBuffer_Release(&pascii);
 213         return NULL;
 214     }
 215     bin_data = (unsigned char *)PyString_AS_STRING(rv);
 216
 217     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 218         /* XXX is it really best to add NULs if there's no more data */
 219         this_ch = (ascii_len > 0) ? *ascii_data : 0;
 220         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
 221             /*
 222             ** Whitespace. Assume some spaces got eaten at
 223             ** end-of-line. (We check this later)
 224             */
 225             this_ch = 0;
 226         } else {
 227             /* Check the character for legality
 228             ** The 64 in stead of the expected 63 is because
 229             ** there are a few uuencodes out there that use
 230             ** '`' as zero instead of space.
 231             */
 232             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
 233                 PyErr_SetString(Error, "Illegal char");
 234                 PyBuffer_Release(&pascii);
 235                 Py_DECREF(rv);
 236                 return NULL;
 237             }
 238             this_ch = (this_ch - ' ') & 077;
 239         }
 240         /*
 241         ** Shift it in on the low end, and see if there's
 242         ** a byte ready for output.
 243         */
 244         leftchar = (leftchar << 6) | (this_ch);
 245         leftbits += 6;
 246         if ( leftbits >= 8 ) {
 247             leftbits -= 8;
 248             *bin_data++ = (leftchar >> leftbits) & 0xff;
 249             leftchar &= ((1 << leftbits) - 1);
 250             bin_len--;
 251         }
 252     }
 253     /*
 254     ** Finally, check that if there's anything left on the line
 255     ** that it's whitespace only.
 256     */
 257     while( ascii_len-- > 0 ) {
 258         this_ch = *ascii_data++;
 259         /* Extra '`' may be written as padding in some cases */
 260         if ( this_ch != ' ' && this_ch != ' '+64 &&
 261              this_ch != '\n' && this_ch != '\r' ) {
 262             PyErr_SetString(Error, "Trailing garbage");
 263             PyBuffer_Release(&pascii);
 264             Py_DECREF(rv);
 265             return NULL;
 266         }
 267     }
 268     PyBuffer_Release(&pascii);
 269     return rv;
 270 }
 271
 272 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
 273
 274 static PyObject *
 275 binascii_b2a_uu(PyObject *self, PyObject *args)
 276 {
 277     Py_buffer pbin;
 278     unsigned char *ascii_data, *bin_data;
 279     int leftbits = 0;
 280     unsigned char this_ch;
 281     unsigned int leftchar = 0;
 282     PyObject *rv;
 283     Py_ssize_t bin_len;
 284
 285     if ( !PyArg_ParseTuple(args, "s*:b2a_uu", &pbin) )
 286         return NULL;
 287     bin_data = pbin.buf;
 288     bin_len = pbin.len;
 289     if ( bin_len > 45 ) {
 290         /* The 45 is a limit that appears in all uuencode's */
 291         PyErr_SetString(Error, "At most 45 bytes at once");
 292         PyBuffer_Release(&pbin);
 293         return NULL;
 294     }
 295
 296     /* We're lazy and allocate to much (fixed up later) */
 297     if ( (rv=PyString_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
 298         PyBuffer_Release(&pbin);
 299         return NULL;
 300     }
 301     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
 302
 303     /* Store the length */
 304     *ascii_data++ = ' ' + (bin_len & 077);
 305
 306     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 307         /* Shift the data (or padding) into our buffer */
 308         if ( bin_len > 0 )              /* Data */
 309             leftchar = (leftchar << 8) | *bin_data;
 310         else                            /* Padding */
 311             leftchar <<= 8;
 312         leftbits += 8;
 313
 314         /* See if there are 6-bit groups ready */
 315         while ( leftbits >= 6 ) {
 316             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 317             leftbits -= 6;
 318             *ascii_data++ = this_ch + ' ';
 319         }
 320     }
 321     *ascii_data++ = '\n';       /* Append a courtesy newline */
 322
 323     if (_PyString_Resize(&rv,
 324                        (ascii_data -
 325                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
 326         Py_DECREF(rv);
 327         rv = NULL;
 328     }
 329     PyBuffer_Release(&pbin);
 330     return rv;
 331 }
 332
 333
 334 static int
 335 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
 336 {
 337     /* Finds & returns the (num+1)th
 338     ** valid character for base64, or -1 if none.
 339     */
 340
 341     int ret = -1;
 342     unsigned char c, b64val;
 343
 344     while ((slen > 0) && (ret == -1)) {
 345         c = *s;
 346         b64val = table_a2b_base64[c & 0x7f];
 347         if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
 348             if (num == 0)
 349                 ret = *s;
 350             num--;
 351         }
 352
 353         s++;
 354         slen--;
 355     }
 356     return ret;
 357 }
 358
 359 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
 360
 361 static PyObject *
 362 binascii_a2b_base64(PyObject *self, PyObject *args)
 363 {
 364     Py_buffer pascii;
 365     unsigned char *ascii_data, *bin_data;
 366     int leftbits = 0;
 367     unsigned char this_ch;
 368     unsigned int leftchar = 0;
 369     PyObject *rv;
 370     Py_ssize_t ascii_len, bin_len;
 371     int quad_pos = 0;
 372
 373     if ( !PyArg_ParseTuple(args, "s*:a2b_base64", &pascii) )
 374         return NULL;
 375     ascii_data = pascii.buf;
 376     ascii_len = pascii.len;
 377
 378     assert(ascii_len >= 0);
 379
 380     if (ascii_len > PY_SSIZE_T_MAX - 3) {
 381         PyBuffer_Release(&pascii);
 382         return PyErr_NoMemory();
 383     }
 384
 385     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 386
 387     /* Allocate the buffer */
 388     if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
 389         PyBuffer_Release(&pascii);
 390         return NULL;
 391     }
 392     bin_data = (unsigned char *)PyString_AS_STRING(rv);
 393     bin_len = 0;
 394
 395     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
 396         this_ch = *ascii_data;
 397
 398         if (this_ch > 0x7f ||
 399             this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
 400             continue;
 401
 402         /* Check for pad sequences and ignore
 403         ** the invalid ones.
 404         */
 405         if (this_ch == BASE64_PAD) {
 406             if ( (quad_pos < 2) ||
 407                  ((quad_pos == 2) &&
 408                   (binascii_find_valid(ascii_data, ascii_len, 1)
 409                    != BASE64_PAD)) )
 410             {
 411                 continue;
 412             }
 413             else {
 414                 /* A pad sequence means no more input.
 415                 ** We've already interpreted the data
 416                 ** from the quad at this point.
 417                 */
 418                 leftbits = 0;
 419                 break;
 420             }
 421         }
 422
 423         this_ch = table_a2b_base64[*ascii_data];
 424         if ( this_ch == (unsigned char) -1 )
 425             continue;
 426
 427         /*
 428         ** Shift it in on the low end, and see if there's
 429         ** a byte ready for output.
 430         */
 431         quad_pos = (quad_pos + 1) & 0x03;
 432         leftchar = (leftchar << 6) | (this_ch);
 433         leftbits += 6;
 434
 435         if ( leftbits >= 8 ) {
 436             leftbits -= 8;
 437             *bin_data++ = (leftchar >> leftbits) & 0xff;
 438             bin_len++;
 439             leftchar &= ((1 << leftbits) - 1);
 440         }
 441     }
 442
 443     if (leftbits != 0) {
 444         PyBuffer_Release(&pascii);
 445         PyErr_SetString(Error, "Incorrect padding");
 446         Py_DECREF(rv);
 447         return NULL;
 448     }
 449
 450     /* And set string size correctly. If the result string is empty
 451     ** (because the input was all invalid) return the shared empty
 452     ** string instead; _PyString_Resize() won't do this for us.
 453     */
 454     if (bin_len > 0) {
 455         if (_PyString_Resize(&rv, bin_len) < 0) {
 456             Py_DECREF(rv);
 457             rv = NULL;
 458         }
 459     }
 460     else {
 461         Py_DECREF(rv);
 462         rv = PyString_FromStringAndSize("", 0);
 463     }
 464     PyBuffer_Release(&pascii);
 465     return rv;
 466 }
 467
 468 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
 469
 470 static PyObject *
 471 binascii_b2a_base64(PyObject *self, PyObject *args)
 472 {
 473     Py_buffer pbuf;
 474     unsigned char *ascii_data, *bin_data;
 475     int leftbits = 0;
 476     unsigned char this_ch;
 477     unsigned int leftchar = 0;
 478     PyObject *rv;
 479     Py_ssize_t bin_len;
 480
 481     if ( !PyArg_ParseTuple(args, "s*:b2a_base64", &pbuf) )
 482         return NULL;
 483     bin_data = pbuf.buf;
 484     bin_len = pbuf.len;
 485
 486     assert(bin_len >= 0);
 487
 488     if ( bin_len > BASE64_MAXBIN ) {
 489         PyErr_SetString(Error, "Too much data for base64 line");
 490         PyBuffer_Release(&pbuf);
 491         return NULL;
 492     }
 493
 494     /* We're lazy and allocate too much (fixed up later).
 495        "+3" leaves room for up to two pad characters and a trailing
 496        newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
 497     if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
 498         PyBuffer_Release(&pbuf);
 499         return NULL;
 500     }
 501     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
 502
 503     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
 504         /* Shift the data into our buffer */
 505         leftchar = (leftchar << 8) | *bin_data;
 506         leftbits += 8;
 507
 508         /* See if there are 6-bit groups ready */
 509         while ( leftbits >= 6 ) {
 510             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 511             leftbits -= 6;
 512             *ascii_data++ = table_b2a_base64[this_ch];
 513         }
 514     }
 515     if ( leftbits == 2 ) {
 516         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
 517         *ascii_data++ = BASE64_PAD;
 518         *ascii_data++ = BASE64_PAD;
 519     } else if ( leftbits == 4 ) {
 520         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 521         *ascii_data++ = BASE64_PAD;
 522     }
 523     *ascii_data++ = '\n';       /* Append a courtesy newline */
 524
 525     if (_PyString_Resize(&rv,
 526                        (ascii_data -
 527                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
 528         Py_DECREF(rv);
 529         rv = NULL;
 530     }
 531     PyBuffer_Release(&pbuf);
 532     return rv;
 533 }
 534
 535 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
 536
 537 static PyObject *
 538 binascii_a2b_hqx(PyObject *self, PyObject *args)
 539 {
 540     Py_buffer pascii;
 541     unsigned char *ascii_data, *bin_data;
 542     int leftbits = 0;
 543     unsigned char this_ch;
 544     unsigned int leftchar = 0;
 545     PyObject *rv;
 546     Py_ssize_t len;
 547     int done = 0;
 548
 549     if ( !PyArg_ParseTuple(args, "s*:a2b_hqx", &pascii) )
 550         return NULL;
 551     ascii_data = pascii.buf;
 552     len = pascii.len;
 553
 554     assert(len >= 0);
 555
 556     if (len > PY_SSIZE_T_MAX - 2) {
 557         PyBuffer_Release(&pascii);
 558         return PyErr_NoMemory();
 559     }
 560
 561     /* Allocate a string that is too big (fixed later)
 562        Add two to the initial length to prevent interning which
 563        would preclude subsequent resizing.  */
 564     if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL ) {
 565         PyBuffer_Release(&pascii);
 566         return NULL;
 567     }
 568     bin_data = (unsigned char *)PyString_AS_STRING(rv);
 569
 570     for( ; len > 0 ; len--, ascii_data++ ) {
 571         /* Get the byte and look it up */
 572         this_ch = table_a2b_hqx[*ascii_data];
 573         if ( this_ch == SKIP )
 574             continue;
 575         if ( this_ch == FAIL ) {
 576             PyErr_SetString(Error, "Illegal char");
 577             PyBuffer_Release(&pascii);
 578             Py_DECREF(rv);
 579             return NULL;
 580         }
 581         if ( this_ch == DONE ) {
 582             /* The terminating colon */
 583             done = 1;
 584             break;
 585         }
 586
 587         /* Shift it into the buffer and see if any bytes are ready */
 588         leftchar = (leftchar << 6) | (this_ch);
 589         leftbits += 6;
 590         if ( leftbits >= 8 ) {
 591             leftbits -= 8;
 592             *bin_data++ = (leftchar >> leftbits) & 0xff;
 593             leftchar &= ((1 << leftbits) - 1);
 594         }
 595     }
 596
 597     if ( leftbits && !done ) {
 598         PyErr_SetString(Incomplete,
 599                         "String has incomplete number of bytes");
 600         PyBuffer_Release(&pascii);
 601         Py_DECREF(rv);
 602         return NULL;
 603     }
 604     if (_PyString_Resize(&rv,
 605                        (bin_data -
 606                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
 607         Py_DECREF(rv);
 608         rv = NULL;
 609     }
 610     if (rv) {
 611         PyObject *rrv = Py_BuildValue("Oi", rv, done);
 612         PyBuffer_Release(&pascii);
 613         Py_DECREF(rv);
 614         return rrv;
 615     }
 616
 617     PyBuffer_Release(&pascii);
 618     return NULL;
 619 }
 620
 621 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
 622
 623 static PyObject *
 624 binascii_rlecode_hqx(PyObject *self, PyObject *args)
 625 {
 626     Py_buffer pbuf;
 627     unsigned char *in_data, *out_data;
 628     PyObject *rv;
 629     unsigned char ch;
 630     Py_ssize_t in, inend, len;
 631
 632     if ( !PyArg_ParseTuple(args, "s*:rlecode_hqx", &pbuf) )
 633         return NULL;
 634     in_data = pbuf.buf;
 635     len = pbuf.len;
 636
 637     assert(len >= 0);
 638
 639     if (len > PY_SSIZE_T_MAX / 2 - 2) {
 640         PyBuffer_Release(&pbuf);
 641         return PyErr_NoMemory();
 642     }
 643
 644     /* Worst case: output is twice as big as input (fixed later) */
 645     if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
 646         PyBuffer_Release(&pbuf);
 647         return NULL;
 648     }
 649     out_data = (unsigned char *)PyString_AS_STRING(rv);
 650
 651     for( in=0; in<len; in++) {
 652         ch = in_data[in];
 653         if ( ch == RUNCHAR ) {
 654             /* RUNCHAR. Escape it. */
 655             *out_data++ = RUNCHAR;
 656             *out_data++ = 0;
 657         } else {
 658             /* Check how many following are the same */
 659             for(inend=in+1;
 660                 inend<len && in_data[inend] == ch &&
 661                     inend < in+255;
 662                 inend++) ;
 663             if ( inend - in > 3 ) {
 664                 /* More than 3 in a row. Output RLE. */
 665                 *out_data++ = ch;
 666                 *out_data++ = RUNCHAR;
 667                 *out_data++ = inend-in;
 668                 in = inend-1;
 669             } else {
 670                 /* Less than 3. Output the byte itself */
 671                 *out_data++ = ch;
 672             }
 673         }
 674     }
 675     if (_PyString_Resize(&rv,
 676                        (out_data -
 677                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
 678         Py_DECREF(rv);
 679         rv = NULL;
 680     }
 681     PyBuffer_Release(&pbuf);
 682     return rv;
 683 }
 684
 685 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
 686
 687 static PyObject *
 688 binascii_b2a_hqx(PyObject *self, PyObject *args)
 689 {
 690     Py_buffer pbin;
 691     unsigned char *ascii_data, *bin_data;
 692     int leftbits = 0;
 693     unsigned char this_ch;
 694     unsigned int leftchar = 0;
 695     PyObject *rv;
 696     Py_ssize_t len;
 697
 698     if ( !PyArg_ParseTuple(args, "s*:b2a_hqx", &pbin) )
 699         return NULL;
 700     bin_data = pbin.buf;
 701     len = pbin.len;
 702
 703     assert(len >= 0);
 704
 705     if (len > PY_SSIZE_T_MAX / 2 - 2) {
 706         PyBuffer_Release(&pbin);
 707         return PyErr_NoMemory();
 708     }
 709
 710     /* Allocate a buffer that is at least large enough */
 711     if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
 712         PyBuffer_Release(&pbin);
 713         return NULL;
 714     }
 715     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
 716
 717     for( ; len > 0 ; len--, bin_data++ ) {
 718         /* Shift into our buffer, and output any 6bits ready */
 719         leftchar = (leftchar << 8) | *bin_data;
 720         leftbits += 8;
 721         while ( leftbits >= 6 ) {
 722             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 723             leftbits -= 6;
 724             *ascii_data++ = table_b2a_hqx[this_ch];
 725         }
 726     }
 727     /* Output a possible runt byte */
 728     if ( leftbits ) {
 729         leftchar <<= (6-leftbits);
 730         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
 731     }
 732     if (_PyString_Resize(&rv,
 733                        (ascii_data -
 734                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
 735         Py_DECREF(rv);
 736         rv = NULL;
 737     }
 738     PyBuffer_Release(&pbin);
 739     return rv;
 740 }
 741
 742 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
 743
 744 static PyObject *
 745 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 746 {
 747     Py_buffer pin;
 748     unsigned char *in_data, *out_data;
 749     unsigned char in_byte, in_repeat;
 750     PyObject *rv;
 751     Py_ssize_t in_len, out_len, out_len_left;
 752
 753     if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
 754         return NULL;
 755     in_data = pin.buf;
 756     in_len = pin.len;
 757
 758     assert(in_len >= 0);
 759
 760     /* Empty string is a special case */
 761     if ( in_len == 0 ) {
 762         PyBuffer_Release(&pin);
 763         return PyString_FromStringAndSize("", 0);
 764     }
 765     else if (in_len > PY_SSIZE_T_MAX / 2) {
 766         PyBuffer_Release(&pin);
 767         return PyErr_NoMemory();
 768     }
 769
 770     /* Allocate a buffer of reasonable size. Resized when needed */
 771     out_len = in_len*2;
 772     if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) {
 773         PyBuffer_Release(&pin);
 774         return NULL;
 775     }
 776     out_len_left = out_len;
 777     out_data = (unsigned char *)PyString_AS_STRING(rv);
 778
 779     /*
 780     ** We need two macros here to get/put bytes and handle
 781     ** end-of-buffer for input and output strings.
 782     */
 783 #define INBYTE(b) \
 784     do { \
 785              if ( --in_len < 0 ) { \
 786                        PyErr_SetString(Incomplete, ""); \
 787                        Py_DECREF(rv); \
 788                        PyBuffer_Release(&pin); \
 789                        return NULL; \
 790              } \
 791              b = *in_data++; \
 792     } while(0)
 793
 794 #define OUTBYTE(b) \
 795     do { \
 796              if ( --out_len_left < 0 ) { \
 797                       if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
 798                       if (_PyString_Resize(&rv, 2*out_len) < 0) \
 799                         { Py_DECREF(rv); PyBuffer_Release(&pin); return NULL; } \
 800                       out_data = (unsigned char *)PyString_AS_STRING(rv) \
 801                                                              + out_len; \
 802                       out_len_left = out_len-1; \
 803                       out_len = out_len * 2; \
 804              } \
 805              *out_data++ = b; \
 806     } while(0)
 807
 808         /*
 809         ** Handle first byte separately (since we have to get angry
 810         ** in case of an orphaned RLE code).
 811         */
 812         INBYTE(in_byte);
 813
 814     if (in_byte == RUNCHAR) {
 815         INBYTE(in_repeat);
 816         if (in_repeat != 0) {
 817             /* Note Error, not Incomplete (which is at the end
 818             ** of the string only). This is a programmer error.
 819             */
 820             PyErr_SetString(Error, "Orphaned RLE code at start");
 821             PyBuffer_Release(&pin);
 822             Py_DECREF(rv);
 823             return NULL;
 824         }
 825         OUTBYTE(RUNCHAR);
 826     } else {
 827         OUTBYTE(in_byte);
 828     }
 829
 830     while( in_len > 0 ) {
 831         INBYTE(in_byte);
 832
 833         if (in_byte == RUNCHAR) {
 834             INBYTE(in_repeat);
 835             if ( in_repeat == 0 ) {
 836                 /* Just an escaped RUNCHAR value */
 837                 OUTBYTE(RUNCHAR);
 838             } else {
 839                 /* Pick up value and output a sequence of it */
 840                 in_byte = out_data[-1];
 841                 while ( --in_repeat > 0 )
 842                     OUTBYTE(in_byte);
 843             }
 844         } else {
 845             /* Normal byte */
 846             OUTBYTE(in_byte);
 847         }
 848     }
 849     if (_PyString_Resize(&rv,
 850                        (out_data -
 851                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
 852         Py_DECREF(rv);
 853         rv = NULL;
 854     }
 855     PyBuffer_Release(&pin);
 856     return rv;
 857 }
 858
 859 PyDoc_STRVAR(doc_crc_hqx,
 860 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
 861
 862 static PyObject *
 863 binascii_crc_hqx(PyObject *self, PyObject *args)
 864 {
 865     Py_buffer pin;
 866     unsigned char *bin_data;
 867     unsigned int crc;
 868     Py_ssize_t len;
 869
 870     if ( !PyArg_ParseTuple(args, "s*i:crc_hqx", &pin, &crc) )
 871         return NULL;
 872     bin_data = pin.buf;
 873     len = pin.len;
 874
 875     while(len-- > 0) {
 876         crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
 877     }
 878
 879     PyBuffer_Release(&pin);
 880     return Py_BuildValue("i", crc);
 881 }
 882
/* Docstring for crc32(); shared by both binascii_crc32 implementations
 * below (the zlib-backed one and the table-driven fallback).
 */
PyDoc_STRVAR(doc_crc32,
"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
 885
 886 #ifdef USE_ZLIB_CRC32
 887 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
 888 static PyObject *
 889 binascii_crc32(PyObject *self, PyObject *args)
 890 {
 891     unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
 892     Py_buffer pbuf;
 893     Byte *buf;
 894     Py_ssize_t len;
 895     int signed_val;
 896
 897     if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val))
 898     return NULL;
 899     /* In Python 2.x we return a signed integer regardless of native platform
 900      * long size (the 32bit unsigned long is treated as 32-bit signed and sign
 901      * extended into a 64-bit long inside the integer object).  3.0 does the
 902      * right thing and returns unsigned. http://bugs.python.org/issue1202 */
 903     buf = (Byte*)pbuf.buf;
 904     len = pbuf.len;
 905     signed_val = crc32(crc32val, buf, len);
 906     PyBuffer_Release(&pbuf);
 907     return PyInt_FromLong(signed_val);
 908 }
 909 #else  /* USE_ZLIB_CRC32 */
 910 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
 911     Also known as: ISO 3307
 912 **********************************************************************|
 913 *                                                                    *|
 914 * Demonstration program to compute the 32-bit CRC used as the frame  *|
 915 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
 916 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
 917 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
 918 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
 919 * this polynomial is or will be included in CCITT V.41, which        *|
 920 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
 921 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
 922 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
 923 *                                                                    *|
 924 **********************************************************************|
 925
 926  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 927  code or tables extracted from it, as desired without restriction.
 928
 929  First, the polynomial itself and its table of feedback terms.  The
 930  polynomial is
 931  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 932  Note that we take it "backwards" and put the highest-order term in
 933  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 934  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 935  the MSB being 1.
 936
 937  Note that the usual hardware shift register implementation, which
 938  is what we're using (we're merely optimizing it by doing eight-bit
 939  chunks at a time) shifts bits into the lowest-order term.  In our
 940  implementation, that means shifting towards the right.  Why do we
 941  do it this way?  Because the calculated CRC must be transmitted in
 942  order from highest-order term to lowest-order term.  UARTs transmit
 943  characters in order from LSB to MSB.  By storing the CRC this way,
 944  we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
 946  by bit from highest- to lowest-order term without requiring any bit
 947  shuffling on our part.  Reception works similarly.
 948
 949  The feedback terms table consists of 256, 32-bit entries.  Notes:
 950
 951   1. The table can be generated at runtime if desired; code to do so
 952      is shown later.  It might not be obvious, but the feedback
 953      terms simply represent the results of eight shift/xor opera-
 954      tions for all combinations of data and CRC register values.
 955
 956   2. The CRC accumulation logic is the same for all CRC polynomials,
 957      be they sixteen or thirty-two bits wide.  You simply choose the
 958      appropriate table.  Alternatively, because the table can be
 959      generated at runtime, you can start by generating the table for
 960      the polynomial in question and use exactly the same "updcrc",
 961      if your application needn't simultaneously handle two CRC
 962      polynomials.  (Note, however, that XMODEM is strange.)
 963
 964   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
 965      of course, 32-bit entries work OK if the high 16 bits are zero.
 966
 967   4. The values must be right-shifted by eight bits by the "updcrc"
 968      logic; the shift must be unsigned (bring in zeroes).  On some
 969      hardware you could probably optimize the shift in assembler by
 970      using byte-swap instructions.
 971 ********************************************************************/
 972
 973 static unsigned int crc_32_tab[256] = {
 974 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
 975 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
 976 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
 977 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
 978 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
 979 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
 980 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
 981 0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
 982 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
 983 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
 984 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
 985 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
 986 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
 987 0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
 988 0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
 989 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
 990 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
 991 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
 992 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
 993 0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
 994 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
 995 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
 996 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
 997 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
 998 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
 999 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
1000 0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
1001 0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
1002 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
1003 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
1004 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
1005 0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
1006 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
1007 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
1008 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
1009 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
1010 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
1011 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
1012 0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
1013 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
1014 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
1015 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
1016 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
1017 0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
1018 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
1019 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
1020 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
1021 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
1022 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
1023 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
1024 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
1025 0x2d02ef8dU
1026 };
1027
1028 static PyObject *
1029 binascii_crc32(PyObject *self, PyObject *args)
1030 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1031     Py_buffer pbin;
1032     unsigned char *bin_data;
1033     unsigned int crc = 0U;      /* initial value of CRC */
1034     Py_ssize_t len;
1035     int result;
1036
1037     if ( !PyArg_ParseTuple(args, "s*|I:crc32", &pbin, &crc) )
1038         return NULL;
1039     bin_data = pbin.buf;
1040     len = pbin.len;
1041
1042     crc = ~ crc;
1043     while (len-- > 0)
1044         crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
1045         /* Note:  (crc >> 8) MUST zero fill on left */
1046
1047     result = (int)(crc ^ 0xFFFFFFFFU);
1048     PyBuffer_Release(&pbin);
1049     return PyInt_FromLong(result);
1050 }
1051 #endif  /* USE_ZLIB_CRC32 */
1052
1053
1054 static PyObject *
1055 binascii_hexlify(PyObject *self, PyObject *args)
1056 {
1057     Py_buffer parg;
1058     char* argbuf;
1059     Py_ssize_t arglen;
1060     PyObject *retval;
1061     char* retbuf;
1062     Py_ssize_t i, j;
1063
1064     if (!PyArg_ParseTuple(args, "s*:b2a_hex", &parg))
1065         return NULL;
1066     argbuf = parg.buf;
1067     arglen = parg.len;
1068
1069     assert(arglen >= 0);
1070     if (arglen > PY_SSIZE_T_MAX / 2) {
1071         PyBuffer_Release(&parg);
1072         return PyErr_NoMemory();
1073     }
1074
1075     retval = PyString_FromStringAndSize(NULL, arglen*2);
1076     if (!retval) {
1077         PyBuffer_Release(&parg);
1078         return NULL;
1079     }
1080     retbuf = PyString_AS_STRING(retval);
1081
1082     /* make hex version of string, taken from shamodule.c */
1083     for (i=j=0; i < arglen; i++) {
1084         char c;
1085         c = (argbuf[i] >> 4) & 0xf;
1086         c = (c>9) ? c+'a'-10 : c + '0';
1087         retbuf[j++] = c;
1088         c = argbuf[i] & 0xf;
1089         c = (c>9) ? c+'a'-10 : c + '0';
1090         retbuf[j++] = c;
1091     }
1092     PyBuffer_Release(&parg);
1093     return retval;
1094 }
1095
1096 PyDoc_STRVAR(doc_hexlify,
1097 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1098 \n\
1099 This function is also available as \"hexlify()\".");
1100
1101
/* Return the value (0..15) of the hexadecimal digit `c`, accepting both
 * upper- and lower-case letters, or -1 when `c` is not a hex digit.
 *
 * Uses plain range comparisons rather than <ctype.h>, so the result does
 * not depend on the current locale and any int value of `c` (including
 * negative ones) is handled safely.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1115
1116
1117 static PyObject *
1118 binascii_unhexlify(PyObject *self, PyObject *args)
1119 {
1120     Py_buffer parg;
1121     char* argbuf;
1122     Py_ssize_t arglen;
1123     PyObject *retval;
1124     char* retbuf;
1125     Py_ssize_t i, j;
1126
1127     if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
1128         return NULL;
1129     argbuf = parg.buf;
1130     arglen = parg.len;
1131
1132     assert(arglen >= 0);
1133
1134     /* XXX What should we do about strings with an odd length?  Should
1135      * we add an implicit leading zero, or a trailing zero?  For now,
1136      * raise an exception.
1137      */
1138     if (arglen % 2) {
1139         PyBuffer_Release(&parg);
1140         PyErr_SetString(PyExc_TypeError, "Odd-length string");
1141         return NULL;
1142     }
1143
1144     retval = PyString_FromStringAndSize(NULL, (arglen/2));
1145     if (!retval) {
1146         PyBuffer_Release(&parg);
1147         return NULL;
1148     }
1149     retbuf = PyString_AS_STRING(retval);
1150
1151     for (i=j=0; i < arglen; i += 2) {
1152         int top = to_int(Py_CHARMASK(argbuf[i]));
1153         int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1154         if (top == -1 || bot == -1) {
1155             PyErr_SetString(PyExc_TypeError,
1156                             "Non-hexadecimal digit found");
1157             goto finally;
1158         }
1159         retbuf[j++] = (top << 4) + bot;
1160     }
1161     PyBuffer_Release(&parg);
1162     return retval;
1163
1164   finally:
1165     PyBuffer_Release(&parg);
1166     Py_DECREF(retval);
1167     return NULL;
1168 }
1169
1170 PyDoc_STRVAR(doc_unhexlify,
1171 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1172 \n\
1173 hexstr must contain an even number of hex digits (upper or lower case).\n\
1174 This function is also available as \"unhexlify()\"");
1175
/* Maps a 7-bit character code to the value of the hexadecimal digit it
 * represents ('0'-'9', 'A'-'F', 'a'-'f'), or -1 for any other character.
 * The table is only ever read, hence const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Hex digit value of character c.  The table has only 128 entries, so the
 * caller must make sure c is in the range 0..127 before using this.
 */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit that b2a_qp compares against when deciding to insert
 * a soft line break.
 */
#define MAXLINESIZE 76
1190
1191 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1192
1193 static PyObject*
1194 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1195 {
1196     Py_ssize_t in, out;
1197     char ch;
1198     Py_buffer pdata;
1199     unsigned char *data, *odata;
1200     Py_ssize_t datalen = 0;
1201     PyObject *rv;
1202     static char *kwlist[] = {"data", "header", NULL};
1203     int header = 0;
1204
1205     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
1206           &header))
1207         return NULL;
1208     data = pdata.buf;
1209     datalen = pdata.len;
1210
1211     /* We allocate the output same size as input, this is overkill.
1212      * The previous implementation used calloc() so we'll zero out the
1213      * memory here too, since PyMem_Malloc() does not guarantee that.
1214      */
1215     odata = (unsigned char *) PyMem_Malloc(datalen);
1216     if (odata == NULL) {
1217         PyBuffer_Release(&pdata);
1218         PyErr_NoMemory();
1219         return NULL;
1220     }
1221     memset(odata, 0, datalen);
1222
1223     in = out = 0;
1224     while (in < datalen) {
1225         if (data[in] == '=') {
1226             in++;
1227             if (in >= datalen) break;
1228             /* Soft line breaks */
1229             if ((data[in] == '\n') || (data[in] == '\r')) {
1230                 if (data[in] != '\n') {
1231                     while (in < datalen && data[in] != '\n') in++;
1232                 }
1233                 if (in < datalen) in++;
1234             }
1235             else if (data[in] == '=') {
1236                 /* broken case from broken python qp */
1237                 odata[out++] = '=';
1238                 in++;
1239             }
1240             else if (((data[in] >= 'A' && data[in] <= 'F') ||
1241                       (data[in] >= 'a' && data[in] <= 'f') ||
1242                       (data[in] >= '0' && data[in] <= '9')) &&
1243                      ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1244                       (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1245                       (data[in+1] >= '0' && data[in+1] <= '9'))) {
1246                 /* hexval */
1247                 ch = hexval(data[in]) << 4;
1248                 in++;
1249                 ch |= hexval(data[in]);
1250                 in++;
1251                 odata[out++] = ch;
1252             }
1253             else {
1254               odata[out++] = '=';
1255             }
1256         }
1257         else if (header && data[in] == '_') {
1258             odata[out++] = ' ';
1259             in++;
1260         }
1261         else {
1262             odata[out] = data[in];
1263             in++;
1264             out++;
1265         }
1266     }
1267     if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1268         PyBuffer_Release(&pdata);
1269         PyMem_Free(odata);
1270         return NULL;
1271     }
1272     PyBuffer_Release(&pdata);
1273     PyMem_Free(odata);
1274     return rv;
1275 }
1276
/* Write the two uppercase hexadecimal digits of `ch` to s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char upper_hex[] = "0123456789ABCDEF";

    s[0] = upper_hex[(ch >> 4) & 0x0f];
    s[1] = upper_hex[ch & 0x0f];
    return 0;
}
1287
1288 PyDoc_STRVAR(doc_b2a_qp,
1289 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1290  Encode a string using quoted-printable encoding. \n\
1291 \n\
1292 On encoding, when istext is set, newlines are not encoded, and white \n\
1293 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1294 both encoded.  When quotetabs is set, space and tabs are encoded.");
1295
1296 /* XXX: This is ridiculously complicated to be backward compatible
1297  * (mostly) with the quopri module.  It doesn't re-create the quopri
1298  * module bug where text ending in CRLF has the CR encoded */
1299 static PyObject*
1300 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1301 {
1302     Py_ssize_t in, out;
1303     Py_buffer pdata;
1304     unsigned char *data, *odata;
1305     Py_ssize_t datalen = 0, odatalen = 0;
1306     PyObject *rv;
1307     unsigned int linelen = 0;
1308     static char *kwlist[] = {"data", "quotetabs", "istext",
1309                                    "header", NULL};
1310     int istext = 1;
1311     int quotetabs = 0;
1312     int header = 0;
1313     unsigned char ch;
1314     int crlf = 0;
1315     unsigned char *p;
1316
1317     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|iii", kwlist, &pdata,
1318           &quotetabs, &istext, &header))
1319         return NULL;
1320     data = pdata.buf;
1321     datalen = pdata.len;
1322
1323     /* See if this string is using CRLF line ends */
1324     /* XXX: this function has the side effect of converting all of
1325      * the end of lines to be the same depending on this detection
1326      * here */
1327     p = (unsigned char *) memchr(data, '\n', datalen);
1328     if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1329         crlf = 1;
1330
1331     /* First, scan to see how many characters need to be encoded */
1332     in = 0;
1333     while (in < datalen) {
1334         if ((data[in] > 126) ||
1335             (data[in] == '=') ||
1336             (header && data[in] == '_') ||
1337             ((data[in] == '.') && (linelen == 0) &&
1338              (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1339             (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1340             ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1341             ((data[in] < 33) &&
1342              (data[in] != '\r') && (data[in] != '\n') &&
1343              (quotetabs ||
1344             (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1345         {
1346             if ((linelen + 3) >= MAXLINESIZE) {
1347                 linelen = 0;
1348                 if (crlf)
1349                     odatalen += 3;
1350                 else
1351                     odatalen += 2;
1352             }
1353             linelen += 3;
1354             odatalen += 3;
1355             in++;
1356         }
1357         else {
1358             if (istext &&
1359                 ((data[in] == '\n') ||
1360                  ((in+1 < datalen) && (data[in] == '\r') &&
1361                  (data[in+1] == '\n'))))
1362             {
1363                 linelen = 0;
1364                 /* Protect against whitespace on end of line */
1365                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1366                     odatalen += 2;
1367                 if (crlf)
1368                     odatalen += 2;
1369                 else
1370                     odatalen += 1;
1371                 if (data[in] == '\r')
1372                     in += 2;
1373                 else
1374                     in++;
1375             }
1376             else {
1377                 if ((in + 1 != datalen) &&
1378                     (data[in+1] != '\n') &&
1379                     (linelen + 1) >= MAXLINESIZE) {
1380                     linelen = 0;
1381                     if (crlf)
1382                         odatalen += 3;
1383                     else
1384                         odatalen += 2;
1385                 }
1386                 linelen++;
1387                 odatalen++;
1388                 in++;
1389             }
1390         }
1391     }
1392
1393     /* We allocate the output same size as input, this is overkill.
1394      * The previous implementation used calloc() so we'll zero out the
1395      * memory here too, since PyMem_Malloc() does not guarantee that.
1396      */
1397     odata = (unsigned char *) PyMem_Malloc(odatalen);
1398     if (odata == NULL) {
1399         PyBuffer_Release(&pdata);
1400         PyErr_NoMemory();
1401         return NULL;
1402     }
1403     memset(odata, 0, odatalen);
1404
1405     in = out = linelen = 0;
1406     while (in < datalen) {
1407         if ((data[in] > 126) ||
1408             (data[in] == '=') ||
1409             (header && data[in] == '_') ||
1410             ((data[in] == '.') && (linelen == 0) &&
1411              (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1412             (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1413             ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1414             ((data[in] < 33) &&
1415              (data[in] != '\r') && (data[in] != '\n') &&
1416              (quotetabs ||
1417             (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1418         {
1419             if ((linelen + 3 )>= MAXLINESIZE) {
1420                 odata[out++] = '=';
1421                 if (crlf) odata[out++] = '\r';
1422                 odata[out++] = '\n';
1423                 linelen = 0;
1424             }
1425             odata[out++] = '=';
1426             to_hex(data[in], &odata[out]);
1427             out += 2;
1428             in++;
1429             linelen += 3;
1430         }
1431         else {
1432             if (istext &&
1433                 ((data[in] == '\n') ||
1434                  ((in+1 < datalen) && (data[in] == '\r') &&
1435                  (data[in+1] == '\n'))))
1436             {
1437                 linelen = 0;
1438                 /* Protect against whitespace on end of line */
1439                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1440                     ch = odata[out-1];
1441                     odata[out-1] = '=';
1442                     to_hex(ch, &odata[out]);
1443                     out += 2;
1444                 }
1445
1446                 if (crlf) odata[out++] = '\r';
1447                 odata[out++] = '\n';
1448                 if (data[in] == '\r')
1449                     in += 2;
1450                 else
1451                     in++;
1452             }
1453             else {
1454                 if ((in + 1 != datalen) &&
1455                     (data[in+1] != '\n') &&
1456                     (linelen + 1) >= MAXLINESIZE) {
1457                     odata[out++] = '=';
1458                     if (crlf) odata[out++] = '\r';
1459                     odata[out++] = '\n';
1460                     linelen = 0;
1461                 }
1462                 linelen++;
1463                 if (header && data[in] == ' ') {
1464                     odata[out++] = '_';
1465                     in++;
1466                 }
1467                 else {
1468                     odata[out++] = data[in++];
1469                 }
1470             }
1471         }
1472     }
1473     if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1474         PyBuffer_Release(&pdata);
1475         PyMem_Free(odata);
1476         return NULL;
1477     }
1478     PyBuffer_Release(&pdata);
1479     PyMem_Free(odata);
1480     return rv;
1481 }
1482
1483 /* List of functions defined in the module */
1484
1485 static struct PyMethodDef binascii_module_methods[] = {
1486     {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1487     {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1488     {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1489     {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1490     {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1491     {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1492     {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1493     {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1494     {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1495     {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1496     {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1497     {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1498      doc_rledecode_hqx},
1499     {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1500     {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1501     {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1502       doc_a2b_qp},
1503     {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1504       doc_b2a_qp},
1505     {NULL, NULL}                             /* sentinel */
1506 };
1507
1508
1509 /* Initialization function for the module (*must* be called initbinascii) */
1510 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1511
1512 PyMODINIT_FUNC
1513 initbinascii(void)
1514 {
1515     PyObject *m, *d, *x;
1516
1517     /* Create the module and add the functions */
1518     m = Py_InitModule("binascii", binascii_module_methods);
1519     if (m == NULL)
1520         return;
1521
1522     d = PyModule_GetDict(m);
1523     x = PyString_FromString(doc_binascii);
1524     PyDict_SetItemString(d, "__doc__", x);
1525     Py_XDECREF(x);
1526
1527     Error = PyErr_NewException("binascii.Error", NULL, NULL);
1528     PyDict_SetItemString(d, "Error", Error);
1529     Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1530     PyDict_SetItemString(d, "Incomplete", Incomplete);
1531 }