Merged release21-maint changes.
[python/dscho.git] / Modules / binascii.c
blob00a28052d9781a9d44ce992ea23d078aab9c674f
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
44 ** Jack Jansen, CWI, July 1995.
48 #include "Python.h"
50 static PyObject *Error;
51 static PyObject *Incomplete;
54 ** hqx lookup table, ascii->binary.
57 #define RUNCHAR 0x90
59 #define DONE 0x7F
60 #define SKIP 0x7E
61 #define FAIL 0x7D
63 static unsigned char table_a2b_hqx[256] = {
64 /* ^@ ^A ^B ^C ^D ^E ^F ^G */
65 /* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
66 /* \b \t \n ^K ^L \r ^N ^O */
67 /* 1*/ FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
68 /* ^P ^Q ^R ^S ^T ^U ^V ^W */
69 /* 2*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
70 /* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
71 /* 3*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
72 /* ! " # $ % & ' */
73 /* 4*/ FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
74 /* ( ) * + , - . / */
75 /* 5*/ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
76 /* 0 1 2 3 4 5 6 7 */
77 /* 6*/ 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
78 /* 8 9 : ; < = > ? */
79 /* 7*/ 0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
80 /* @ A B C D E F G */
81 /* 8*/ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
82 /* H I J K L M N O */
83 /* 9*/ 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
84 /* P Q R S T U V W */
85 /*10*/ 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
86 /* X Y Z [ \ ] ^ _ */
87 /*11*/ 0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
88 /* ` a b c d e f g */
89 /*12*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
90 /* h i j k l m n o */
91 /*13*/ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
92 /* p q r s t u v w */
93 /*14*/ 0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
94 /* x y z { | } ~ ^? */
95 /*15*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
96 /*16*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
97 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
98 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
99 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
100 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
101 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
102 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
103 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
104 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
105 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
106 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
107 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
108 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
109 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
110 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*
** hqx lookup table, binary->ascii: maps a 6-bit value (0..63) to the
** character that represents it.  The table is only ever read, so it is
** declared const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character code.
** -1 marks a character that is not part of the base64 alphabet.  The pad
** character '=' deliberately maps to 0.  The table is only ever read, so
** it is declared const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
128 #define BASE64_PAD '='
129 #define BASE64_MAXBIN 57 /* Max binary chunk size (76 char line) */
/*
** base64 lookup table, binary->ascii: maps a 6-bit value (0..63) to its
** base64 character.  The table is only ever read, so it is declared const.
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
136 static unsigned short crctab_hqx[256] = {
137 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
138 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
139 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
140 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
141 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
142 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
143 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
144 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
145 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
146 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
147 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
148 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
149 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
150 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
151 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
152 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
153 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
154 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
155 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
156 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
157 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
158 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
159 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
160 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
161 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
162 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
163 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
164 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
165 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
166 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
167 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
168 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
171 static char doc_a2b_uu[] = "(ascii) -> bin. Decode a line of uuencoded data";
173 static PyObject *
174 binascii_a2b_uu(PyObject *self, PyObject *args)
176 unsigned char *ascii_data, *bin_data;
177 int leftbits = 0;
178 unsigned char this_ch;
179 unsigned int leftchar = 0;
180 PyObject *rv;
181 int ascii_len, bin_len;
183 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
184 return NULL;
186 /* First byte: binary data length (in bytes) */
187 bin_len = (*ascii_data++ - ' ') & 077;
188 ascii_len--;
190 /* Allocate the buffer */
191 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
192 return NULL;
193 bin_data = (unsigned char *)PyString_AsString(rv);
195 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
196 this_ch = *ascii_data;
197 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
199 ** Whitespace. Assume some spaces got eaten at
200 ** end-of-line. (We check this later)
202 this_ch = 0;
203 } else {
204 /* Check the character for legality
205 ** The 64 in stead of the expected 63 is because
206 ** there are a few uuencodes out there that use
207 ** '`' as zero instead of space.
209 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
210 PyErr_SetString(Error, "Illegal char");
211 Py_DECREF(rv);
212 return NULL;
214 this_ch = (this_ch - ' ') & 077;
217 ** Shift it in on the low end, and see if there's
218 ** a byte ready for output.
220 leftchar = (leftchar << 6) | (this_ch);
221 leftbits += 6;
222 if ( leftbits >= 8 ) {
223 leftbits -= 8;
224 *bin_data++ = (leftchar >> leftbits) & 0xff;
225 leftchar &= ((1 << leftbits) - 1);
226 bin_len--;
230 ** Finally, check that if there's anything left on the line
231 ** that it's whitespace only.
233 while( ascii_len-- > 0 ) {
234 this_ch = *ascii_data++;
235 /* Extra '`' may be written as padding in some cases */
236 if ( this_ch != ' ' && this_ch != ' '+64 &&
237 this_ch != '\n' && this_ch != '\r' ) {
238 PyErr_SetString(Error, "Trailing garbage");
239 Py_DECREF(rv);
240 return NULL;
243 return rv;
246 static char doc_b2a_uu[] = "(bin) -> ascii. Uuencode line of data";
248 static PyObject *
249 binascii_b2a_uu(PyObject *self, PyObject *args)
251 unsigned char *ascii_data, *bin_data;
252 int leftbits = 0;
253 unsigned char this_ch;
254 unsigned int leftchar = 0;
255 PyObject *rv;
256 int bin_len;
258 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
259 return NULL;
260 if ( bin_len > 45 ) {
261 /* The 45 is a limit that appears in all uuencode's */
262 PyErr_SetString(Error, "At most 45 bytes at once");
263 return NULL;
266 /* We're lazy and allocate to much (fixed up later) */
267 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
268 return NULL;
269 ascii_data = (unsigned char *)PyString_AsString(rv);
271 /* Store the length */
272 *ascii_data++ = ' ' + (bin_len & 077);
274 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
275 /* Shift the data (or padding) into our buffer */
276 if ( bin_len > 0 ) /* Data */
277 leftchar = (leftchar << 8) | *bin_data;
278 else /* Padding */
279 leftchar <<= 8;
280 leftbits += 8;
282 /* See if there are 6-bit groups ready */
283 while ( leftbits >= 6 ) {
284 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
285 leftbits -= 6;
286 *ascii_data++ = this_ch + ' ';
289 *ascii_data++ = '\n'; /* Append a courtesy newline */
291 _PyString_Resize(&rv, (ascii_data -
292 (unsigned char *)PyString_AsString(rv)));
293 return rv;
297 static int
298 binascii_find_valid(unsigned char *s, int slen, int num)
300 /* Finds & returns the (num+1)th
301 ** valid character for base64, or -1 if none.
304 int ret = -1;
305 unsigned char c, b64val;
307 while ((slen > 0) && (ret == -1)) {
308 c = *s;
309 b64val = table_a2b_base64[c & 0x7f];
310 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
311 if (num == 0)
312 ret = *s;
313 num--;
316 s++;
317 slen--;
319 return ret;
322 static char doc_a2b_base64[] = "(ascii) -> bin. Decode a line of base64 data";
324 static PyObject *
325 binascii_a2b_base64(PyObject *self, PyObject *args)
327 unsigned char *ascii_data, *bin_data;
328 int leftbits = 0;
329 unsigned char this_ch;
330 unsigned int leftchar = 0;
331 PyObject *rv;
332 int ascii_len, bin_len;
333 int quad_pos = 0;
335 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
336 return NULL;
338 if ( ascii_len == 0) {
339 PyErr_SetString(Error, "Cannot decode empty input");
340 return NULL;
342 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
344 /* Allocate the buffer */
345 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
346 return NULL;
347 bin_data = (unsigned char *)PyString_AsString(rv);
348 bin_len = 0;
350 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
351 this_ch = *ascii_data;
353 if (this_ch > 0x7f ||
354 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
355 continue;
357 /* Check for pad sequences and ignore
358 ** the invalid ones.
360 if (this_ch == BASE64_PAD) {
361 if ( (quad_pos < 2) ||
362 ((quad_pos == 2) &&
363 (binascii_find_valid(ascii_data, ascii_len, 1)
364 != BASE64_PAD)) )
366 continue;
368 else {
369 /* A pad sequence means no more input.
370 ** We've already interpreted the data
371 ** from the quad at this point.
373 leftbits = 0;
374 break;
378 this_ch = table_a2b_base64[*ascii_data];
379 if ( this_ch == (unsigned char) -1 )
380 continue;
383 ** Shift it in on the low end, and see if there's
384 ** a byte ready for output.
386 quad_pos = (quad_pos + 1) & 0x03;
387 leftchar = (leftchar << 6) | (this_ch);
388 leftbits += 6;
390 if ( leftbits >= 8 ) {
391 leftbits -= 8;
392 *bin_data++ = (leftchar >> leftbits) & 0xff;
393 bin_len++;
394 leftchar &= ((1 << leftbits) - 1);
398 if (leftbits != 0) {
399 PyErr_SetString(Error, "Incorrect padding");
400 Py_DECREF(rv);
401 return NULL;
404 /* and set string size correctly */
405 _PyString_Resize(&rv, bin_len);
406 return rv;
409 static char doc_b2a_base64[] = "(bin) -> ascii. Base64-code line of data";
411 static PyObject *
412 binascii_b2a_base64(PyObject *self, PyObject *args)
414 unsigned char *ascii_data, *bin_data;
415 int leftbits = 0;
416 unsigned char this_ch;
417 unsigned int leftchar = 0;
418 PyObject *rv;
419 int bin_len;
421 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
422 return NULL;
423 if ( bin_len > BASE64_MAXBIN ) {
424 PyErr_SetString(Error, "Too much data for base64 line");
425 return NULL;
428 /* We're lazy and allocate to much (fixed up later) */
429 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
430 return NULL;
431 ascii_data = (unsigned char *)PyString_AsString(rv);
433 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
434 /* Shift the data into our buffer */
435 leftchar = (leftchar << 8) | *bin_data;
436 leftbits += 8;
438 /* See if there are 6-bit groups ready */
439 while ( leftbits >= 6 ) {
440 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
441 leftbits -= 6;
442 *ascii_data++ = table_b2a_base64[this_ch];
445 if ( leftbits == 2 ) {
446 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
447 *ascii_data++ = BASE64_PAD;
448 *ascii_data++ = BASE64_PAD;
449 } else if ( leftbits == 4 ) {
450 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
451 *ascii_data++ = BASE64_PAD;
453 *ascii_data++ = '\n'; /* Append a courtesy newline */
455 _PyString_Resize(&rv, (ascii_data -
456 (unsigned char *)PyString_AsString(rv)));
457 return rv;
460 static char doc_a2b_hqx[] = "ascii -> bin, done. Decode .hqx coding";
462 static PyObject *
463 binascii_a2b_hqx(PyObject *self, PyObject *args)
465 unsigned char *ascii_data, *bin_data;
466 int leftbits = 0;
467 unsigned char this_ch;
468 unsigned int leftchar = 0;
469 PyObject *rv;
470 int len;
471 int done = 0;
473 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
474 return NULL;
476 /* Allocate a string that is too big (fixed later) */
477 if ( (rv=PyString_FromStringAndSize(NULL, len)) == NULL )
478 return NULL;
479 bin_data = (unsigned char *)PyString_AsString(rv);
481 for( ; len > 0 ; len--, ascii_data++ ) {
482 /* Get the byte and look it up */
483 this_ch = table_a2b_hqx[*ascii_data];
484 if ( this_ch == SKIP )
485 continue;
486 if ( this_ch == FAIL ) {
487 PyErr_SetString(Error, "Illegal char");
488 Py_DECREF(rv);
489 return NULL;
491 if ( this_ch == DONE ) {
492 /* The terminating colon */
493 done = 1;
494 break;
497 /* Shift it into the buffer and see if any bytes are ready */
498 leftchar = (leftchar << 6) | (this_ch);
499 leftbits += 6;
500 if ( leftbits >= 8 ) {
501 leftbits -= 8;
502 *bin_data++ = (leftchar >> leftbits) & 0xff;
503 leftchar &= ((1 << leftbits) - 1);
507 if ( leftbits && !done ) {
508 PyErr_SetString(Incomplete,
509 "String has incomplete number of bytes");
510 Py_DECREF(rv);
511 return NULL;
513 _PyString_Resize(
514 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
515 if (rv) {
516 PyObject *rrv = Py_BuildValue("Oi", rv, done);
517 Py_DECREF(rv);
518 return rrv;
521 return NULL;
524 static char doc_rlecode_hqx[] = "Binhex RLE-code binary data";
526 static PyObject *
527 binascii_rlecode_hqx(PyObject *self, PyObject *args)
529 unsigned char *in_data, *out_data;
530 PyObject *rv;
531 unsigned char ch;
532 int in, inend, len;
534 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
535 return NULL;
537 /* Worst case: output is twice as big as input (fixed later) */
538 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
539 return NULL;
540 out_data = (unsigned char *)PyString_AsString(rv);
542 for( in=0; in<len; in++) {
543 ch = in_data[in];
544 if ( ch == RUNCHAR ) {
545 /* RUNCHAR. Escape it. */
546 *out_data++ = RUNCHAR;
547 *out_data++ = 0;
548 } else {
549 /* Check how many following are the same */
550 for(inend=in+1;
551 inend<len && in_data[inend] == ch &&
552 inend < in+255;
553 inend++) ;
554 if ( inend - in > 3 ) {
555 /* More than 3 in a row. Output RLE. */
556 *out_data++ = ch;
557 *out_data++ = RUNCHAR;
558 *out_data++ = inend-in;
559 in = inend-1;
560 } else {
561 /* Less than 3. Output the byte itself */
562 *out_data++ = ch;
566 _PyString_Resize(&rv, (out_data -
567 (unsigned char *)PyString_AsString(rv)));
568 return rv;
571 static char doc_b2a_hqx[] = "Encode .hqx data";
573 static PyObject *
574 binascii_b2a_hqx(PyObject *self, PyObject *args)
576 unsigned char *ascii_data, *bin_data;
577 int leftbits = 0;
578 unsigned char this_ch;
579 unsigned int leftchar = 0;
580 PyObject *rv;
581 int len;
583 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
584 return NULL;
586 /* Allocate a buffer that is at least large enough */
587 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
588 return NULL;
589 ascii_data = (unsigned char *)PyString_AsString(rv);
591 for( ; len > 0 ; len--, bin_data++ ) {
592 /* Shift into our buffer, and output any 6bits ready */
593 leftchar = (leftchar << 8) | *bin_data;
594 leftbits += 8;
595 while ( leftbits >= 6 ) {
596 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
597 leftbits -= 6;
598 *ascii_data++ = table_b2a_hqx[this_ch];
601 /* Output a possible runt byte */
602 if ( leftbits ) {
603 leftchar <<= (6-leftbits);
604 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
606 _PyString_Resize(&rv, (ascii_data -
607 (unsigned char *)PyString_AsString(rv)));
608 return rv;
611 static char doc_rledecode_hqx[] = "Decode hexbin RLE-coded string";
613 static PyObject *
614 binascii_rledecode_hqx(PyObject *self, PyObject *args)
616 unsigned char *in_data, *out_data;
617 unsigned char in_byte, in_repeat;
618 PyObject *rv;
619 int in_len, out_len, out_len_left;
621 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
622 return NULL;
624 /* Empty string is a special case */
625 if ( in_len == 0 )
626 return Py_BuildValue("s", "");
628 /* Allocate a buffer of reasonable size. Resized when needed */
629 out_len = in_len*2;
630 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
631 return NULL;
632 out_len_left = out_len;
633 out_data = (unsigned char *)PyString_AsString(rv);
636 ** We need two macros here to get/put bytes and handle
637 ** end-of-buffer for input and output strings.
639 #define INBYTE(b) \
640 do { \
641 if ( --in_len < 0 ) { \
642 PyErr_SetString(Incomplete, ""); \
643 Py_DECREF(rv); \
644 return NULL; \
646 b = *in_data++; \
647 } while(0)
649 #define OUTBYTE(b) \
650 do { \
651 if ( --out_len_left < 0 ) { \
652 _PyString_Resize(&rv, 2*out_len); \
653 if ( rv == NULL ) return NULL; \
654 out_data = (unsigned char *)PyString_AsString(rv) \
655 + out_len; \
656 out_len_left = out_len-1; \
657 out_len = out_len * 2; \
659 *out_data++ = b; \
660 } while(0)
663 ** Handle first byte separately (since we have to get angry
664 ** in case of an orphaned RLE code).
666 INBYTE(in_byte);
668 if (in_byte == RUNCHAR) {
669 INBYTE(in_repeat);
670 if (in_repeat != 0) {
671 /* Note Error, not Incomplete (which is at the end
672 ** of the string only). This is a programmer error.
674 PyErr_SetString(Error, "Orphaned RLE code at start");
675 Py_DECREF(rv);
676 return NULL;
678 OUTBYTE(RUNCHAR);
679 } else {
680 OUTBYTE(in_byte);
683 while( in_len > 0 ) {
684 INBYTE(in_byte);
686 if (in_byte == RUNCHAR) {
687 INBYTE(in_repeat);
688 if ( in_repeat == 0 ) {
689 /* Just an escaped RUNCHAR value */
690 OUTBYTE(RUNCHAR);
691 } else {
692 /* Pick up value and output a sequence of it */
693 in_byte = out_data[-1];
694 while ( --in_repeat > 0 )
695 OUTBYTE(in_byte);
697 } else {
698 /* Normal byte */
699 OUTBYTE(in_byte);
702 _PyString_Resize(&rv, (out_data -
703 (unsigned char *)PyString_AsString(rv)));
704 return rv;
707 static char doc_crc_hqx[] =
708 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally";
710 static PyObject *
711 binascii_crc_hqx(PyObject *self, PyObject *args)
713 unsigned char *bin_data;
714 unsigned int crc;
715 int len;
717 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
718 return NULL;
720 while(len--) {
721 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
724 return Py_BuildValue("i", crc);
727 static char doc_crc32[] =
728 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally";
730 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
731 Also known as: ISO 3307
732 **********************************************************************|
733 * *|
734 * Demonstration program to compute the 32-bit CRC used as the frame *|
735 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
736 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
737 * protocol). The 32-bit FCS was added via the Federal Register, *|
738 * 1 June 1982, p.23798. I presume but don't know for certain that *|
739 * this polynomial is or will be included in CCITT V.41, which *|
740 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
741 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
742 * errors by a factor of 10^-5 over 16-bit FCS. *|
743 * *|
744 **********************************************************************|
746 Copyright (C) 1986 Gary S. Brown. You may use this program, or
747 code or tables extracted from it, as desired without restriction.
749 First, the polynomial itself and its table of feedback terms. The
750 polynomial is
751 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
752 Note that we take it "backwards" and put the highest-order term in
753 the lowest-order bit. The X^32 term is "implied"; the LSB is the
754 X^31 term, etc. The X^0 term (usually shown as "+1") results in
755 the MSB being 1.
757 Note that the usual hardware shift register implementation, which
758 is what we're using (we're merely optimizing it by doing eight-bit
759 chunks at a time) shifts bits into the lowest-order term. In our
760 implementation, that means shifting towards the right. Why do we
761 do it this way? Because the calculated CRC must be transmitted in
762 order from highest-order term to lowest-order term. UARTs transmit
763 characters in order from LSB to MSB. By storing the CRC this way,
764 we hand it to the UART in the order low-byte to high-byte; the UART
765 sends each low-bit to high-bit; and the result is transmission bit
766 by bit from highest- to lowest-order term without requiring any bit
767 shuffling on our part. Reception works similarly.
769 The feedback terms table consists of 256, 32-bit entries. Notes:
771 1. The table can be generated at runtime if desired; code to do so
772 is shown later. It might not be obvious, but the feedback
773 terms simply represent the results of eight shift/xor opera-
774 tions for all combinations of data and CRC register values.
776 2. The CRC accumulation logic is the same for all CRC polynomials,
777 be they sixteen or thirty-two bits wide. You simply choose the
778 appropriate table. Alternatively, because the table can be
779 generated at runtime, you can start by generating the table for
780 the polynomial in question and use exactly the same "updcrc",
781 if your application needn't simultaneously handle two CRC
782 polynomials. (Note, however, that XMODEM is strange.)
784 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
785 of course, 32-bit entries work OK if the high 16 bits are zero.
787 4. The values must be right-shifted by eight bits by the "updcrc"
788 logic; the shift must be unsigned (bring in zeroes). On some
789 hardware you could probably optimize the shift in assembler by
790 using byte-swap instructions.
791 ********************************************************************/
793 static unsigned long crc_32_tab[256] = {
794 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
795 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
796 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
797 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
798 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
799 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
800 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
801 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
802 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
803 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
804 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
805 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
806 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
807 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
808 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
809 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
810 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
811 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
812 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
813 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
814 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
815 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
816 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
817 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
818 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
819 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
820 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
821 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
822 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
823 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
824 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
825 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
826 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
827 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
828 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
829 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
830 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
831 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
832 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
833 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
834 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
835 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
836 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
837 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
838 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
839 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
840 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
841 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
842 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
843 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
844 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
845 0x2d02ef8dUL
848 static PyObject *
849 binascii_crc32(PyObject *self, PyObject *args)
850 { /* By Jim Ahlstrom; All rights transferred to CNRI */
851 unsigned char *bin_data;
852 unsigned long crc = 0UL; /* initial value of CRC */
853 int len;
855 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
856 return NULL;
858 crc = crc ^ 0xFFFFFFFFUL;
859 while(len--)
860 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
861 /* Note: (crc >> 8) MUST zero fill on left */
862 return Py_BuildValue("l", crc ^ 0xFFFFFFFFUL);
866 static PyObject *
867 binascii_hexlify(PyObject *self, PyObject *args)
869 char* argbuf;
870 int arglen;
871 PyObject *retval;
872 char* retbuf;
873 int i, j;
875 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
876 return NULL;
878 retval = PyString_FromStringAndSize(NULL, arglen*2);
879 if (!retval)
880 return NULL;
881 retbuf = PyString_AsString(retval);
882 if (!retbuf)
883 goto finally;
885 /* make hex version of string, taken from shamodule.c */
886 for (i=j=0; i < arglen; i++) {
887 char c;
888 c = (argbuf[i] >> 4) & 0xf;
889 c = (c>9) ? c+'a'-10 : c + '0';
890 retbuf[j++] = c;
891 c = argbuf[i] & 0xf;
892 c = (c>9) ? c+'a'-10 : c + '0';
893 retbuf[j++] = c;
895 return retval;
897 finally:
898 Py_DECREF(retval);
899 return NULL;
902 static char doc_hexlify[] =
903 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
905 This function is also available as \"hexlify()\".";
/*
** Return the value (0..15) of the hexadecimal digit c, accepting both
** upper and lower case letters, or -1 if c is not a hexadecimal digit.
**
** Plain range comparisons are used instead of isdigit()/isupper()/
** tolower(): they do not depend on the current locale and are well
** defined for every int value, including negative ones.
*/
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
923 static PyObject *
924 binascii_unhexlify(PyObject *self, PyObject *args)
926 char* argbuf;
927 int arglen;
928 PyObject *retval;
929 char* retbuf;
930 int i, j;
932 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
933 return NULL;
935 /* XXX What should we do about strings with an odd length? Should
936 * we add an implicit leading zero, or a trailing zero? For now,
937 * raise an exception.
939 if (arglen % 2) {
940 PyErr_SetString(PyExc_TypeError, "Odd-length string");
941 return NULL;
944 retval = PyString_FromStringAndSize(NULL, (arglen/2));
945 if (!retval)
946 return NULL;
947 retbuf = PyString_AsString(retval);
948 if (!retbuf)
949 goto finally;
951 for (i=j=0; i < arglen; i += 2) {
952 int top = to_int(Py_CHARMASK(argbuf[i]));
953 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
954 if (top == -1 || bot == -1) {
955 PyErr_SetString(PyExc_TypeError,
956 "Non-hexadecimal digit found");
957 goto finally;
959 retbuf[j++] = (top << 4) + bot;
961 return retval;
963 finally:
964 Py_DECREF(retval);
965 return NULL;
968 static char doc_unhexlify[] =
969 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
971 hexstr must contain an even number of hex digits (upper or lower case).\n\
972 This function is also available as \"unhexlify()\"";
975 /* List of functions defined in the module */
977 static struct PyMethodDef binascii_module_methods[] = {
978 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
979 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
980 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
981 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
982 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
983 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
984 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
985 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
986 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
987 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
988 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
989 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
990 doc_rledecode_hqx},
991 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
992 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
993 {NULL, NULL} /* sentinel */
997 /* Initialization function for the module (*must* be called initbinascii) */
998 static char doc_binascii[] = "Conversion between binary data and ASCII";
1000 DL_EXPORT(void)
1001 initbinascii(void)
1003 PyObject *m, *d, *x;
1005 /* Create the module and add the functions */
1006 m = Py_InitModule("binascii", binascii_module_methods);
1008 d = PyModule_GetDict(m);
1009 x = PyString_FromString(doc_binascii);
1010 PyDict_SetItemString(d, "__doc__", x);
1011 Py_XDECREF(x);
1013 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1014 PyDict_SetItemString(d, "Error", Error);
1015 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1016 PyDict_SetItemString(d, "Incomplete", Incomplete);