- Got rid of newmodule.c
[python/dscho.git] / Modules / binascii.c
blob6cba688fc7a9bbb50adc143e836b6c2689b2f60f
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non-printable characters (anything
48 ** below 32 or above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
53 ** Brandon Long, September 2001.
57 #include "Python.h"
59 static PyObject *Error;
60 static PyObject *Incomplete;
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Entries below 0x40 are the 6-bit value of
** that character; the three marker values defined below flag bytes that
** carry no data.  The table is only ever read, hence const.
*/

#define RUNCHAR 0x90    /* run-length marker byte used by the hqx RLE coders */

#define DONE 0x7F       /* ':' */
#define SKIP 0x7E       /* '\n' and '\r' */
#define FAIL 0x7D       /* every other byte outside the hqx alphabet */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
/*
** hqx alphabet, binary->ascii: entry i is the character that encodes the
** 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
/*
** base64 lookup table, ascii->binary, for the 128 7-bit characters.
** -1 marks a character outside the base64 alphabet.  The pad character
** '=' maps to 0 rather than -1.  Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
/* Character used to pad a short final base64 group. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)

/*
** base64 alphabet, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
** Byte-at-a-time lookup table for the 16-bit hqx CRC: entry i is the CRC
** register after shifting the byte i through the polynomial 0x1021,
** most significant bit first.  Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
182 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
184 static PyObject *
185 binascii_a2b_uu(PyObject *self, PyObject *args)
187 unsigned char *ascii_data, *bin_data;
188 int leftbits = 0;
189 unsigned char this_ch;
190 unsigned int leftchar = 0;
191 PyObject *rv;
192 int ascii_len, bin_len;
194 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
195 return NULL;
197 /* First byte: binary data length (in bytes) */
198 bin_len = (*ascii_data++ - ' ') & 077;
199 ascii_len--;
201 /* Allocate the buffer */
202 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
203 return NULL;
204 bin_data = (unsigned char *)PyString_AsString(rv);
206 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
207 this_ch = *ascii_data;
208 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
210 ** Whitespace. Assume some spaces got eaten at
211 ** end-of-line. (We check this later)
213 this_ch = 0;
214 } else {
215 /* Check the character for legality
216 ** The 64 in stead of the expected 63 is because
217 ** there are a few uuencodes out there that use
218 ** '`' as zero instead of space.
220 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
221 PyErr_SetString(Error, "Illegal char");
222 Py_DECREF(rv);
223 return NULL;
225 this_ch = (this_ch - ' ') & 077;
228 ** Shift it in on the low end, and see if there's
229 ** a byte ready for output.
231 leftchar = (leftchar << 6) | (this_ch);
232 leftbits += 6;
233 if ( leftbits >= 8 ) {
234 leftbits -= 8;
235 *bin_data++ = (leftchar >> leftbits) & 0xff;
236 leftchar &= ((1 << leftbits) - 1);
237 bin_len--;
241 ** Finally, check that if there's anything left on the line
242 ** that it's whitespace only.
244 while( ascii_len-- > 0 ) {
245 this_ch = *ascii_data++;
246 /* Extra '`' may be written as padding in some cases */
247 if ( this_ch != ' ' && this_ch != ' '+64 &&
248 this_ch != '\n' && this_ch != '\r' ) {
249 PyErr_SetString(Error, "Trailing garbage");
250 Py_DECREF(rv);
251 return NULL;
254 return rv;
257 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
259 static PyObject *
260 binascii_b2a_uu(PyObject *self, PyObject *args)
262 unsigned char *ascii_data, *bin_data;
263 int leftbits = 0;
264 unsigned char this_ch;
265 unsigned int leftchar = 0;
266 PyObject *rv;
267 int bin_len;
269 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
270 return NULL;
271 if ( bin_len > 45 ) {
272 /* The 45 is a limit that appears in all uuencode's */
273 PyErr_SetString(Error, "At most 45 bytes at once");
274 return NULL;
277 /* We're lazy and allocate to much (fixed up later) */
278 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
279 return NULL;
280 ascii_data = (unsigned char *)PyString_AsString(rv);
282 /* Store the length */
283 *ascii_data++ = ' ' + (bin_len & 077);
285 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
286 /* Shift the data (or padding) into our buffer */
287 if ( bin_len > 0 ) /* Data */
288 leftchar = (leftchar << 8) | *bin_data;
289 else /* Padding */
290 leftchar <<= 8;
291 leftbits += 8;
293 /* See if there are 6-bit groups ready */
294 while ( leftbits >= 6 ) {
295 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
296 leftbits -= 6;
297 *ascii_data++ = this_ch + ' ';
300 *ascii_data++ = '\n'; /* Append a courtesy newline */
302 _PyString_Resize(&rv, (ascii_data -
303 (unsigned char *)PyString_AsString(rv)));
304 return rv;
308 static int
309 binascii_find_valid(unsigned char *s, int slen, int num)
311 /* Finds & returns the (num+1)th
312 ** valid character for base64, or -1 if none.
315 int ret = -1;
316 unsigned char c, b64val;
318 while ((slen > 0) && (ret == -1)) {
319 c = *s;
320 b64val = table_a2b_base64[c & 0x7f];
321 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
322 if (num == 0)
323 ret = *s;
324 num--;
327 s++;
328 slen--;
330 return ret;
333 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
335 static PyObject *
336 binascii_a2b_base64(PyObject *self, PyObject *args)
338 unsigned char *ascii_data, *bin_data;
339 int leftbits = 0;
340 unsigned char this_ch;
341 unsigned int leftchar = 0;
342 PyObject *rv;
343 int ascii_len, bin_len;
344 int quad_pos = 0;
346 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
347 return NULL;
349 if ( ascii_len == 0) {
350 PyErr_SetString(Error, "Cannot decode empty input");
351 return NULL;
353 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
355 /* Allocate the buffer */
356 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
357 return NULL;
358 bin_data = (unsigned char *)PyString_AsString(rv);
359 bin_len = 0;
361 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
362 this_ch = *ascii_data;
364 if (this_ch > 0x7f ||
365 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
366 continue;
368 /* Check for pad sequences and ignore
369 ** the invalid ones.
371 if (this_ch == BASE64_PAD) {
372 if ( (quad_pos < 2) ||
373 ((quad_pos == 2) &&
374 (binascii_find_valid(ascii_data, ascii_len, 1)
375 != BASE64_PAD)) )
377 continue;
379 else {
380 /* A pad sequence means no more input.
381 ** We've already interpreted the data
382 ** from the quad at this point.
384 leftbits = 0;
385 break;
389 this_ch = table_a2b_base64[*ascii_data];
390 if ( this_ch == (unsigned char) -1 )
391 continue;
394 ** Shift it in on the low end, and see if there's
395 ** a byte ready for output.
397 quad_pos = (quad_pos + 1) & 0x03;
398 leftchar = (leftchar << 6) | (this_ch);
399 leftbits += 6;
401 if ( leftbits >= 8 ) {
402 leftbits -= 8;
403 *bin_data++ = (leftchar >> leftbits) & 0xff;
404 bin_len++;
405 leftchar &= ((1 << leftbits) - 1);
409 if (leftbits != 0) {
410 PyErr_SetString(Error, "Incorrect padding");
411 Py_DECREF(rv);
412 return NULL;
415 /* and set string size correctly */
416 _PyString_Resize(&rv, bin_len);
417 return rv;
420 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
422 static PyObject *
423 binascii_b2a_base64(PyObject *self, PyObject *args)
425 unsigned char *ascii_data, *bin_data;
426 int leftbits = 0;
427 unsigned char this_ch;
428 unsigned int leftchar = 0;
429 PyObject *rv;
430 int bin_len;
432 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
433 return NULL;
434 if ( bin_len > BASE64_MAXBIN ) {
435 PyErr_SetString(Error, "Too much data for base64 line");
436 return NULL;
439 /* We're lazy and allocate too much (fixed up later).
440 "+3" leaves room for up to two pad characters and a trailing
441 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
442 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
443 return NULL;
444 ascii_data = (unsigned char *)PyString_AsString(rv);
446 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
447 /* Shift the data into our buffer */
448 leftchar = (leftchar << 8) | *bin_data;
449 leftbits += 8;
451 /* See if there are 6-bit groups ready */
452 while ( leftbits >= 6 ) {
453 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
454 leftbits -= 6;
455 *ascii_data++ = table_b2a_base64[this_ch];
458 if ( leftbits == 2 ) {
459 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
460 *ascii_data++ = BASE64_PAD;
461 *ascii_data++ = BASE64_PAD;
462 } else if ( leftbits == 4 ) {
463 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
464 *ascii_data++ = BASE64_PAD;
466 *ascii_data++ = '\n'; /* Append a courtesy newline */
468 _PyString_Resize(&rv, (ascii_data -
469 (unsigned char *)PyString_AsString(rv)));
470 return rv;
473 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
475 static PyObject *
476 binascii_a2b_hqx(PyObject *self, PyObject *args)
478 unsigned char *ascii_data, *bin_data;
479 int leftbits = 0;
480 unsigned char this_ch;
481 unsigned int leftchar = 0;
482 PyObject *rv;
483 int len;
484 int done = 0;
486 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
487 return NULL;
489 /* Allocate a string that is too big (fixed later) */
490 if ( (rv=PyString_FromStringAndSize(NULL, len)) == NULL )
491 return NULL;
492 bin_data = (unsigned char *)PyString_AsString(rv);
494 for( ; len > 0 ; len--, ascii_data++ ) {
495 /* Get the byte and look it up */
496 this_ch = table_a2b_hqx[*ascii_data];
497 if ( this_ch == SKIP )
498 continue;
499 if ( this_ch == FAIL ) {
500 PyErr_SetString(Error, "Illegal char");
501 Py_DECREF(rv);
502 return NULL;
504 if ( this_ch == DONE ) {
505 /* The terminating colon */
506 done = 1;
507 break;
510 /* Shift it into the buffer and see if any bytes are ready */
511 leftchar = (leftchar << 6) | (this_ch);
512 leftbits += 6;
513 if ( leftbits >= 8 ) {
514 leftbits -= 8;
515 *bin_data++ = (leftchar >> leftbits) & 0xff;
516 leftchar &= ((1 << leftbits) - 1);
520 if ( leftbits && !done ) {
521 PyErr_SetString(Incomplete,
522 "String has incomplete number of bytes");
523 Py_DECREF(rv);
524 return NULL;
526 _PyString_Resize(
527 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
528 if (rv) {
529 PyObject *rrv = Py_BuildValue("Oi", rv, done);
530 Py_DECREF(rv);
531 return rrv;
534 return NULL;
537 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
539 static PyObject *
540 binascii_rlecode_hqx(PyObject *self, PyObject *args)
542 unsigned char *in_data, *out_data;
543 PyObject *rv;
544 unsigned char ch;
545 int in, inend, len;
547 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
548 return NULL;
550 /* Worst case: output is twice as big as input (fixed later) */
551 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
552 return NULL;
553 out_data = (unsigned char *)PyString_AsString(rv);
555 for( in=0; in<len; in++) {
556 ch = in_data[in];
557 if ( ch == RUNCHAR ) {
558 /* RUNCHAR. Escape it. */
559 *out_data++ = RUNCHAR;
560 *out_data++ = 0;
561 } else {
562 /* Check how many following are the same */
563 for(inend=in+1;
564 inend<len && in_data[inend] == ch &&
565 inend < in+255;
566 inend++) ;
567 if ( inend - in > 3 ) {
568 /* More than 3 in a row. Output RLE. */
569 *out_data++ = ch;
570 *out_data++ = RUNCHAR;
571 *out_data++ = inend-in;
572 in = inend-1;
573 } else {
574 /* Less than 3. Output the byte itself */
575 *out_data++ = ch;
579 _PyString_Resize(&rv, (out_data -
580 (unsigned char *)PyString_AsString(rv)));
581 return rv;
584 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
586 static PyObject *
587 binascii_b2a_hqx(PyObject *self, PyObject *args)
589 unsigned char *ascii_data, *bin_data;
590 int leftbits = 0;
591 unsigned char this_ch;
592 unsigned int leftchar = 0;
593 PyObject *rv;
594 int len;
596 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
597 return NULL;
599 /* Allocate a buffer that is at least large enough */
600 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
601 return NULL;
602 ascii_data = (unsigned char *)PyString_AsString(rv);
604 for( ; len > 0 ; len--, bin_data++ ) {
605 /* Shift into our buffer, and output any 6bits ready */
606 leftchar = (leftchar << 8) | *bin_data;
607 leftbits += 8;
608 while ( leftbits >= 6 ) {
609 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
610 leftbits -= 6;
611 *ascii_data++ = table_b2a_hqx[this_ch];
614 /* Output a possible runt byte */
615 if ( leftbits ) {
616 leftchar <<= (6-leftbits);
617 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
619 _PyString_Resize(&rv, (ascii_data -
620 (unsigned char *)PyString_AsString(rv)));
621 return rv;
624 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
626 static PyObject *
627 binascii_rledecode_hqx(PyObject *self, PyObject *args)
629 unsigned char *in_data, *out_data;
630 unsigned char in_byte, in_repeat;
631 PyObject *rv;
632 int in_len, out_len, out_len_left;
634 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
635 return NULL;
637 /* Empty string is a special case */
638 if ( in_len == 0 )
639 return Py_BuildValue("s", "");
641 /* Allocate a buffer of reasonable size. Resized when needed */
642 out_len = in_len*2;
643 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
644 return NULL;
645 out_len_left = out_len;
646 out_data = (unsigned char *)PyString_AsString(rv);
649 ** We need two macros here to get/put bytes and handle
650 ** end-of-buffer for input and output strings.
652 #define INBYTE(b) \
653 do { \
654 if ( --in_len < 0 ) { \
655 PyErr_SetString(Incomplete, ""); \
656 Py_DECREF(rv); \
657 return NULL; \
659 b = *in_data++; \
660 } while(0)
662 #define OUTBYTE(b) \
663 do { \
664 if ( --out_len_left < 0 ) { \
665 _PyString_Resize(&rv, 2*out_len); \
666 if ( rv == NULL ) return NULL; \
667 out_data = (unsigned char *)PyString_AsString(rv) \
668 + out_len; \
669 out_len_left = out_len-1; \
670 out_len = out_len * 2; \
672 *out_data++ = b; \
673 } while(0)
676 ** Handle first byte separately (since we have to get angry
677 ** in case of an orphaned RLE code).
679 INBYTE(in_byte);
681 if (in_byte == RUNCHAR) {
682 INBYTE(in_repeat);
683 if (in_repeat != 0) {
684 /* Note Error, not Incomplete (which is at the end
685 ** of the string only). This is a programmer error.
687 PyErr_SetString(Error, "Orphaned RLE code at start");
688 Py_DECREF(rv);
689 return NULL;
691 OUTBYTE(RUNCHAR);
692 } else {
693 OUTBYTE(in_byte);
696 while( in_len > 0 ) {
697 INBYTE(in_byte);
699 if (in_byte == RUNCHAR) {
700 INBYTE(in_repeat);
701 if ( in_repeat == 0 ) {
702 /* Just an escaped RUNCHAR value */
703 OUTBYTE(RUNCHAR);
704 } else {
705 /* Pick up value and output a sequence of it */
706 in_byte = out_data[-1];
707 while ( --in_repeat > 0 )
708 OUTBYTE(in_byte);
710 } else {
711 /* Normal byte */
712 OUTBYTE(in_byte);
715 _PyString_Resize(&rv, (out_data -
716 (unsigned char *)PyString_AsString(rv)));
717 return rv;
720 PyDoc_STRVAR(doc_crc_hqx,
721 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
723 static PyObject *
724 binascii_crc_hqx(PyObject *self, PyObject *args)
726 unsigned char *bin_data;
727 unsigned int crc;
728 int len;
730 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
731 return NULL;
733 while(len--) {
734 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
737 return Py_BuildValue("i", crc);
740 PyDoc_STRVAR(doc_crc32,
741 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
743 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
744 Also known as: ISO 3307
745 **********************************************************************|
746 * *|
747 * Demonstration program to compute the 32-bit CRC used as the frame *|
748 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
749 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
750 * protocol). The 32-bit FCS was added via the Federal Register, *|
751 * 1 June 1982, p.23798. I presume but don't know for certain that *|
752 * this polynomial is or will be included in CCITT V.41, which *|
753 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
754 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
755 * errors by a factor of 10^-5 over 16-bit FCS. *|
756 * *|
757 **********************************************************************|
759 Copyright (C) 1986 Gary S. Brown. You may use this program, or
760 code or tables extracted from it, as desired without restriction.
762 First, the polynomial itself and its table of feedback terms. The
763 polynomial is
764 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
765 Note that we take it "backwards" and put the highest-order term in
766 the lowest-order bit. The X^32 term is "implied"; the LSB is the
767 X^31 term, etc. The X^0 term (usually shown as "+1") results in
768 the MSB being 1.
770 Note that the usual hardware shift register implementation, which
771 is what we're using (we're merely optimizing it by doing eight-bit
772 chunks at a time) shifts bits into the lowest-order term. In our
773 implementation, that means shifting towards the right. Why do we
774 do it this way? Because the calculated CRC must be transmitted in
775 order from highest-order term to lowest-order term. UARTs transmit
776 characters in order from LSB to MSB. By storing the CRC this way,
777 we hand it to the UART in the order low-byte to high-byte; the UART
778 sends each low-bit to high-bit; and the result is transmission bit
779 by bit from highest- to lowest-order term without requiring any bit
780 shuffling on our part. Reception works similarly.
782 The feedback terms table consists of 256, 32-bit entries. Notes:
784 1. The table can be generated at runtime if desired; code to do so
785 is shown later. It might not be obvious, but the feedback
786 terms simply represent the results of eight shift/xor opera-
787 tions for all combinations of data and CRC register values.
789 2. The CRC accumulation logic is the same for all CRC polynomials,
790 be they sixteen or thirty-two bits wide. You simply choose the
791 appropriate table. Alternatively, because the table can be
792 generated at runtime, you can start by generating the table for
793 the polynomial in question and use exactly the same "updcrc",
794 if your application needn't simultaneously handle two CRC
795 polynomials. (Note, however, that XMODEM is strange.)
797 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
798 of course, 32-bit entries work OK if the high 16 bits are zero.
800 4. The values must be right-shifted by eight bits by the "updcrc"
801 logic; the shift must be unsigned (bring in zeroes). On some
802 hardware you could probably optimize the shift in assembler by
803 using byte-swap instructions.
804 ********************************************************************/
/*
** Byte-at-a-time feedback table for the reflected CRC-32 polynomial
** 0xedb88320: entry i is the result of eight shift/xor steps on the
** byte i.  Read-only, hence const.
*/
static const unsigned long crc_32_tab[256] = {
    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
    0x2d02ef8dUL
};
861 static PyObject *
862 binascii_crc32(PyObject *self, PyObject *args)
863 { /* By Jim Ahlstrom; All rights transferred to CNRI */
864 unsigned char *bin_data;
865 unsigned long crc = 0UL; /* initial value of CRC */
866 int len;
868 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
869 return NULL;
871 crc = crc ^ 0xFFFFFFFFUL;
872 while(len--)
873 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
874 /* Note: (crc >> 8) MUST zero fill on left */
875 return Py_BuildValue("l", crc ^ 0xFFFFFFFFUL);
879 static PyObject *
880 binascii_hexlify(PyObject *self, PyObject *args)
882 char* argbuf;
883 int arglen;
884 PyObject *retval;
885 char* retbuf;
886 int i, j;
888 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
889 return NULL;
891 retval = PyString_FromStringAndSize(NULL, arglen*2);
892 if (!retval)
893 return NULL;
894 retbuf = PyString_AsString(retval);
895 if (!retbuf)
896 goto finally;
898 /* make hex version of string, taken from shamodule.c */
899 for (i=j=0; i < arglen; i++) {
900 char c;
901 c = (argbuf[i] >> 4) & 0xf;
902 c = (c>9) ? c+'a'-10 : c + '0';
903 retbuf[j++] = c;
904 c = argbuf[i] & 0xf;
905 c = (c>9) ? c+'a'-10 : c + '0';
906 retbuf[j++] = c;
908 return retval;
910 finally:
911 Py_DECREF(retval);
912 return NULL;
915 PyDoc_STRVAR(doc_hexlify,
916 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
918 This function is also available as \"hexlify()\".");
/*
** Return the value (0-15) of the hexadecimal digit c, accepting both
** upper and lower case, or -1 if c is not a hexadecimal digit.
**
** Uses explicit ASCII ranges rather than <ctype.h>, so the result does
** not depend on the locale and is defined for every int, including
** negative values.
*/
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
936 static PyObject *
937 binascii_unhexlify(PyObject *self, PyObject *args)
939 char* argbuf;
940 int arglen;
941 PyObject *retval;
942 char* retbuf;
943 int i, j;
945 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
946 return NULL;
948 /* XXX What should we do about strings with an odd length? Should
949 * we add an implicit leading zero, or a trailing zero? For now,
950 * raise an exception.
952 if (arglen % 2) {
953 PyErr_SetString(PyExc_TypeError, "Odd-length string");
954 return NULL;
957 retval = PyString_FromStringAndSize(NULL, (arglen/2));
958 if (!retval)
959 return NULL;
960 retbuf = PyString_AsString(retval);
961 if (!retbuf)
962 goto finally;
964 for (i=j=0; i < arglen; i += 2) {
965 int top = to_int(Py_CHARMASK(argbuf[i]));
966 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
967 if (top == -1 || bot == -1) {
968 PyErr_SetString(PyExc_TypeError,
969 "Non-hexadecimal digit found");
970 goto finally;
972 retbuf[j++] = (top << 4) + bot;
974 return retval;
976 finally:
977 Py_DECREF(retval);
978 return NULL;
981 PyDoc_STRVAR(doc_unhexlify,
982 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
984 hexstr must contain an even number of hex digits (upper or lower case).\n\
985 This function is also available as \"unhexlify()\"");
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when the
 * character is not a hex digit.  Eight entries per row. */
static int table_hex[128] = {
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x00 - 0x07 */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x08 - 0x0f */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x10 - 0x17 */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x18 - 0x1f */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x20 - 0x27 */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x28 - 0x2f */
	 0, 1, 2, 3,  4, 5, 6, 7,	/* '0' - '7'   */
	 8, 9,-1,-1, -1,-1,-1,-1,	/* '8' - '9'   */
	-1,10,11,12, 13,14,15,-1,	/* 'A' - 'F'   */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x48 - 0x4f */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x50 - 0x57 */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x58 - 0x5f */
	-1,10,11,12, 13,14,15,-1,	/* 'a' - 'f'   */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x68 - 0x6f */
	-1,-1,-1,-1, -1,-1,-1,-1,	/* 0x70 - 0x77 */
	-1,-1,-1,-1, -1,-1,-1,-1	/* 0x78 - 0x7f */
};

/* Table lookup; the argument must be a character code below 128. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit used by the quoted-printable encoder. */
#define MAXLINESIZE 76
1002 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1004 static PyObject*
1005 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1007 unsigned int in, out;
1008 char ch;
1009 unsigned char *data, *odata;
1010 unsigned int datalen = 0;
1011 PyObject *rv;
1012 static char *kwlist[] = {"data", "header", NULL};
1013 int header = 0;
1015 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1016 &datalen, &header))
1017 return NULL;
1019 /* We allocate the output same size as input, this is overkill */
1020 odata = (unsigned char *) calloc(1, datalen);
1022 if (odata == NULL) {
1023 PyErr_NoMemory();
1024 return NULL;
1027 in = out = 0;
1028 while (in < datalen) {
1029 if (data[in] == '=') {
1030 in++;
1031 if (in >= datalen) break;
1032 /* Soft line breaks */
1033 if ((data[in] == '\n') || (data[in] == '\r') ||
1034 (data[in] == ' ') || (data[in] == '\t')) {
1035 if (data[in] != '\n') {
1036 while (in < datalen && data[in] != '\n') in++;
1038 if (in < datalen) in++;
1040 else if (data[in] == '=') {
1041 /* broken case from broken python qp */
1042 odata[out++] = '=';
1043 in++;
1045 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1046 (data[in] >= 'a' && data[in] <= 'f') ||
1047 (data[in] >= '0' && data[in] <= '9')) &&
1048 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1049 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1050 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1051 /* hexval */
1052 ch = hexval(data[in]) << 4;
1053 in++;
1054 ch |= hexval(data[in]);
1055 in++;
1056 odata[out++] = ch;
1058 else {
1059 odata[out++] = '=';
1062 else if (header && data[in] == '_') {
1063 odata[out++] = ' ';
1064 in++;
1066 else {
1067 odata[out] = data[in];
1068 in++;
1069 out++;
1072 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1073 free (odata);
1074 return NULL;
1076 free (odata);
1077 return rv;
/*
 * Write the two upper-case hexadecimal digits of ch into s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
	static const char digits[] = "0123456789ABCDEF";

	s[0] = digits[(ch >> 4) & 0x0F];
	s[1] = digits[ch & 0x0F];
	return 0;
}
1091 PyDoc_STRVAR(doc_b2a_qp,
1092 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1093 Encode a string using quoted-printable encoding. \n\
1095 On encoding, when istext is set, newlines are not encoded, and white \n\
1096 space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
1097 both encoded. When quotetabs is set, space and tabs are encoded.");
1099 /* XXX: This is ridiculously complicated to be backward compatible
1100 * (mostly) with the quopri module. It doesn't re-create the quopri
1101 * module bug where text ending in CRLF has the CR encoded */
1102 static PyObject*
1103 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1105 unsigned int in, out;
1106 unsigned char *data, *odata;
1107 unsigned int datalen = 0, odatalen = 0;
1108 PyObject *rv;
1109 unsigned int linelen = 0;
1110 static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
1111 int istext = 1;
1112 int quotetabs = 0;
1113 int header = 0;
1114 unsigned char ch;
1115 int crlf = 0;
1116 unsigned char *p;
1118 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1119 &datalen, &quotetabs, &istext, &header))
1120 return NULL;
1122 /* See if this string is using CRLF line ends */
1123 /* XXX: this function has the side effect of converting all of
1124 * the end of lines to be the same depending on this detection
1125 * here */
1126 p = (unsigned char *) strchr((char *)data, '\n');
1127 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1128 crlf = 1;
1130 /* First, scan to see how many characters need to be encoded */
1131 in = 0;
1132 while (in < datalen) {
1133 if ((data[in] > 126) ||
1134 (data[in] == '=') ||
1135 (header && data[in] == '_') ||
1136 ((data[in] == '.') && (linelen == 1)) ||
1137 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1138 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1139 ((data[in] < 33) &&
1140 (data[in] != '\r') && (data[in] != '\n') &&
1141 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1143 if ((linelen + 3) >= MAXLINESIZE) {
1144 linelen = 0;
1145 if (crlf)
1146 odatalen += 3;
1147 else
1148 odatalen += 2;
1150 linelen += 3;
1151 odatalen += 3;
1152 in++;
1154 else {
1155 if (istext &&
1156 ((data[in] == '\n') ||
1157 ((in+1 < datalen) && (data[in] == '\r') &&
1158 (data[in+1] == '\n'))))
1160 linelen = 0;
1161 /* Protect against whitespace on end of line */
1162 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1163 odatalen += 2;
1164 if (crlf)
1165 odatalen += 2;
1166 else
1167 odatalen += 1;
1168 if (data[in] == '\r')
1169 in += 2;
1170 else
1171 in++;
1173 else {
1174 if ((in + 1 != datalen) &&
1175 (data[in+1] != '\n') &&
1176 (linelen + 1) >= MAXLINESIZE) {
1177 linelen = 0;
1178 if (crlf)
1179 odatalen += 3;
1180 else
1181 odatalen += 2;
1183 linelen++;
1184 odatalen++;
1185 in++;
1190 odata = (unsigned char *) calloc(1, odatalen);
1192 if (odata == NULL) {
1193 PyErr_NoMemory();
1194 return NULL;
1197 in = out = linelen = 0;
1198 while (in < datalen) {
1199 if ((data[in] > 126) ||
1200 (data[in] == '=') ||
1201 (header && data[in] == '_') ||
1202 ((data[in] == '.') && (linelen == 1)) ||
1203 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1204 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1205 ((data[in] < 33) &&
1206 (data[in] != '\r') && (data[in] != '\n') &&
1207 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1209 if ((linelen + 3 )>= MAXLINESIZE) {
1210 odata[out++] = '=';
1211 if (crlf) odata[out++] = '\r';
1212 odata[out++] = '\n';
1213 linelen = 0;
1215 odata[out++] = '=';
1216 to_hex(data[in], &odata[out]);
1217 out += 2;
1218 in++;
1219 linelen += 3;
1221 else {
1222 if (istext &&
1223 ((data[in] == '\n') ||
1224 ((in+1 < datalen) && (data[in] == '\r') &&
1225 (data[in+1] == '\n'))))
1227 linelen = 0;
1228 /* Protect against whitespace on end of line */
1229 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1230 ch = odata[out-1];
1231 odata[out-1] = '=';
1232 to_hex(ch, &odata[out]);
1233 out += 2;
1236 if (crlf) odata[out++] = '\r';
1237 odata[out++] = '\n';
1238 if (data[in] == '\r')
1239 in += 2;
1240 else
1241 in++;
1243 else {
1244 if ((in + 1 != datalen) &&
1245 (data[in+1] != '\n') &&
1246 (linelen + 1) >= MAXLINESIZE) {
1247 odata[out++] = '=';
1248 if (crlf) odata[out++] = '\r';
1249 odata[out++] = '\n';
1250 linelen = 0;
1252 linelen++;
1253 if (header && data[in] == ' ') {
1254 odata[out++] = '_';
1255 in++;
1257 else {
1258 odata[out++] = data[in++];
1263 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1264 free (odata);
1265 return NULL;
1267 free (odata);
1268 return rv;
1271 /* List of functions defined in the module */
1273 static struct PyMethodDef binascii_module_methods[] = {
1274 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1275 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1276 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1277 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1278 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1279 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1280 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1281 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1282 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1283 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1284 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1285 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1286 doc_rledecode_hqx},
1287 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1288 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1289 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1290 doc_a2b_qp},
1291 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1292 doc_b2a_qp},
1293 {NULL, NULL} /* sentinel */
1297 /* Initialization function for the module (*must* be called initbinascii) */
1298 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1300 DL_EXPORT(void)
1301 initbinascii(void)
1303 PyObject *m, *d, *x;
1305 /* Create the module and add the functions */
1306 m = Py_InitModule("binascii", binascii_module_methods);
1308 d = PyModule_GetDict(m);
1309 x = PyString_FromString(doc_binascii);
1310 PyDict_SetItemString(d, "__doc__", x);
1311 Py_XDECREF(x);
1313 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1314 PyDict_SetItemString(d, "Error", Error);
1315 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1316 PyDict_SetItemString(d, "Incomplete", Incomplete);