More installation info. Bump alpha version.
[python/dscho.git] / Modules / binascii.c
blobc56d528ef47f28bbf20e530eb9bea4497c09d41a
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
44 ** Jack Jansen, CWI, July 1995.
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
53 ** Brandon Long, September 2001.
57 #include "Python.h"
59 static PyObject *Error;
60 static PyObject *Incomplete;
63 ** hqx lookup table, ascii->binary.
66 #define RUNCHAR 0x90
68 #define DONE 0x7F
69 #define SKIP 0x7E
70 #define FAIL 0x7D
72 static unsigned char table_a2b_hqx[256] = {
73 /* ^@ ^A ^B ^C ^D ^E ^F ^G */
74 /* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
75 /* \b \t \n ^K ^L \r ^N ^O */
76 /* 1*/ FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
77 /* ^P ^Q ^R ^S ^T ^U ^V ^W */
78 /* 2*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
79 /* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
80 /* 3*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
81 /* ! " # $ % & ' */
82 /* 4*/ FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
83 /* ( ) * + , - . / */
84 /* 5*/ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
85 /* 0 1 2 3 4 5 6 7 */
86 /* 6*/ 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
87 /* 8 9 : ; < = > ? */
88 /* 7*/ 0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
89 /* @ A B C D E F G */
90 /* 8*/ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
91 /* H I J K L M N O */
92 /* 9*/ 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
93 /* P Q R S T U V W */
94 /*10*/ 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
95 /* X Y Z [ \ ] ^ _ */
96 /*11*/ 0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
97 /* ` a b c d e f g */
98 /*12*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
99 /* h i j k l m n o */
100 /*13*/ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
101 /* p q r s t u v w */
102 /*14*/ 0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
103 /* x y z { | } ~ ^? */
104 /*15*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
105 /*16*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
106 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
107 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
108 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
109 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
110 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
112 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
113 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
114 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
115 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
116 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
117 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
118 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
119 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
120 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
123 static unsigned char table_b2a_hqx[] =
124 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
126 static char table_a2b_base64[] = {
127 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
128 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
129 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
130 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
131 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
132 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
133 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
134 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
137 #define BASE64_PAD '='
139 /* Max binary chunk size; limited only by available memory */
140 #define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
142 static unsigned char table_b2a_base64[] =
143 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
147 static unsigned short crctab_hqx[256] = {
148 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
149 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
150 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
151 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
152 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
153 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
154 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
155 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
156 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
157 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
158 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
159 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
160 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
161 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
162 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
163 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
164 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
165 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
166 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
167 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
168 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
169 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
170 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
171 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
172 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
173 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
174 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
175 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
176 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
177 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
178 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
179 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
182 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
184 static PyObject *
185 binascii_a2b_uu(PyObject *self, PyObject *args)
187 unsigned char *ascii_data, *bin_data;
188 int leftbits = 0;
189 unsigned char this_ch;
190 unsigned int leftchar = 0;
191 PyObject *rv;
192 int ascii_len, bin_len;
194 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
195 return NULL;
197 /* First byte: binary data length (in bytes) */
198 bin_len = (*ascii_data++ - ' ') & 077;
199 ascii_len--;
201 /* Allocate the buffer */
202 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
203 return NULL;
204 bin_data = (unsigned char *)PyString_AsString(rv);
206 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
207 this_ch = *ascii_data;
208 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
210 ** Whitespace. Assume some spaces got eaten at
211 ** end-of-line. (We check this later)
213 this_ch = 0;
214 } else {
215 /* Check the character for legality
216 ** The 64 in stead of the expected 63 is because
217 ** there are a few uuencodes out there that use
218 ** '`' as zero instead of space.
220 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
221 PyErr_SetString(Error, "Illegal char");
222 Py_DECREF(rv);
223 return NULL;
225 this_ch = (this_ch - ' ') & 077;
228 ** Shift it in on the low end, and see if there's
229 ** a byte ready for output.
231 leftchar = (leftchar << 6) | (this_ch);
232 leftbits += 6;
233 if ( leftbits >= 8 ) {
234 leftbits -= 8;
235 *bin_data++ = (leftchar >> leftbits) & 0xff;
236 leftchar &= ((1 << leftbits) - 1);
237 bin_len--;
241 ** Finally, check that if there's anything left on the line
242 ** that it's whitespace only.
244 while( ascii_len-- > 0 ) {
245 this_ch = *ascii_data++;
246 /* Extra '`' may be written as padding in some cases */
247 if ( this_ch != ' ' && this_ch != ' '+64 &&
248 this_ch != '\n' && this_ch != '\r' ) {
249 PyErr_SetString(Error, "Trailing garbage");
250 Py_DECREF(rv);
251 return NULL;
254 return rv;
257 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
259 static PyObject *
260 binascii_b2a_uu(PyObject *self, PyObject *args)
262 unsigned char *ascii_data, *bin_data;
263 int leftbits = 0;
264 unsigned char this_ch;
265 unsigned int leftchar = 0;
266 PyObject *rv;
267 int bin_len;
269 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
270 return NULL;
271 if ( bin_len > 45 ) {
272 /* The 45 is a limit that appears in all uuencode's */
273 PyErr_SetString(Error, "At most 45 bytes at once");
274 return NULL;
277 /* We're lazy and allocate to much (fixed up later) */
278 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
279 return NULL;
280 ascii_data = (unsigned char *)PyString_AsString(rv);
282 /* Store the length */
283 *ascii_data++ = ' ' + (bin_len & 077);
285 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
286 /* Shift the data (or padding) into our buffer */
287 if ( bin_len > 0 ) /* Data */
288 leftchar = (leftchar << 8) | *bin_data;
289 else /* Padding */
290 leftchar <<= 8;
291 leftbits += 8;
293 /* See if there are 6-bit groups ready */
294 while ( leftbits >= 6 ) {
295 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
296 leftbits -= 6;
297 *ascii_data++ = this_ch + ' ';
300 *ascii_data++ = '\n'; /* Append a courtesy newline */
302 _PyString_Resize(&rv, (ascii_data -
303 (unsigned char *)PyString_AsString(rv)));
304 return rv;
308 static int
309 binascii_find_valid(unsigned char *s, int slen, int num)
311 /* Finds & returns the (num+1)th
312 ** valid character for base64, or -1 if none.
315 int ret = -1;
316 unsigned char c, b64val;
318 while ((slen > 0) && (ret == -1)) {
319 c = *s;
320 b64val = table_a2b_base64[c & 0x7f];
321 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
322 if (num == 0)
323 ret = *s;
324 num--;
327 s++;
328 slen--;
330 return ret;
333 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
335 static PyObject *
336 binascii_a2b_base64(PyObject *self, PyObject *args)
338 unsigned char *ascii_data, *bin_data;
339 int leftbits = 0;
340 unsigned char this_ch;
341 unsigned int leftchar = 0;
342 PyObject *rv;
343 int ascii_len, bin_len;
344 int quad_pos = 0;
346 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
347 return NULL;
349 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
351 /* Allocate the buffer */
352 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
353 return NULL;
354 bin_data = (unsigned char *)PyString_AsString(rv);
355 bin_len = 0;
357 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
358 this_ch = *ascii_data;
360 if (this_ch > 0x7f ||
361 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
362 continue;
364 /* Check for pad sequences and ignore
365 ** the invalid ones.
367 if (this_ch == BASE64_PAD) {
368 if ( (quad_pos < 2) ||
369 ((quad_pos == 2) &&
370 (binascii_find_valid(ascii_data, ascii_len, 1)
371 != BASE64_PAD)) )
373 continue;
375 else {
376 /* A pad sequence means no more input.
377 ** We've already interpreted the data
378 ** from the quad at this point.
380 leftbits = 0;
381 break;
385 this_ch = table_a2b_base64[*ascii_data];
386 if ( this_ch == (unsigned char) -1 )
387 continue;
390 ** Shift it in on the low end, and see if there's
391 ** a byte ready for output.
393 quad_pos = (quad_pos + 1) & 0x03;
394 leftchar = (leftchar << 6) | (this_ch);
395 leftbits += 6;
397 if ( leftbits >= 8 ) {
398 leftbits -= 8;
399 *bin_data++ = (leftchar >> leftbits) & 0xff;
400 bin_len++;
401 leftchar &= ((1 << leftbits) - 1);
405 if (leftbits != 0) {
406 PyErr_SetString(Error, "Incorrect padding");
407 Py_DECREF(rv);
408 return NULL;
411 /* and set string size correctly */
412 if (bin_len > 0)
413 _PyString_Resize(&rv, bin_len);
414 return rv;
417 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
419 static PyObject *
420 binascii_b2a_base64(PyObject *self, PyObject *args)
422 unsigned char *ascii_data, *bin_data;
423 int leftbits = 0;
424 unsigned char this_ch;
425 unsigned int leftchar = 0;
426 PyObject *rv;
427 int bin_len;
429 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
430 return NULL;
431 if ( bin_len > BASE64_MAXBIN ) {
432 PyErr_SetString(Error, "Too much data for base64 line");
433 return NULL;
436 /* We're lazy and allocate too much (fixed up later).
437 "+3" leaves room for up to two pad characters and a trailing
438 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
439 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
440 return NULL;
441 ascii_data = (unsigned char *)PyString_AsString(rv);
443 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
444 /* Shift the data into our buffer */
445 leftchar = (leftchar << 8) | *bin_data;
446 leftbits += 8;
448 /* See if there are 6-bit groups ready */
449 while ( leftbits >= 6 ) {
450 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
451 leftbits -= 6;
452 *ascii_data++ = table_b2a_base64[this_ch];
455 if ( leftbits == 2 ) {
456 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
457 *ascii_data++ = BASE64_PAD;
458 *ascii_data++ = BASE64_PAD;
459 } else if ( leftbits == 4 ) {
460 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
461 *ascii_data++ = BASE64_PAD;
463 *ascii_data++ = '\n'; /* Append a courtesy newline */
465 _PyString_Resize(&rv, (ascii_data -
466 (unsigned char *)PyString_AsString(rv)));
467 return rv;
470 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
472 static PyObject *
473 binascii_a2b_hqx(PyObject *self, PyObject *args)
475 unsigned char *ascii_data, *bin_data;
476 int leftbits = 0;
477 unsigned char this_ch;
478 unsigned int leftchar = 0;
479 PyObject *rv;
480 int len;
481 int done = 0;
483 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
484 return NULL;
486 /* Allocate a string that is too big (fixed later) */
487 if ( (rv=PyString_FromStringAndSize(NULL, len)) == NULL )
488 return NULL;
489 bin_data = (unsigned char *)PyString_AsString(rv);
491 for( ; len > 0 ; len--, ascii_data++ ) {
492 /* Get the byte and look it up */
493 this_ch = table_a2b_hqx[*ascii_data];
494 if ( this_ch == SKIP )
495 continue;
496 if ( this_ch == FAIL ) {
497 PyErr_SetString(Error, "Illegal char");
498 Py_DECREF(rv);
499 return NULL;
501 if ( this_ch == DONE ) {
502 /* The terminating colon */
503 done = 1;
504 break;
507 /* Shift it into the buffer and see if any bytes are ready */
508 leftchar = (leftchar << 6) | (this_ch);
509 leftbits += 6;
510 if ( leftbits >= 8 ) {
511 leftbits -= 8;
512 *bin_data++ = (leftchar >> leftbits) & 0xff;
513 leftchar &= ((1 << leftbits) - 1);
517 if ( leftbits && !done ) {
518 PyErr_SetString(Incomplete,
519 "String has incomplete number of bytes");
520 Py_DECREF(rv);
521 return NULL;
523 _PyString_Resize(
524 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
525 if (rv) {
526 PyObject *rrv = Py_BuildValue("Oi", rv, done);
527 Py_DECREF(rv);
528 return rrv;
531 return NULL;
534 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
536 static PyObject *
537 binascii_rlecode_hqx(PyObject *self, PyObject *args)
539 unsigned char *in_data, *out_data;
540 PyObject *rv;
541 unsigned char ch;
542 int in, inend, len;
544 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
545 return NULL;
547 /* Worst case: output is twice as big as input (fixed later) */
548 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
549 return NULL;
550 out_data = (unsigned char *)PyString_AsString(rv);
552 for( in=0; in<len; in++) {
553 ch = in_data[in];
554 if ( ch == RUNCHAR ) {
555 /* RUNCHAR. Escape it. */
556 *out_data++ = RUNCHAR;
557 *out_data++ = 0;
558 } else {
559 /* Check how many following are the same */
560 for(inend=in+1;
561 inend<len && in_data[inend] == ch &&
562 inend < in+255;
563 inend++) ;
564 if ( inend - in > 3 ) {
565 /* More than 3 in a row. Output RLE. */
566 *out_data++ = ch;
567 *out_data++ = RUNCHAR;
568 *out_data++ = inend-in;
569 in = inend-1;
570 } else {
571 /* Less than 3. Output the byte itself */
572 *out_data++ = ch;
576 _PyString_Resize(&rv, (out_data -
577 (unsigned char *)PyString_AsString(rv)));
578 return rv;
581 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
583 static PyObject *
584 binascii_b2a_hqx(PyObject *self, PyObject *args)
586 unsigned char *ascii_data, *bin_data;
587 int leftbits = 0;
588 unsigned char this_ch;
589 unsigned int leftchar = 0;
590 PyObject *rv;
591 int len;
593 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
594 return NULL;
596 /* Allocate a buffer that is at least large enough */
597 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
598 return NULL;
599 ascii_data = (unsigned char *)PyString_AsString(rv);
601 for( ; len > 0 ; len--, bin_data++ ) {
602 /* Shift into our buffer, and output any 6bits ready */
603 leftchar = (leftchar << 8) | *bin_data;
604 leftbits += 8;
605 while ( leftbits >= 6 ) {
606 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
607 leftbits -= 6;
608 *ascii_data++ = table_b2a_hqx[this_ch];
611 /* Output a possible runt byte */
612 if ( leftbits ) {
613 leftchar <<= (6-leftbits);
614 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
616 _PyString_Resize(&rv, (ascii_data -
617 (unsigned char *)PyString_AsString(rv)));
618 return rv;
621 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
623 static PyObject *
624 binascii_rledecode_hqx(PyObject *self, PyObject *args)
626 unsigned char *in_data, *out_data;
627 unsigned char in_byte, in_repeat;
628 PyObject *rv;
629 int in_len, out_len, out_len_left;
631 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
632 return NULL;
634 /* Empty string is a special case */
635 if ( in_len == 0 )
636 return Py_BuildValue("s", "");
638 /* Allocate a buffer of reasonable size. Resized when needed */
639 out_len = in_len*2;
640 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
641 return NULL;
642 out_len_left = out_len;
643 out_data = (unsigned char *)PyString_AsString(rv);
646 ** We need two macros here to get/put bytes and handle
647 ** end-of-buffer for input and output strings.
649 #define INBYTE(b) \
650 do { \
651 if ( --in_len < 0 ) { \
652 PyErr_SetString(Incomplete, ""); \
653 Py_DECREF(rv); \
654 return NULL; \
656 b = *in_data++; \
657 } while(0)
659 #define OUTBYTE(b) \
660 do { \
661 if ( --out_len_left < 0 ) { \
662 _PyString_Resize(&rv, 2*out_len); \
663 if ( rv == NULL ) return NULL; \
664 out_data = (unsigned char *)PyString_AsString(rv) \
665 + out_len; \
666 out_len_left = out_len-1; \
667 out_len = out_len * 2; \
669 *out_data++ = b; \
670 } while(0)
673 ** Handle first byte separately (since we have to get angry
674 ** in case of an orphaned RLE code).
676 INBYTE(in_byte);
678 if (in_byte == RUNCHAR) {
679 INBYTE(in_repeat);
680 if (in_repeat != 0) {
681 /* Note Error, not Incomplete (which is at the end
682 ** of the string only). This is a programmer error.
684 PyErr_SetString(Error, "Orphaned RLE code at start");
685 Py_DECREF(rv);
686 return NULL;
688 OUTBYTE(RUNCHAR);
689 } else {
690 OUTBYTE(in_byte);
693 while( in_len > 0 ) {
694 INBYTE(in_byte);
696 if (in_byte == RUNCHAR) {
697 INBYTE(in_repeat);
698 if ( in_repeat == 0 ) {
699 /* Just an escaped RUNCHAR value */
700 OUTBYTE(RUNCHAR);
701 } else {
702 /* Pick up value and output a sequence of it */
703 in_byte = out_data[-1];
704 while ( --in_repeat > 0 )
705 OUTBYTE(in_byte);
707 } else {
708 /* Normal byte */
709 OUTBYTE(in_byte);
712 _PyString_Resize(&rv, (out_data -
713 (unsigned char *)PyString_AsString(rv)));
714 return rv;
717 PyDoc_STRVAR(doc_crc_hqx,
718 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
720 static PyObject *
721 binascii_crc_hqx(PyObject *self, PyObject *args)
723 unsigned char *bin_data;
724 unsigned int crc;
725 int len;
727 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
728 return NULL;
730 while(len--) {
731 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
734 return Py_BuildValue("i", crc);
737 PyDoc_STRVAR(doc_crc32,
738 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
740 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
741 Also known as: ISO 3307
742 **********************************************************************|
743 * *|
744 * Demonstration program to compute the 32-bit CRC used as the frame *|
745 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
746 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
747 * protocol). The 32-bit FCS was added via the Federal Register, *|
748 * 1 June 1982, p.23798. I presume but don't know for certain that *|
749 * this polynomial is or will be included in CCITT V.41, which *|
750 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
751 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
752 * errors by a factor of 10^-5 over 16-bit FCS. *|
753 * *|
754 **********************************************************************|
756 Copyright (C) 1986 Gary S. Brown. You may use this program, or
757 code or tables extracted from it, as desired without restriction.
759 First, the polynomial itself and its table of feedback terms. The
760 polynomial is
761 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
762 Note that we take it "backwards" and put the highest-order term in
763 the lowest-order bit. The X^32 term is "implied"; the LSB is the
764 X^31 term, etc. The X^0 term (usually shown as "+1") results in
765 the MSB being 1.
767 Note that the usual hardware shift register implementation, which
768 is what we're using (we're merely optimizing it by doing eight-bit
769 chunks at a time) shifts bits into the lowest-order term. In our
770 implementation, that means shifting towards the right. Why do we
771 do it this way? Because the calculated CRC must be transmitted in
772 order from highest-order term to lowest-order term. UARTs transmit
773 characters in order from LSB to MSB. By storing the CRC this way,
774 we hand it to the UART in the order low-byte to high-byte; the UART
775 sends each low-bit to high-bit; and the result is transmission bit
776 by bit from highest- to lowest-order term without requiring any bit
777 shuffling on our part. Reception works similarly.
779 The feedback terms table consists of 256, 32-bit entries. Notes:
781 1. The table can be generated at runtime if desired; code to do so
782 is shown later. It might not be obvious, but the feedback
783 terms simply represent the results of eight shift/xor opera-
784 tions for all combinations of data and CRC register values.
786 2. The CRC accumulation logic is the same for all CRC polynomials,
787 be they sixteen or thirty-two bits wide. You simply choose the
788 appropriate table. Alternatively, because the table can be
789 generated at runtime, you can start by generating the table for
790 the polynomial in question and use exactly the same "updcrc",
791 if your application needn't simultaneously handle two CRC
792 polynomials. (Note, however, that XMODEM is strange.)
794 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
795 of course, 32-bit entries work OK if the high 16 bits are zero.
797 4. The values must be right-shifted by eight bits by the "updcrc"
798 logic; the shift must be unsigned (bring in zeroes). On some
799 hardware you could probably optimize the shift in assembler by
800 using byte-swap instructions.
801 ********************************************************************/
803 static unsigned long crc_32_tab[256] = {
804 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
805 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
806 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
807 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
808 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
809 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
810 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
811 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
812 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
813 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
814 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
815 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
816 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
817 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
818 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
819 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
820 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
821 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
822 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
823 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
824 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
825 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
826 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
827 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
828 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
829 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
830 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
831 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
832 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
833 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
834 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
835 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
836 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
837 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
838 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
839 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
840 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
841 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
842 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
843 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
844 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
845 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
846 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
847 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
848 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
849 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
850 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
851 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
852 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
853 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
854 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
855 0x2d02ef8dUL
858 static PyObject *
859 binascii_crc32(PyObject *self, PyObject *args)
860 { /* By Jim Ahlstrom; All rights transferred to CNRI */
861 unsigned char *bin_data;
862 unsigned long crc = 0UL; /* initial value of CRC */
863 int len;
864 long result;
866 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
867 return NULL;
869 crc = ~ crc;
870 #if SIZEOF_LONG > 4
871 /* only want the trailing 32 bits */
872 crc &= 0xFFFFFFFFUL;
873 #endif
874 while (len--)
875 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
876 /* Note: (crc >> 8) MUST zero fill on left */
878 result = (long)(crc ^ 0xFFFFFFFFUL);
879 #if SIZEOF_LONG > 4
880 /* Extend the sign bit. This is one way to ensure the result is the
881 * same across platforms. The other way would be to return an
882 * unbounded unsigned long, but the evidence suggests that lots of
883 * code outside this treats the result as if it were a signed 4-byte
884 * integer.
886 result |= -(result & (1L << 31));
887 #endif
888 return PyInt_FromLong(result);
892 static PyObject *
893 binascii_hexlify(PyObject *self, PyObject *args)
895 char* argbuf;
896 int arglen;
897 PyObject *retval;
898 char* retbuf;
899 int i, j;
901 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
902 return NULL;
904 retval = PyString_FromStringAndSize(NULL, arglen*2);
905 if (!retval)
906 return NULL;
907 retbuf = PyString_AsString(retval);
908 if (!retbuf)
909 goto finally;
911 /* make hex version of string, taken from shamodule.c */
912 for (i=j=0; i < arglen; i++) {
913 char c;
914 c = (argbuf[i] >> 4) & 0xf;
915 c = (c>9) ? c+'a'-10 : c + '0';
916 retbuf[j++] = c;
917 c = argbuf[i] & 0xf;
918 c = (c>9) ? c+'a'-10 : c + '0';
919 retbuf[j++] = c;
921 return retval;
923 finally:
924 Py_DECREF(retval);
925 return NULL;
928 PyDoc_STRVAR(doc_hexlify,
929 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
931 This function is also available as \"hexlify()\".");
/*
** Convert one hexadecimal digit character to its numeric value.
**
** Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15; any other
** value of c returns -1.
**
** Plain range comparisons are used instead of the <ctype.h> functions:
** those are undefined for arguments that are neither EOF nor
** representable as unsigned char, and isupper()/tolower() depend on
** the current locale.  This version is safe for every int.
*/
static int
to_int(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
949 static PyObject *
950 binascii_unhexlify(PyObject *self, PyObject *args)
952 char* argbuf;
953 int arglen;
954 PyObject *retval;
955 char* retbuf;
956 int i, j;
958 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
959 return NULL;
961 /* XXX What should we do about strings with an odd length? Should
962 * we add an implicit leading zero, or a trailing zero? For now,
963 * raise an exception.
965 if (arglen % 2) {
966 PyErr_SetString(PyExc_TypeError, "Odd-length string");
967 return NULL;
970 retval = PyString_FromStringAndSize(NULL, (arglen/2));
971 if (!retval)
972 return NULL;
973 retbuf = PyString_AsString(retval);
974 if (!retbuf)
975 goto finally;
977 for (i=j=0; i < arglen; i += 2) {
978 int top = to_int(Py_CHARMASK(argbuf[i]));
979 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
980 if (top == -1 || bot == -1) {
981 PyErr_SetString(PyExc_TypeError,
982 "Non-hexadecimal digit found");
983 goto finally;
985 retbuf[j++] = (top << 4) + bot;
987 return retval;
989 finally:
990 Py_DECREF(retval);
991 return NULL;
/* Docstring for binascii_unhexlify(); the method table registers it under
 * both the "a2b_hex" and "unhexlify" names.
 * NOTE(review): the embedded source numbering jumps from 995 to 997, so one
 * line of this string literal appears to be missing from this view --
 * confirm against the real file before editing the text. */
994 PyDoc_STRVAR(doc_unhexlify,
995 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
997 hexstr must contain an even number of hex digits (upper or lower case).\n\
998 This function is also available as \"unhexlify()\"");
/* Value of each 7-bit character code when read as a hexadecimal digit
 * ('0'-'9', 'A'-'F', 'a'-'f'), or -1 when the character is not a hex digit.
 * Indexed by character code; the table is read-only. */
static const int table_hex[128] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Hex-digit value of character code c, or -1 if c is not a hex digit.
 * Codes outside 0..127 (including negative char values, which become large
 * when converted to unsigned) yield -1 instead of indexing past the table.
 * Note: the argument is evaluated twice, so it must have no side effects. */
#define hexval(c) \
    (((unsigned int)(c) < 128U) ? table_hex[(unsigned int)(c)] : -1)

/* Line-length threshold used by b2a_qp when deciding where to insert a
 * soft line break. */
#define MAXLINESIZE 76
1015 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1017 static PyObject*
1018 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1020 unsigned int in, out;
1021 char ch;
1022 unsigned char *data, *odata;
1023 unsigned int datalen = 0;
1024 PyObject *rv;
1025 static char *kwlist[] = {"data", "header", NULL};
1026 int header = 0;
1028 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1029 &datalen, &header))
1030 return NULL;
1032 /* We allocate the output same size as input, this is overkill */
1033 odata = (unsigned char *) calloc(1, datalen);
1035 if (odata == NULL) {
1036 PyErr_NoMemory();
1037 return NULL;
1040 in = out = 0;
1041 while (in < datalen) {
1042 if (data[in] == '=') {
1043 in++;
1044 if (in >= datalen) break;
1045 /* Soft line breaks */
1046 if ((data[in] == '\n') || (data[in] == '\r') ||
1047 (data[in] == ' ') || (data[in] == '\t')) {
1048 if (data[in] != '\n') {
1049 while (in < datalen && data[in] != '\n') in++;
1051 if (in < datalen) in++;
1053 else if (data[in] == '=') {
1054 /* broken case from broken python qp */
1055 odata[out++] = '=';
1056 in++;
1058 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1059 (data[in] >= 'a' && data[in] <= 'f') ||
1060 (data[in] >= '0' && data[in] <= '9')) &&
1061 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1062 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1063 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1064 /* hexval */
1065 ch = hexval(data[in]) << 4;
1066 in++;
1067 ch |= hexval(data[in]);
1068 in++;
1069 odata[out++] = ch;
1071 else {
1072 odata[out++] = '=';
1075 else if (header && data[in] == '_') {
1076 odata[out++] = ' ';
1077 in++;
1079 else {
1080 odata[out] = data[in];
1081 in++;
1082 out++;
1085 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1086 free (odata);
1087 return NULL;
1089 free (odata);
1090 return rv;
/* Write the two-digit uppercase hexadecimal form of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
/* Docstring for binascii_b2a_qp(), registered as "b2a_qp" in the method
 * table.
 * NOTE(review): the embedded source numbering jumps from 1106 to 1108, so
 * one line of this string literal appears to be missing from this view, and
 * leading whitespace inside the literal cannot be recovered here -- confirm
 * against the real file before editing the text. */
1104 PyDoc_STRVAR(doc_b2a_qp,
1105 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1106 Encode a string using quoted-printable encoding. \n\
1108 On encoding, when istext is set, newlines are not encoded, and white \n\
1109 space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
1110 both encoded. When quotetabs is set, space and tabs are encoded.");
1112 /* XXX: This is ridiculously complicated to be backward compatible
1113 * (mostly) with the quopri module. It doesn't re-create the quopri
1114 * module bug where text ending in CRLF has the CR encoded */
1115 static PyObject*
1116 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1118 unsigned int in, out;
1119 unsigned char *data, *odata;
1120 unsigned int datalen = 0, odatalen = 0;
1121 PyObject *rv;
1122 unsigned int linelen = 0;
1123 static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
1124 int istext = 1;
1125 int quotetabs = 0;
1126 int header = 0;
1127 unsigned char ch;
1128 int crlf = 0;
1129 unsigned char *p;
1131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1132 &datalen, &quotetabs, &istext, &header))
1133 return NULL;
1135 /* See if this string is using CRLF line ends */
1136 /* XXX: this function has the side effect of converting all of
1137 * the end of lines to be the same depending on this detection
1138 * here */
1139 p = (unsigned char *) strchr((char *)data, '\n');
1140 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1141 crlf = 1;
1143 /* First, scan to see how many characters need to be encoded */
1144 in = 0;
1145 while (in < datalen) {
1146 if ((data[in] > 126) ||
1147 (data[in] == '=') ||
1148 (header && data[in] == '_') ||
1149 ((data[in] == '.') && (linelen == 1)) ||
1150 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1151 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1152 ((data[in] < 33) &&
1153 (data[in] != '\r') && (data[in] != '\n') &&
1154 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1156 if ((linelen + 3) >= MAXLINESIZE) {
1157 linelen = 0;
1158 if (crlf)
1159 odatalen += 3;
1160 else
1161 odatalen += 2;
1163 linelen += 3;
1164 odatalen += 3;
1165 in++;
1167 else {
1168 if (istext &&
1169 ((data[in] == '\n') ||
1170 ((in+1 < datalen) && (data[in] == '\r') &&
1171 (data[in+1] == '\n'))))
1173 linelen = 0;
1174 /* Protect against whitespace on end of line */
1175 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1176 odatalen += 2;
1177 if (crlf)
1178 odatalen += 2;
1179 else
1180 odatalen += 1;
1181 if (data[in] == '\r')
1182 in += 2;
1183 else
1184 in++;
1186 else {
1187 if ((in + 1 != datalen) &&
1188 (data[in+1] != '\n') &&
1189 (linelen + 1) >= MAXLINESIZE) {
1190 linelen = 0;
1191 if (crlf)
1192 odatalen += 3;
1193 else
1194 odatalen += 2;
1196 linelen++;
1197 odatalen++;
1198 in++;
1203 odata = (unsigned char *) calloc(1, odatalen);
1205 if (odata == NULL) {
1206 PyErr_NoMemory();
1207 return NULL;
1210 in = out = linelen = 0;
1211 while (in < datalen) {
1212 if ((data[in] > 126) ||
1213 (data[in] == '=') ||
1214 (header && data[in] == '_') ||
1215 ((data[in] == '.') && (linelen == 1)) ||
1216 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1217 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1218 ((data[in] < 33) &&
1219 (data[in] != '\r') && (data[in] != '\n') &&
1220 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1222 if ((linelen + 3 )>= MAXLINESIZE) {
1223 odata[out++] = '=';
1224 if (crlf) odata[out++] = '\r';
1225 odata[out++] = '\n';
1226 linelen = 0;
1228 odata[out++] = '=';
1229 to_hex(data[in], &odata[out]);
1230 out += 2;
1231 in++;
1232 linelen += 3;
1234 else {
1235 if (istext &&
1236 ((data[in] == '\n') ||
1237 ((in+1 < datalen) && (data[in] == '\r') &&
1238 (data[in+1] == '\n'))))
1240 linelen = 0;
1241 /* Protect against whitespace on end of line */
1242 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1243 ch = odata[out-1];
1244 odata[out-1] = '=';
1245 to_hex(ch, &odata[out]);
1246 out += 2;
1249 if (crlf) odata[out++] = '\r';
1250 odata[out++] = '\n';
1251 if (data[in] == '\r')
1252 in += 2;
1253 else
1254 in++;
1256 else {
1257 if ((in + 1 != datalen) &&
1258 (data[in+1] != '\n') &&
1259 (linelen + 1) >= MAXLINESIZE) {
1260 odata[out++] = '=';
1261 if (crlf) odata[out++] = '\r';
1262 odata[out++] = '\n';
1263 linelen = 0;
1265 linelen++;
1266 if (header && data[in] == ' ') {
1267 odata[out++] = '_';
1268 in++;
1270 else {
1271 odata[out++] = data[in++];
1276 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1277 free (odata);
1278 return NULL;
1280 free (odata);
1281 return rv;
1284 /* List of functions defined in the module */
1286 static struct PyMethodDef binascii_module_methods[] = {
1287 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1288 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1289 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1290 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1291 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1292 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1293 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1294 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1295 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1296 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1297 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1298 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1299 doc_rledecode_hqx},
1300 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1301 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1302 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1303 doc_a2b_qp},
1304 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1305 doc_b2a_qp},
1306 {NULL, NULL} /* sentinel */
1310 /* Initialization function for the module (*must* be called initbinascii) */
1311 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1313 PyMODINIT_FUNC
1314 initbinascii(void)
1316 PyObject *m, *d, *x;
1318 /* Create the module and add the functions */
1319 m = Py_InitModule("binascii", binascii_module_methods);
1321 d = PyModule_GetDict(m);
1322 x = PyString_FromString(doc_binascii);
1323 PyDict_SetItemString(d, "__doc__", x);
1324 Py_XDECREF(x);
1326 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1327 PyDict_SetItemString(d, "Error", Error);
1328 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1329 PyDict_SetItemString(d, "Incomplete", Incomplete);