This commit was manufactured by cvs2svn to create tag 'r23a1-fork'.
[python/dscho.git] / Modules / bz2module.c
blobd0383ac7ee48455747cc6d9970763f8258c7cf25
1 /*
3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
8 */
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
25 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
27 #define MODE_CLOSED 0
28 #define MODE_READ 1
29 #define MODE_READ_EOF 2
30 #define MODE_WRITE 3
32 #define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)
/* BZS_TOTAL_OUT(bzs): value of the total_out_hi32/total_out_lo32 counter
 * pair of the stream pointed to by `bzs`.  The two 32-bit halves are
 * combined when a 64-bit integer type is available; otherwise only the
 * low half is used.
 *
 * The expansion is a fully parenthesized expression with no trailing
 * semicolon, so it can be used inside larger expressions and with
 * arguments such as `&stream`. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif
/* Per-object locking helpers; both are used as statements.
 *
 * ACQUIRE_LOCK first tries to take the lock without blocking.  Only if
 * that fails does it release the interpreter lock while waiting, so a
 * thread that owns the object lock and is itself waiting to re-acquire
 * the interpreter lock (after Py_END_ALLOW_THREADS) can make progress.
 * Blocking on the object lock with the interpreter lock held could
 * otherwise deadlock.
 *
 * Without thread support both macros expand to nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
53 #ifdef WITH_UNIVERSAL_NEWLINES
54 /* Bits in f_newlinetypes */
55 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
56 #define NEWLINE_CR 1 /* \r newline seen */
57 #define NEWLINE_LF 2 /* \n newline seen */
58 #define NEWLINE_CRLF 4 /* \r\n newline seen */
59 #endif
61 /* ===================================================================== */
62 /* Structure definitions. */
64 typedef struct {
65 PyFileObject file;
66 BZFILE *fp;
67 int mode;
68 long pos;
69 long size;
70 #ifdef WITH_THREAD
71 PyThread_type_lock lock;
72 #endif
73 } BZ2FileObject;
75 typedef struct {
76 PyObject_HEAD
77 bz_stream bzs;
78 int running;
79 #ifdef WITH_THREAD
80 PyThread_type_lock lock;
81 #endif
82 } BZ2CompObject;
84 typedef struct {
85 PyObject_HEAD
86 bz_stream bzs;
87 int running;
88 PyObject *unused_data;
89 #ifdef WITH_THREAD
90 PyThread_type_lock lock;
91 #endif
92 } BZ2DecompObject;
94 /* ===================================================================== */
95 /* Utility functions. */
97 static int
98 Util_CatchBZ2Error(int bzerror)
100 int ret = 0;
101 switch(bzerror) {
102 case BZ_OK:
103 case BZ_STREAM_END:
104 break;
106 case BZ_CONFIG_ERROR:
107 PyErr_SetString(PyExc_SystemError,
108 "the bz2 library was not compiled "
109 "correctly");
110 ret = 1;
111 break;
113 case BZ_PARAM_ERROR:
114 PyErr_SetString(PyExc_ValueError,
115 "the bz2 library has received wrong "
116 "parameters");
117 ret = 1;
118 break;
120 case BZ_MEM_ERROR:
121 PyErr_NoMemory();
122 ret = 1;
123 break;
125 case BZ_DATA_ERROR:
126 case BZ_DATA_ERROR_MAGIC:
127 PyErr_SetString(PyExc_IOError, "invalid data stream");
128 ret = 1;
129 break;
131 case BZ_IO_ERROR:
132 PyErr_SetString(PyExc_IOError, "unknown IO error");
133 ret = 1;
134 break;
136 case BZ_UNEXPECTED_EOF:
137 PyErr_SetString(PyExc_EOFError,
138 "compressed file ended before the "
139 "logical end-of-stream was detected");
140 ret = 1;
141 break;
143 case BZ_SEQUENCE_ERROR:
144 PyErr_SetString(PyExc_RuntimeError,
145 "wrong sequence of bz2 library "
146 "commands used");
147 ret = 1;
148 break;
150 return ret;
153 #if BUFSIZ < 8192
154 #define SMALLCHUNK 8192
155 #else
156 #define SMALLCHUNK BUFSIZ
157 #endif
159 #if SIZEOF_INT < 4
160 #define BIGCHUNK (512 * 32)
161 #else
162 #define BIGCHUNK (512 * 1024)
163 #endif
165 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
166 static size_t
167 Util_NewBufferSize(size_t currentsize)
169 if (currentsize > SMALLCHUNK) {
170 /* Keep doubling until we reach BIGCHUNK;
171 then keep adding BIGCHUNK. */
172 if (currentsize <= BIGCHUNK)
173 return currentsize + currentsize;
174 else
175 return currentsize + BIGCHUNK;
177 return currentsize + SMALLCHUNK;
180 /* This is a hacked version of Python's fileobject.c:get_line(). */
181 static PyObject *
182 Util_GetLine(BZ2FileObject *self, int n)
184 char c;
185 char *buf, *end;
186 size_t total_v_size; /* total # of slots in buffer */
187 size_t used_v_size; /* # used slots in buffer */
188 size_t increment; /* amount to increment the buffer */
189 PyObject *v;
190 int bzerror;
191 #ifdef WITH_UNIVERSAL_NEWLINES
192 int newlinetypes = ((PyFileObject*)self)->f_newlinetypes;
193 int skipnextlf = ((PyFileObject*)self)->f_skipnextlf;
194 int univ_newline = ((PyFileObject*)self)->f_univ_newline;
195 #endif
197 total_v_size = n > 0 ? n : 100;
198 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
199 if (v == NULL)
200 return NULL;
202 buf = BUF(v);
203 end = buf + total_v_size;
205 for (;;) {
206 Py_BEGIN_ALLOW_THREADS
207 #ifdef WITH_UNIVERSAL_NEWLINES
208 if (univ_newline) {
209 while (1) {
210 BZ2_bzRead(&bzerror, self->fp, &c, 1);
211 self->pos++;
212 if (bzerror != BZ_OK || buf == end)
213 break;
214 if (skipnextlf) {
215 skipnextlf = 0;
216 if (c == '\n') {
217 /* Seeing a \n here with
218 * skipnextlf true means we
219 * saw a \r before.
221 newlinetypes |= NEWLINE_CRLF;
222 BZ2_bzRead(&bzerror, self->fp,
223 &c, 1);
224 if (bzerror != BZ_OK)
225 break;
226 } else {
227 newlinetypes |= NEWLINE_CR;
230 if (c == '\r') {
231 skipnextlf = 1;
232 c = '\n';
233 } else if ( c == '\n')
234 newlinetypes |= NEWLINE_LF;
235 *buf++ = c;
236 if (c == '\n') break;
238 if (bzerror == BZ_STREAM_END && skipnextlf)
239 newlinetypes |= NEWLINE_CR;
240 } else /* If not universal newlines use the normal loop */
241 #endif
242 do {
243 BZ2_bzRead(&bzerror, self->fp, &c, 1);
244 self->pos++;
245 *buf++ = c;
246 } while (bzerror == BZ_OK && c != '\n' && buf != end);
247 Py_END_ALLOW_THREADS
248 #ifdef WITH_UNIVERSAL_NEWLINES
249 ((PyFileObject*)self)->f_newlinetypes = newlinetypes;
250 ((PyFileObject*)self)->f_skipnextlf = skipnextlf;
251 #endif
252 if (bzerror == BZ_STREAM_END) {
253 self->size = self->pos;
254 self->mode = MODE_READ_EOF;
255 break;
256 } else if (bzerror != BZ_OK) {
257 Util_CatchBZ2Error(bzerror);
258 Py_DECREF(v);
259 return NULL;
261 if (c == '\n')
262 break;
263 /* Must be because buf == end */
264 if (n > 0)
265 break;
266 used_v_size = total_v_size;
267 increment = total_v_size >> 2; /* mild exponential growth */
268 total_v_size += increment;
269 if (total_v_size > INT_MAX) {
270 PyErr_SetString(PyExc_OverflowError,
271 "line is longer than a Python string can hold");
272 Py_DECREF(v);
273 return NULL;
275 if (_PyString_Resize(&v, total_v_size) < 0)
276 return NULL;
277 buf = BUF(v) + used_v_size;
278 end = BUF(v) + total_v_size;
281 used_v_size = buf - BUF(v);
282 if (used_v_size != total_v_size)
283 _PyString_Resize(&v, used_v_size);
284 return v;
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* Adapted from Python's fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n bytes from `stream` into `buf`.  If the file was not
 * opened with universal newlines this is a plain BZ2_bzRead().
 * Otherwise CR and CR LF are stored as a single '\n', the newline kinds
 * seen are recorded in f_newlinetypes, and a CR that may still be
 * followed by an LF is remembered in f_skipnextlf.
 *
 * Returns the number of bytes stored in buf; *bzerror receives the
 * status of the last BZ2_bzRead() call. */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
                     char* buf, size_t n, BZ2FileObject *fobj)
{
    PyFileObject *file = (PyFileObject *)fobj;
    char *out = buf;
    int seen, pending_cr;

    assert(buf != NULL);
    assert(stream != NULL);

    if (!file->f_univ_newline)
        return BZ2_bzRead(bzerror, stream, buf, n);

    seen = file->f_newlinetypes;
    pending_cr = file->f_skipnextlf;

    /* n is always the number of slots still to be filled in buf. */
    while (n) {
        char *in = out;
        size_t got, i;
        int incomplete;

        got = BZ2_bzRead(bzerror, stream, out, n);
        assert(got <= n);
        n -= got;               /* one out per in for now; fixed up below */
        incomplete = (n != 0);  /* true iff EOF or error */

        for (i = 0; i < got; i++) {
            char ch = *in++;

            if (ch == '\r') {
                /* Store as LF and remember to swallow a following LF. */
                *out++ = '\n';
                pending_cr = 1;
            }
            else if (pending_cr && ch == '\n') {
                /* Second half of CR LF: swallow it, freeing one slot. */
                pending_cr = 0;
                seen |= NEWLINE_CRLF;
                ++n;
            }
            else {
                /* Ordinary byte; note a bare LF or a lone preceding CR. */
                if (ch == '\n')
                    seen |= NEWLINE_LF;
                else if (pending_cr)
                    seen |= NEWLINE_CR;
                *out++ = ch;
                pending_cr = 0;
            }
        }

        if (incomplete) {
            /* A CR at the very end of the stream is a lone CR. */
            if (pending_cr && *bzerror == BZ_STREAM_END)
                seen |= NEWLINE_CR;
            break;
        }
    }

    file->f_newlinetypes = seen;
    file->f_skipnextlf = pending_cr;
    return out - buf;
}
#endif
360 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
361 static void
362 Util_DropReadAhead(BZ2FileObject *self)
364 PyFileObject *f = (PyFileObject*)self;
365 if (f->f_buf != NULL) {
366 PyMem_Free(f->f_buf);
367 f->f_buf = NULL;
371 /* This is a hacked version of Python's fileobject.c:readahead(). */
372 static int
373 Util_ReadAhead(BZ2FileObject *self, int bufsize)
375 int chunksize;
376 int bzerror;
377 PyFileObject *f = (PyFileObject*)self;
379 if (f->f_buf != NULL) {
380 if((f->f_bufend - f->f_bufptr) >= 1)
381 return 0;
382 else
383 Util_DropReadAhead(self);
385 if (self->mode == MODE_READ_EOF) {
386 return -1;
388 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
389 return -1;
391 Py_BEGIN_ALLOW_THREADS
392 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, f->f_buf,
393 bufsize, self);
394 Py_END_ALLOW_THREADS
395 self->pos += chunksize;
396 if (bzerror == BZ_STREAM_END) {
397 self->size = self->pos;
398 self->mode = MODE_READ_EOF;
399 } else if (bzerror != BZ_OK) {
400 Util_CatchBZ2Error(bzerror);
401 Util_DropReadAhead(self);
402 return -1;
404 f->f_bufptr = f->f_buf;
405 f->f_bufend = f->f_buf + chunksize;
406 return 0;
409 /* This is a hacked version of Python's
410 * fileobject.c:readahead_get_line_skip(). */
411 static PyStringObject *
412 Util_ReadAheadGetLineSkip(BZ2FileObject *bf, int skip, int bufsize)
414 PyFileObject *f = (PyFileObject*)bf;
415 PyStringObject* s;
416 char *bufptr;
417 char *buf;
418 int len;
420 if (f->f_buf == NULL)
421 if (Util_ReadAhead(bf, bufsize) < 0)
422 return NULL;
424 len = f->f_bufend - f->f_bufptr;
425 if (len == 0)
426 return (PyStringObject *)
427 PyString_FromStringAndSize(NULL, skip);
428 bufptr = memchr(f->f_bufptr, '\n', len);
429 if (bufptr != NULL) {
430 bufptr++; /* Count the '\n' */
431 len = bufptr - f->f_bufptr;
432 s = (PyStringObject *)
433 PyString_FromStringAndSize(NULL, skip+len);
434 if (s == NULL)
435 return NULL;
436 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
437 f->f_bufptr = bufptr;
438 if (bufptr == f->f_bufend)
439 Util_DropReadAhead(bf);
440 } else {
441 bufptr = f->f_bufptr;
442 buf = f->f_buf;
443 f->f_buf = NULL; /* Force new readahead buffer */
444 s = Util_ReadAheadGetLineSkip(
445 bf, skip+len, bufsize + (bufsize>>2) );
446 if (s == NULL) {
447 PyMem_Free(buf);
448 return NULL;
450 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
451 PyMem_Free(buf);
453 return s;
456 /* ===================================================================== */
457 /* Methods of BZ2File. */
459 PyDoc_STRVAR(BZ2File_read__doc__,
460 "read([size]) -> string\n\
462 Read at most size uncompressed bytes, returned as a string. If the size\n\
463 argument is negative or omitted, read until EOF is reached.\n\
466 /* This is a hacked version of Python's fileobject.c:file_read(). */
467 static PyObject *
468 BZ2File_read(BZ2FileObject *self, PyObject *args)
470 long bytesrequested = -1;
471 size_t bytesread, buffersize, chunksize;
472 int bzerror;
473 PyObject *ret = NULL;
475 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
476 return NULL;
478 ACQUIRE_LOCK(self);
479 switch (self->mode) {
480 case MODE_READ:
481 break;
482 case MODE_READ_EOF:
483 ret = PyString_FromString("");
484 goto cleanup;
485 case MODE_CLOSED:
486 PyErr_SetString(PyExc_ValueError,
487 "I/O operation on closed file");
488 goto cleanup;
489 default:
490 PyErr_SetString(PyExc_IOError,
491 "file is not ready for reading");
492 goto cleanup;
495 if (bytesrequested < 0)
496 buffersize = Util_NewBufferSize((size_t)0);
497 else
498 buffersize = bytesrequested;
499 if (buffersize > INT_MAX) {
500 PyErr_SetString(PyExc_OverflowError,
501 "requested number of bytes is "
502 "more than a Python string can hold");
503 goto cleanup;
505 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
506 if (ret == NULL)
507 goto cleanup;
508 bytesread = 0;
510 for (;;) {
511 Py_BEGIN_ALLOW_THREADS
512 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
513 BUF(ret)+bytesread,
514 buffersize-bytesread,
515 self);
516 self->pos += chunksize;
517 Py_END_ALLOW_THREADS
518 bytesread += chunksize;
519 if (bzerror == BZ_STREAM_END) {
520 self->size = self->pos;
521 self->mode = MODE_READ_EOF;
522 break;
523 } else if (bzerror != BZ_OK) {
524 Util_CatchBZ2Error(bzerror);
525 Py_DECREF(ret);
526 ret = NULL;
527 goto cleanup;
529 if (bytesrequested < 0) {
530 buffersize = Util_NewBufferSize(buffersize);
531 if (_PyString_Resize(&ret, buffersize) < 0)
532 goto cleanup;
533 } else {
534 break;
537 if (bytesread != buffersize)
538 _PyString_Resize(&ret, bytesread);
540 cleanup:
541 RELEASE_LOCK(self);
542 return ret;
545 PyDoc_STRVAR(BZ2File_readline__doc__,
546 "readline([size]) -> string\n\
548 Return the next line from the file, as a string, retaining newline.\n\
549 A non-negative size argument will limit the maximum number of bytes to\n\
550 return (an incomplete line may be returned then). Return an empty\n\
551 string at EOF.\n\
554 static PyObject *
555 BZ2File_readline(BZ2FileObject *self, PyObject *args)
557 PyObject *ret = NULL;
558 int sizehint = -1;
560 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
561 return NULL;
563 ACQUIRE_LOCK(self);
564 switch (self->mode) {
565 case MODE_READ:
566 break;
567 case MODE_READ_EOF:
568 ret = PyString_FromString("");
569 goto cleanup;
570 case MODE_CLOSED:
571 PyErr_SetString(PyExc_ValueError,
572 "I/O operation on closed file");
573 goto cleanup;
574 default:
575 PyErr_SetString(PyExc_IOError,
576 "file is not ready for reading");
577 goto cleanup;
580 if (sizehint == 0)
581 ret = PyString_FromString("");
582 else
583 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
585 cleanup:
586 RELEASE_LOCK(self);
587 return ret;
590 PyDoc_STRVAR(BZ2File_readlines__doc__,
591 "readlines([size]) -> list\n\
593 Call readline() repeatedly and return a list of lines read.\n\
594 The optional size argument, if given, is an approximate bound on the\n\
595 total number of bytes in the lines returned.\n\
598 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
599 static PyObject *
600 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
602 long sizehint = 0;
603 PyObject *list = NULL;
604 PyObject *line;
605 char small_buffer[SMALLCHUNK];
606 char *buffer = small_buffer;
607 size_t buffersize = SMALLCHUNK;
608 PyObject *big_buffer = NULL;
609 size_t nfilled = 0;
610 size_t nread;
611 size_t totalread = 0;
612 char *p, *q, *end;
613 int err;
614 int shortread = 0;
615 int bzerror;
617 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
618 return NULL;
620 ACQUIRE_LOCK(self);
621 switch (self->mode) {
622 case MODE_READ:
623 break;
624 case MODE_READ_EOF:
625 list = PyList_New(0);
626 goto cleanup;
627 case MODE_CLOSED:
628 PyErr_SetString(PyExc_ValueError,
629 "I/O operation on closed file");
630 goto cleanup;
631 default:
632 PyErr_SetString(PyExc_IOError,
633 "file is not ready for reading");
634 goto cleanup;
637 if ((list = PyList_New(0)) == NULL)
638 goto cleanup;
640 for (;;) {
641 Py_BEGIN_ALLOW_THREADS
642 nread = Util_UnivNewlineRead(&bzerror, self->fp,
643 buffer+nfilled,
644 buffersize-nfilled, self);
645 self->pos += nread;
646 Py_END_ALLOW_THREADS
647 if (bzerror == BZ_STREAM_END) {
648 self->size = self->pos;
649 self->mode = MODE_READ_EOF;
650 if (nread == 0) {
651 sizehint = 0;
652 break;
654 shortread = 1;
655 } else if (bzerror != BZ_OK) {
656 Util_CatchBZ2Error(bzerror);
657 error:
658 Py_DECREF(list);
659 list = NULL;
660 goto cleanup;
662 totalread += nread;
663 p = memchr(buffer+nfilled, '\n', nread);
664 if (p == NULL) {
665 /* Need a larger buffer to fit this line */
666 nfilled += nread;
667 buffersize *= 2;
668 if (buffersize > INT_MAX) {
669 PyErr_SetString(PyExc_OverflowError,
670 "line is longer than a Python string can hold");
671 goto error;
673 if (big_buffer == NULL) {
674 /* Create the big buffer */
675 big_buffer = PyString_FromStringAndSize(
676 NULL, buffersize);
677 if (big_buffer == NULL)
678 goto error;
679 buffer = PyString_AS_STRING(big_buffer);
680 memcpy(buffer, small_buffer, nfilled);
682 else {
683 /* Grow the big buffer */
684 _PyString_Resize(&big_buffer, buffersize);
685 buffer = PyString_AS_STRING(big_buffer);
687 continue;
689 end = buffer+nfilled+nread;
690 q = buffer;
691 do {
692 /* Process complete lines */
693 p++;
694 line = PyString_FromStringAndSize(q, p-q);
695 if (line == NULL)
696 goto error;
697 err = PyList_Append(list, line);
698 Py_DECREF(line);
699 if (err != 0)
700 goto error;
701 q = p;
702 p = memchr(q, '\n', end-q);
703 } while (p != NULL);
704 /* Move the remaining incomplete line to the start */
705 nfilled = end-q;
706 memmove(buffer, q, nfilled);
707 if (sizehint > 0)
708 if (totalread >= (size_t)sizehint)
709 break;
710 if (shortread) {
711 sizehint = 0;
712 break;
715 if (nfilled != 0) {
716 /* Partial last line */
717 line = PyString_FromStringAndSize(buffer, nfilled);
718 if (line == NULL)
719 goto error;
720 if (sizehint > 0) {
721 /* Need to complete the last line */
722 PyObject *rest = Util_GetLine(self, 0);
723 if (rest == NULL) {
724 Py_DECREF(line);
725 goto error;
727 PyString_Concat(&line, rest);
728 Py_DECREF(rest);
729 if (line == NULL)
730 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
738 cleanup:
739 RELEASE_LOCK(self);
740 if (big_buffer) {
741 Py_DECREF(big_buffer);
743 return list;
746 PyDoc_STRVAR(BZ2File_write__doc__,
747 "write(data) -> None\n\
749 Write the 'data' string to file. Note that due to buffering, close() may\n\
750 be needed before the file on disk reflects the data written.\n\
753 /* This is a hacked version of Python's fileobject.c:file_write(). */
754 static PyObject *
755 BZ2File_write(BZ2FileObject *self, PyObject *args)
757 PyObject *ret = NULL;
758 char *buf;
759 int len;
760 int bzerror;
762 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
763 return NULL;
765 ACQUIRE_LOCK(self);
766 switch (self->mode) {
767 case MODE_WRITE:
768 break;
770 case MODE_CLOSED:
771 PyErr_SetString(PyExc_ValueError,
772 "I/O operation on closed file");
773 goto cleanup;;
775 default:
776 PyErr_SetString(PyExc_IOError,
777 "file is not ready for writing");
778 goto cleanup;;
781 PyFile_SoftSpace((PyObject*)self, 0);
783 Py_BEGIN_ALLOW_THREADS
784 BZ2_bzWrite (&bzerror, self->fp, buf, len);
785 self->pos += len;
786 Py_END_ALLOW_THREADS
788 if (bzerror != BZ_OK) {
789 Util_CatchBZ2Error(bzerror);
790 goto cleanup;
793 Py_INCREF(Py_None);
794 ret = Py_None;
796 cleanup:
797 RELEASE_LOCK(self);
798 return ret;
801 PyDoc_STRVAR(BZ2File_writelines__doc__,
802 "writelines(sequence_of_strings) -> None\n\
804 Write the sequence of strings to the file. Note that newlines are not\n\
805 added. The sequence can be any iterable object producing strings. This is\n\
806 equivalent to calling write() for each string.\n\
809 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
810 static PyObject *
811 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
813 #define CHUNKSIZE 1000
814 PyObject *list = NULL;
815 PyObject *iter = NULL;
816 PyObject *ret = NULL;
817 PyObject *line;
818 int i, j, index, len, islist;
819 int bzerror;
821 ACQUIRE_LOCK(self);
822 islist = PyList_Check(seq);
823 if (!islist) {
824 iter = PyObject_GetIter(seq);
825 if (iter == NULL) {
826 PyErr_SetString(PyExc_TypeError,
827 "writelines() requires an iterable argument");
828 goto error;
830 list = PyList_New(CHUNKSIZE);
831 if (list == NULL)
832 goto error;
835 /* Strategy: slurp CHUNKSIZE lines into a private list,
836 checking that they are all strings, then write that list
837 without holding the interpreter lock, then come back for more. */
838 for (index = 0; ; index += CHUNKSIZE) {
839 if (islist) {
840 Py_XDECREF(list);
841 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
842 if (list == NULL)
843 goto error;
844 j = PyList_GET_SIZE(list);
846 else {
847 for (j = 0; j < CHUNKSIZE; j++) {
848 line = PyIter_Next(iter);
849 if (line == NULL) {
850 if (PyErr_Occurred())
851 goto error;
852 break;
854 PyList_SetItem(list, j, line);
857 if (j == 0)
858 break;
860 /* Check that all entries are indeed strings. If not,
861 apply the same rules as for file.write() and
862 convert the rets to strings. This is slow, but
863 seems to be the only way since all conversion APIs
864 could potentially execute Python code. */
865 for (i = 0; i < j; i++) {
866 PyObject *v = PyList_GET_ITEM(list, i);
867 if (!PyString_Check(v)) {
868 const char *buffer;
869 int len;
870 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
871 PyErr_SetString(PyExc_TypeError,
872 "writelines() "
873 "argument must be "
874 "a sequence of "
875 "strings");
876 goto error;
878 line = PyString_FromStringAndSize(buffer,
879 len);
880 if (line == NULL)
881 goto error;
882 Py_DECREF(v);
883 PyList_SET_ITEM(list, i, line);
887 PyFile_SoftSpace((PyObject*)self, 0);
889 /* Since we are releasing the global lock, the
890 following code may *not* execute Python code. */
891 Py_BEGIN_ALLOW_THREADS
892 for (i = 0; i < j; i++) {
893 line = PyList_GET_ITEM(list, i);
894 len = PyString_GET_SIZE(line);
895 BZ2_bzWrite (&bzerror, self->fp,
896 PyString_AS_STRING(line), len);
897 if (bzerror != BZ_OK) {
898 Py_BLOCK_THREADS
899 Util_CatchBZ2Error(bzerror);
900 goto error;
903 Py_END_ALLOW_THREADS
905 if (j < CHUNKSIZE)
906 break;
909 Py_INCREF(Py_None);
910 ret = Py_None;
912 error:
913 RELEASE_LOCK(self);
914 Py_XDECREF(list);
915 Py_XDECREF(iter);
916 return ret;
917 #undef CHUNKSIZE
920 PyDoc_STRVAR(BZ2File_seek__doc__,
921 "seek(offset [, whence]) -> None\n\
923 Move to new file position. Argument offset is a byte count. Optional\n\
924 argument whence defaults to 0 (offset from start of file, offset\n\
925 should be >= 0); other values are 1 (move relative to current position,\n\
926 positive or negative), and 2 (move relative to end of file, usually\n\
927 negative, although many platforms allow seeking beyond the end of a file).\n\
929 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
930 the operation may be extremely slow.\n\
933 static PyObject *
934 BZ2File_seek(BZ2FileObject *self, PyObject *args)
936 int where = 0;
937 long offset;
938 char small_buffer[SMALLCHUNK];
939 char *buffer = small_buffer;
940 size_t buffersize = SMALLCHUNK;
941 int bytesread = 0;
942 int readsize;
943 int chunksize;
944 int bzerror;
945 int rewind = 0;
946 PyObject *func;
947 PyObject *ret = NULL;
949 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
950 return NULL;
952 ACQUIRE_LOCK(self);
953 Util_DropReadAhead(self);
954 switch (self->mode) {
955 case MODE_READ:
956 case MODE_READ_EOF:
957 break;
959 case MODE_CLOSED:
960 PyErr_SetString(PyExc_ValueError,
961 "I/O operation on closed file");
962 goto cleanup;;
964 default:
965 PyErr_SetString(PyExc_IOError,
966 "seek works only while reading");
967 goto cleanup;;
970 if (offset < 0) {
971 if (where == 1) {
972 offset = self->pos + offset;
973 rewind = 1;
974 } else if (where == 2) {
975 if (self->size == -1) {
976 assert(self->mode != MODE_READ_EOF);
977 for (;;) {
978 Py_BEGIN_ALLOW_THREADS
979 chunksize = Util_UnivNewlineRead(
980 &bzerror, self->fp,
981 buffer, buffersize,
982 self);
983 self->pos += chunksize;
984 Py_END_ALLOW_THREADS
986 bytesread += chunksize;
987 if (bzerror == BZ_STREAM_END) {
988 break;
989 } else if (bzerror != BZ_OK) {
990 Util_CatchBZ2Error(bzerror);
991 goto cleanup;
994 self->mode = MODE_READ_EOF;
995 self->size = self->pos;
996 bytesread = 0;
998 offset = self->size + offset;
999 if (offset >= self->pos)
1000 offset -= self->pos;
1001 else
1002 rewind = 1;
1004 if (offset < 0)
1005 offset = 0;
1006 } else if (where == 0) {
1007 if (offset >= self->pos)
1008 offset -= self->pos;
1009 else
1010 rewind = 1;
1013 if (rewind) {
1014 BZ2_bzReadClose(&bzerror, self->fp);
1015 func = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1016 "seek");
1017 if (bzerror != BZ_OK) {
1018 Util_CatchBZ2Error(bzerror);
1019 goto cleanup;
1021 if (!func) {
1022 PyErr_SetString(PyExc_RuntimeError,
1023 "can't find file.seek method");
1024 goto cleanup;
1026 ret = PyObject_CallFunction(func, "(i)", 0);
1027 if (!ret)
1028 goto cleanup;
1029 Py_DECREF(ret);
1030 ret = NULL;
1031 self->pos = 0;
1032 self->fp = BZ2_bzReadOpen(&bzerror,
1033 PyFile_AsFile((PyObject*)self),
1034 0, 0, NULL, 0);
1035 if (bzerror != BZ_OK) {
1036 Util_CatchBZ2Error(bzerror);
1037 goto cleanup;
1039 self->mode = MODE_READ;
1040 } else if (self->mode == MODE_READ_EOF) {
1041 goto exit;
1044 if (offset == 0)
1045 goto exit;
1047 /* Before getting here, offset must be set to the number of bytes
1048 * to walk forward. */
1049 for (;;) {
1050 if ((size_t)offset-bytesread > buffersize)
1051 readsize = buffersize;
1052 else
1053 readsize = offset-bytesread;
1054 Py_BEGIN_ALLOW_THREADS
1055 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1056 buffer, readsize, self);
1057 self->pos += chunksize;
1058 Py_END_ALLOW_THREADS
1059 bytesread += chunksize;
1060 if (bzerror == BZ_STREAM_END) {
1061 self->size = self->pos;
1062 self->mode = MODE_READ_EOF;
1063 break;
1064 } else if (bzerror != BZ_OK) {
1065 Util_CatchBZ2Error(bzerror);
1066 goto cleanup;
1068 if (bytesread == offset)
1069 break;
1072 exit:
1073 Py_INCREF(Py_None);
1074 ret = Py_None;
1076 cleanup:
1077 RELEASE_LOCK(self);
1078 return ret;
1081 PyDoc_STRVAR(BZ2File_tell__doc__,
1082 "tell() -> int\n\
1084 Return the current file position, an integer (may be a long integer).\n\
1087 static PyObject *
1088 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1090 PyObject *ret = NULL;
1092 if (self->mode == MODE_CLOSED) {
1093 PyErr_SetString(PyExc_ValueError,
1094 "I/O operation on closed file");
1095 goto cleanup;
1098 ret = PyInt_FromLong(self->pos);
1100 cleanup:
1101 return ret;
1104 PyDoc_STRVAR(BZ2File_notsup__doc__,
1105 "Operation not supported.\n\
1108 static PyObject *
1109 BZ2File_notsup(BZ2FileObject *self, PyObject *args)
1111 PyErr_SetString(PyExc_IOError, "operation not supported");
1112 return NULL;
1115 PyDoc_STRVAR(BZ2File_close__doc__,
1116 "close() -> None or (perhaps) an integer\n\
1118 Close the file. Sets data attribute .closed to true. A closed file\n\
1119 cannot be used for further I/O operations. close() may be called more\n\
1120 than once without error.\n\
1123 static PyObject *
1124 BZ2File_close(BZ2FileObject *self)
1126 PyObject *file_close;
1127 PyObject *ret = NULL;
1128 int bzerror = BZ_OK;
1130 ACQUIRE_LOCK(self);
1131 switch (self->mode) {
1132 case MODE_READ:
1133 case MODE_READ_EOF:
1134 BZ2_bzReadClose(&bzerror, self->fp);
1135 break;
1136 case MODE_WRITE:
1137 BZ2_bzWriteClose(&bzerror, self->fp,
1138 0, NULL, NULL);
1139 break;
1141 self->mode = MODE_CLOSED;
1142 file_close = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1143 "close");
1144 if (!file_close) {
1145 PyErr_SetString(PyExc_RuntimeError,
1146 "can't find file.close method");
1147 goto cleanup;
1149 ret = PyObject_CallObject(file_close, NULL);
1150 if (bzerror != BZ_OK) {
1151 Util_CatchBZ2Error(bzerror);
1152 Py_XDECREF(ret);
1153 ret = NULL;
1154 goto cleanup;
1157 cleanup:
1158 RELEASE_LOCK(self);
1159 return ret;
1162 static PyMethodDef BZ2File_methods[] = {
1163 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1164 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1165 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1166 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1167 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1168 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1169 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1170 {"truncate", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1171 {"readinto", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1172 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1173 {NULL, NULL} /* sentinel */
1177 /* ===================================================================== */
1178 /* Slot definitions for BZ2File_Type. */
1180 static int
1181 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1183 PyObject *file_args = NULL;
1184 static char *kwlist[] = {"filename", "mode", "buffering",
1185 "compresslevel", 0};
1186 char *name = NULL;
1187 char *mode = "r";
1188 int buffering = -1;
1189 int compresslevel = 9;
1190 int bzerror;
1191 int mode_char = 0;
1192 int univ_newline = 0;
1194 self->size = -1;
1196 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "et|sii:BZ2File",
1197 kwlist, Py_FileSystemDefaultEncoding,
1198 &name, &mode, &buffering,
1199 &compresslevel))
1200 return -1;
1202 if (compresslevel < 1 || compresslevel > 9) {
1203 PyErr_SetString(PyExc_ValueError,
1204 "compresslevel must be between 1 and 9");
1205 return -1;
1208 for (;;) {
1209 int error = 0;
1210 switch (*mode) {
1211 case 'r':
1212 case 'w':
1213 if (mode_char)
1214 error = 1;
1215 mode_char = *mode;
1216 break;
1218 case 'b':
1219 break;
1221 case 'U':
1222 univ_newline = 1;
1223 break;
1225 default:
1226 error = 1;
1227 break;
1229 if (error) {
1230 PyErr_Format(PyExc_ValueError,
1231 "invalid mode char %c", *mode);
1232 return -1;
1234 mode++;
1235 if (*mode == '\0')
1236 break;
1239 if (mode_char == 'r')
1240 mode = univ_newline ? "rbU" : "rb";
1241 else
1242 mode = univ_newline ? "wbU" : "wb";
1244 file_args = Py_BuildValue("(ssi)", name, mode, buffering);
1245 if (!file_args)
1246 return -1;
1248 /* From now on, we have stuff to dealloc, so jump to error label
1249 * instead of returning */
1251 if (PyFile_Type.tp_init((PyObject *)self, file_args, NULL) < 0)
1252 goto error;
1254 #ifdef WITH_THREAD
1255 self->lock = PyThread_allocate_lock();
1256 if (!self->lock)
1257 goto error;
1258 #endif
1260 if (mode_char == 'r')
1261 self->fp = BZ2_bzReadOpen(&bzerror,
1262 PyFile_AsFile((PyObject*)self),
1263 0, 0, NULL, 0);
1264 else
1265 self->fp = BZ2_bzWriteOpen(&bzerror,
1266 PyFile_AsFile((PyObject*)self),
1267 compresslevel, 0, 0);
1269 if (bzerror != BZ_OK) {
1270 Util_CatchBZ2Error(bzerror);
1271 goto error;
1274 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1276 Py_XDECREF(file_args);
1277 PyMem_Free(name);
1278 return 0;
1280 error:
1281 #ifdef WITH_THREAD
1282 if (self->lock)
1283 PyThread_free_lock(self->lock);
1284 #endif
1285 Py_XDECREF(file_args);
1286 PyMem_Free(name);
1287 return -1;
1290 static void
1291 BZ2File_dealloc(BZ2FileObject *self)
1293 int bzerror;
1294 #ifdef WITH_THREAD
1295 if (self->lock)
1296 PyThread_free_lock(self->lock);
1297 #endif
1298 switch (self->mode) {
1299 case MODE_READ:
1300 case MODE_READ_EOF:
1301 BZ2_bzReadClose(&bzerror, self->fp);
1302 break;
1303 case MODE_WRITE:
1304 BZ2_bzWriteClose(&bzerror, self->fp,
1305 0, NULL, NULL);
1306 break;
1308 Util_DropReadAhead(self);
1309 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1312 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1313 static PyObject *
1314 BZ2File_getiter(BZ2FileObject *self)
1316 if (self->mode == MODE_CLOSED) {
1317 PyErr_SetString(PyExc_ValueError,
1318 "I/O operation on closed file");
1319 return NULL;
1321 Py_INCREF((PyObject*)self);
1322 return (PyObject *)self;
1325 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1326 #define READAHEAD_BUFSIZE 8192
1327 static PyObject *
1328 BZ2File_iternext(BZ2FileObject *self)
1330 PyStringObject* ret;
1331 ACQUIRE_LOCK(self);
1332 if (self->mode == MODE_CLOSED) {
1333 PyErr_SetString(PyExc_ValueError,
1334 "I/O operation on closed file");
1335 return NULL;
1337 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1338 RELEASE_LOCK(self);
1339 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1340 Py_XDECREF(ret);
1341 return NULL;
1343 return (PyObject *)ret;
1346 /* ===================================================================== */
1347 /* BZ2File_Type definition. */
1349 PyDoc_VAR(BZ2File__doc__) =
1350 PyDoc_STR(
1351 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1353 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1354 writing. When opened for writing, the file will be created if it doesn't\n\
1355 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1356 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1357 is given, must be a number between 1 and 9.\n\
1359 #ifdef WITH_UNIVERSAL_NEWLINES
1360 PyDoc_STR(
1361 "\n\
1362 Add a 'U' to mode to open the file for input with universal newline\n\
1363 support. Any line ending in the input file will be seen as a '\\n' in\n\
1364 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1365 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1366 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1367 newlines are available only when reading.\n\
1369 #endif
/* Type object for bz2.BZ2File.  Only the slots known at compile time are
 * filled in here (dealloc, flags, doc, tp_iter/tp_iternext, methods and
 * init).  ob_type, tp_base, tp_new, tp_getattro, tp_setattro, tp_alloc and
 * tp_free are assigned at module initialisation time in initbz2(). */
1372 static PyTypeObject BZ2File_Type = {
1373 PyObject_HEAD_INIT(NULL)
1374 0, /*ob_size*/
1375 "bz2.BZ2File", /*tp_name*/
1376 sizeof(BZ2FileObject), /*tp_basicsize*/
1377 0, /*tp_itemsize*/
1378 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1379 0, /*tp_print*/
1380 0, /*tp_getattr*/
1381 0, /*tp_setattr*/
1382 0, /*tp_compare*/
1383 0, /*tp_repr*/
1384 0, /*tp_as_number*/
1385 0, /*tp_as_sequence*/
1386 0, /*tp_as_mapping*/
1387 0, /*tp_hash*/
1388 0, /*tp_call*/
1389 0, /*tp_str*/
1390 0, /*tp_getattro*/
1391 0, /*tp_setattro*/
1392 0, /*tp_as_buffer*/
1393 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1394 BZ2File__doc__, /*tp_doc*/
1395 0, /*tp_traverse*/
1396 0, /*tp_clear*/
1397 0, /*tp_richcompare*/
1398 0, /*tp_weaklistoffset*/
1399 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1400 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1401 BZ2File_methods, /*tp_methods*/
1402 0, /*tp_members*/
1403 0, /*tp_getset*/
1404 0, /*tp_base*/
1405 0, /*tp_dict*/
1406 0, /*tp_descr_get*/
1407 0, /*tp_descr_set*/
1408 0, /*tp_dictoffset*/
1409 (initproc)BZ2File_init, /*tp_init*/
1410 0, /*tp_alloc*/
1411 0, /*tp_new*/
1412 0, /*tp_free*/
1413 0, /*tp_is_gc*/
1417 /* ===================================================================== */
1418 /* Methods of BZ2Comp. */
1420 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1421 "compress(data) -> string\n\
1423 Provide more data to the compressor object. It will return chunks of\n\
1424 compressed data whenever possible. When you've finished providing data\n\
1425 to compress, call the flush() method to finish the compression process,\n\
1426 and return what is left in the internal buffers.\n\
1429 static PyObject *
1430 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1432 char *data;
1433 int datasize;
1434 int bufsize = SMALLCHUNK;
1435 LONG_LONG totalout;
1436 PyObject *ret = NULL;
1437 bz_stream *bzs = &self->bzs;
1438 int bzerror;
1440 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1441 return NULL;
1443 ACQUIRE_LOCK(self);
1444 if (!self->running) {
1445 PyErr_SetString(PyExc_ValueError,
1446 "this object was already flushed");
1447 goto error;
1450 ret = PyString_FromStringAndSize(NULL, bufsize);
1451 if (!ret)
1452 goto error;
1454 bzs->next_in = data;
1455 bzs->avail_in = datasize;
1456 bzs->next_out = BUF(ret);
1457 bzs->avail_out = bufsize;
1459 totalout = BZS_TOTAL_OUT(bzs);
1461 for (;;) {
1462 Py_BEGIN_ALLOW_THREADS
1463 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1464 Py_END_ALLOW_THREADS
1465 if (bzerror != BZ_RUN_OK) {
1466 Util_CatchBZ2Error(bzerror);
1467 goto error;
1469 if (bzs->avail_out == 0) {
1470 bufsize = Util_NewBufferSize(bufsize);
1471 if (_PyString_Resize(&ret, bufsize) < 0) {
1472 BZ2_bzCompressEnd(bzs);
1473 goto error;
1475 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1476 - totalout);
1477 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1478 } else if (bzs->avail_in == 0) {
1479 break;
1483 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1485 RELEASE_LOCK(self);
1486 return ret;
1488 error:
1489 RELEASE_LOCK(self);
1490 Py_XDECREF(ret);
1491 return NULL;
1494 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1495 "flush() -> string\n\
1497 Finish the compression process and return what is left in internal buffers.\n\
1498 You must not use the compressor object after calling this method.\n\
1501 static PyObject *
1502 BZ2Comp_flush(BZ2CompObject *self)
1504 int bufsize = SMALLCHUNK;
1505 PyObject *ret = NULL;
1506 bz_stream *bzs = &self->bzs;
1507 LONG_LONG totalout;
1508 int bzerror;
1510 ACQUIRE_LOCK(self);
1511 if (!self->running) {
1512 PyErr_SetString(PyExc_ValueError, "object was already "
1513 "flushed");
1514 goto error;
1516 self->running = 0;
1518 ret = PyString_FromStringAndSize(NULL, bufsize);
1519 if (!ret)
1520 goto error;
1522 bzs->next_out = BUF(ret);
1523 bzs->avail_out = bufsize;
1525 totalout = BZS_TOTAL_OUT(bzs);
1527 for (;;) {
1528 Py_BEGIN_ALLOW_THREADS
1529 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1530 Py_END_ALLOW_THREADS
1531 if (bzerror == BZ_STREAM_END) {
1532 break;
1533 } else if (bzerror != BZ_FINISH_OK) {
1534 Util_CatchBZ2Error(bzerror);
1535 goto error;
1537 if (bzs->avail_out == 0) {
1538 bufsize = Util_NewBufferSize(bufsize);
1539 if (_PyString_Resize(&ret, bufsize) < 0)
1540 goto error;
1541 bzs->next_out = BUF(ret);
1542 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1543 - totalout);
1544 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1548 if (bzs->avail_out != 0)
1549 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1551 RELEASE_LOCK(self);
1552 return ret;
1554 error:
1555 RELEASE_LOCK(self);
1556 Py_XDECREF(ret);
1557 return NULL;
/* Method table for bz2.BZ2Compressor: compress(data) takes positional
 * arguments, flush() takes none. */
1560 static PyMethodDef BZ2Comp_methods[] = {
1561 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1562 BZ2Comp_compress__doc__},
1563 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1564 BZ2Comp_flush__doc__},
1565 {NULL, NULL} /* sentinel */
1569 /* ===================================================================== */
1570 /* Slot definitions for BZ2Comp_Type. */
1572 static int
1573 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1575 int compresslevel = 9;
1576 int bzerror;
1577 static char *kwlist[] = {"compresslevel", 0};
1579 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1580 kwlist, &compresslevel))
1581 return -1;
1583 if (compresslevel < 1 || compresslevel > 9) {
1584 PyErr_SetString(PyExc_ValueError,
1585 "compresslevel must be between 1 and 9");
1586 goto error;
1589 #ifdef WITH_THREAD
1590 self->lock = PyThread_allocate_lock();
1591 if (!self->lock)
1592 goto error;
1593 #endif
1595 memset(&self->bzs, 0, sizeof(bz_stream));
1596 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1597 if (bzerror != BZ_OK) {
1598 Util_CatchBZ2Error(bzerror);
1599 goto error;
1602 self->running = 1;
1604 return 0;
1605 error:
1606 #ifdef WITH_THREAD
1607 if (self->lock)
1608 PyThread_free_lock(self->lock);
1609 #endif
1610 return -1;
1613 static void
1614 BZ2Comp_dealloc(BZ2CompObject *self)
1616 #ifdef WITH_THREAD
1617 if (self->lock)
1618 PyThread_free_lock(self->lock);
1619 #endif
1620 BZ2_bzCompressEnd(&self->bzs);
1621 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1625 /* ===================================================================== */
1626 /* BZ2Comp_Type definition. */
1628 PyDoc_STRVAR(BZ2Comp__doc__,
1629 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1631 Create a new compressor object. This object may be used to compress\n\
1632 data sequentially. If you want to compress data in one shot, use the\n\
1633 compress() function instead. The compresslevel parameter, if given,\n\
1634 must be a number between 1 and 9.\n\
/* Type object for bz2.BZ2Compressor.  Sets dealloc, flags, doc, the
 * method table and init; ob_type, tp_getattro, tp_setattro, tp_alloc,
 * tp_new and tp_free are assigned in initbz2(). */
1637 static PyTypeObject BZ2Comp_Type = {
1638 PyObject_HEAD_INIT(NULL)
1639 0, /*ob_size*/
1640 "bz2.BZ2Compressor", /*tp_name*/
1641 sizeof(BZ2CompObject), /*tp_basicsize*/
1642 0, /*tp_itemsize*/
1643 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1644 0, /*tp_print*/
1645 0, /*tp_getattr*/
1646 0, /*tp_setattr*/
1647 0, /*tp_compare*/
1648 0, /*tp_repr*/
1649 0, /*tp_as_number*/
1650 0, /*tp_as_sequence*/
1651 0, /*tp_as_mapping*/
1652 0, /*tp_hash*/
1653 0, /*tp_call*/
1654 0, /*tp_str*/
1655 0, /*tp_getattro*/
1656 0, /*tp_setattro*/
1657 0, /*tp_as_buffer*/
1658 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1659 BZ2Comp__doc__, /*tp_doc*/
1660 0, /*tp_traverse*/
1661 0, /*tp_clear*/
1662 0, /*tp_richcompare*/
1663 0, /*tp_weaklistoffset*/
1664 0, /*tp_iter*/
1665 0, /*tp_iternext*/
1666 BZ2Comp_methods, /*tp_methods*/
1667 0, /*tp_members*/
1668 0, /*tp_getset*/
1669 0, /*tp_base*/
1670 0, /*tp_dict*/
1671 0, /*tp_descr_get*/
1672 0, /*tp_descr_set*/
1673 0, /*tp_dictoffset*/
1674 (initproc)BZ2Comp_init, /*tp_init*/
1675 0, /*tp_alloc*/
1676 0, /*tp_new*/
1677 0, /*tp_free*/
1678 0, /*tp_is_gc*/
1682 /* ===================================================================== */
1683 /* Members of BZ2Decomp. */
/* OFF(x): byte offset of field x inside BZ2DecompObject, for the member
 * table below. */
1685 #define OFF(x) offsetof(BZ2DecompObject, x)
/* Member table for bz2.BZ2Decompressor: exposes the unused_data field as
 * a read-only (RO) object attribute. */
1687 static PyMemberDef BZ2Decomp_members[] = {
1688 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1689 {NULL} /* Sentinel */
1693 /* ===================================================================== */
1694 /* Methods of BZ2Decomp. */
1696 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1697 "decompress(data) -> string\n\
1699 Provide more data to the decompressor object. It will return chunks\n\
1700 of decompressed data whenever possible. If you try to decompress data\n\
1701 after the end of stream is found, EOFError will be raised. If any data\n\
1702 was found after the end of stream, it'll be ignored and saved in\n\
1703 unused_data attribute.\n\
1706 static PyObject *
1707 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1709 char *data;
1710 int datasize;
1711 int bufsize = SMALLCHUNK;
1712 LONG_LONG totalout;
1713 PyObject *ret = NULL;
1714 bz_stream *bzs = &self->bzs;
1715 int bzerror;
1717 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1718 return NULL;
1720 ACQUIRE_LOCK(self);
1721 if (!self->running) {
1722 PyErr_SetString(PyExc_EOFError, "end of stream was "
1723 "already found");
1724 goto error;
1727 ret = PyString_FromStringAndSize(NULL, bufsize);
1728 if (!ret)
1729 goto error;
1731 bzs->next_in = data;
1732 bzs->avail_in = datasize;
1733 bzs->next_out = BUF(ret);
1734 bzs->avail_out = bufsize;
1736 totalout = BZS_TOTAL_OUT(bzs);
1738 for (;;) {
1739 Py_BEGIN_ALLOW_THREADS
1740 bzerror = BZ2_bzDecompress(bzs);
1741 Py_END_ALLOW_THREADS
1742 if (bzerror == BZ_STREAM_END) {
1743 if (bzs->avail_in != 0) {
1744 Py_DECREF(self->unused_data);
1745 self->unused_data =
1746 PyString_FromStringAndSize(bzs->next_in,
1747 bzs->avail_in);
1749 self->running = 0;
1750 break;
1752 if (bzerror != BZ_OK) {
1753 Util_CatchBZ2Error(bzerror);
1754 goto error;
1756 if (bzs->avail_out == 0) {
1757 bufsize = Util_NewBufferSize(bufsize);
1758 if (_PyString_Resize(&ret, bufsize) < 0) {
1759 BZ2_bzDecompressEnd(bzs);
1760 goto error;
1762 bzs->next_out = BUF(ret);
1763 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1764 - totalout);
1765 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1766 } else if (bzs->avail_in == 0) {
1767 break;
1771 if (bzs->avail_out != 0)
1772 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1774 RELEASE_LOCK(self);
1775 return ret;
1777 error:
1778 RELEASE_LOCK(self);
1779 Py_XDECREF(ret);
1780 return NULL;
/* Method table for bz2.BZ2Decompressor: a single decompress(data) method
 * taking positional arguments. */
1783 static PyMethodDef BZ2Decomp_methods[] = {
1784 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1785 {NULL, NULL} /* sentinel */
1789 /* ===================================================================== */
1790 /* Slot definitions for BZ2Decomp_Type. */
1792 static int
1793 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1795 int bzerror;
1797 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1798 return -1;
1800 #ifdef WITH_THREAD
1801 self->lock = PyThread_allocate_lock();
1802 if (!self->lock)
1803 goto error;
1804 #endif
1806 self->unused_data = PyString_FromString("");
1807 if (!self->unused_data)
1808 goto error;
1810 memset(&self->bzs, 0, sizeof(bz_stream));
1811 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1812 if (bzerror != BZ_OK) {
1813 Util_CatchBZ2Error(bzerror);
1814 goto error;
1817 self->running = 1;
1819 return 0;
1821 error:
1822 #ifdef WITH_THREAD
1823 if (self->lock)
1824 PyThread_free_lock(self->lock);
1825 #endif
1826 Py_XDECREF(self->unused_data);
1827 return -1;
1830 static void
1831 BZ2Decomp_dealloc(BZ2DecompObject *self)
1833 #ifdef WITH_THREAD
1834 if (self->lock)
1835 PyThread_free_lock(self->lock);
1836 #endif
1837 Py_XDECREF(self->unused_data);
1838 BZ2_bzDecompressEnd(&self->bzs);
1839 ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1843 /* ===================================================================== */
1844 /* BZ2Decomp_Type definition. */
1846 PyDoc_STRVAR(BZ2Decomp__doc__,
1847 "BZ2Decompressor() -> decompressor object\n\
1849 Create a new decompressor object. This object may be used to decompress\n\
1850 data sequentially. If you want to decompress data in one shot, use the\n\
1851 decompress() function instead.\n\
/* Type object for bz2.BZ2Decompressor.  Sets dealloc, flags, doc, the
 * method and member tables and init; ob_type, tp_getattro, tp_setattro,
 * tp_alloc, tp_new and tp_free are assigned in initbz2(). */
1854 static PyTypeObject BZ2Decomp_Type = {
1855 PyObject_HEAD_INIT(NULL)
1856 0, /*ob_size*/
1857 "bz2.BZ2Decompressor", /*tp_name*/
1858 sizeof(BZ2DecompObject), /*tp_basicsize*/
1859 0, /*tp_itemsize*/
1860 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1861 0, /*tp_print*/
1862 0, /*tp_getattr*/
1863 0, /*tp_setattr*/
1864 0, /*tp_compare*/
1865 0, /*tp_repr*/
1866 0, /*tp_as_number*/
1867 0, /*tp_as_sequence*/
1868 0, /*tp_as_mapping*/
1869 0, /*tp_hash*/
1870 0, /*tp_call*/
1871 0, /*tp_str*/
1872 0, /*tp_getattro*/
1873 0, /*tp_setattro*/
1874 0, /*tp_as_buffer*/
1875 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1876 BZ2Decomp__doc__, /*tp_doc*/
1877 0, /*tp_traverse*/
1878 0, /*tp_clear*/
1879 0, /*tp_richcompare*/
1880 0, /*tp_weaklistoffset*/
1881 0, /*tp_iter*/
1882 0, /*tp_iternext*/
1883 BZ2Decomp_methods, /*tp_methods*/
1884 BZ2Decomp_members, /*tp_members*/
1885 0, /*tp_getset*/
1886 0, /*tp_base*/
1887 0, /*tp_dict*/
1888 0, /*tp_descr_get*/
1889 0, /*tp_descr_set*/
1890 0, /*tp_dictoffset*/
1891 (initproc)BZ2Decomp_init, /*tp_init*/
1892 0, /*tp_alloc*/
1893 0, /*tp_new*/
1894 0, /*tp_free*/
1895 0, /*tp_is_gc*/
1899 /* ===================================================================== */
1900 /* Module functions. */
1902 PyDoc_STRVAR(bz2_compress__doc__,
1903 "compress(data [, compresslevel=9]) -> string\n\
1905 Compress data in one shot. If you want to compress data sequentially,\n\
1906 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1907 given, must be a number between 1 and 9.\n\
1910 static PyObject *
1911 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1913 int compresslevel=9;
1914 char *data;
1915 int datasize;
1916 int bufsize;
1917 PyObject *ret = NULL;
1918 bz_stream _bzs;
1919 bz_stream *bzs = &_bzs;
1920 int bzerror;
1921 static char *kwlist[] = {"data", "compresslevel", 0};
1923 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1924 kwlist, &data, &datasize,
1925 &compresslevel))
1926 return NULL;
1928 if (compresslevel < 1 || compresslevel > 9) {
1929 PyErr_SetString(PyExc_ValueError,
1930 "compresslevel must be between 1 and 9");
1931 return NULL;
1934 /* Conforming to bz2 manual, this is large enough to fit compressed
1935 * data in one shot. We will check it later anyway. */
1936 bufsize = datasize + (datasize/100+1) + 600;
1938 ret = PyString_FromStringAndSize(NULL, bufsize);
1939 if (!ret)
1940 return NULL;
1942 memset(bzs, 0, sizeof(bz_stream));
1944 bzs->next_in = data;
1945 bzs->avail_in = datasize;
1946 bzs->next_out = BUF(ret);
1947 bzs->avail_out = bufsize;
1949 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1950 if (bzerror != BZ_OK) {
1951 Util_CatchBZ2Error(bzerror);
1952 Py_DECREF(ret);
1953 return NULL;
1956 for (;;) {
1957 Py_BEGIN_ALLOW_THREADS
1958 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1959 Py_END_ALLOW_THREADS
1960 if (bzerror == BZ_STREAM_END) {
1961 break;
1962 } else if (bzerror != BZ_FINISH_OK) {
1963 BZ2_bzCompressEnd(bzs);
1964 Util_CatchBZ2Error(bzerror);
1965 Py_DECREF(ret);
1966 return NULL;
1968 if (bzs->avail_out == 0) {
1969 bufsize = Util_NewBufferSize(bufsize);
1970 if (_PyString_Resize(&ret, bufsize) < 0) {
1971 BZ2_bzCompressEnd(bzs);
1972 Py_DECREF(ret);
1973 return NULL;
1975 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1976 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1980 if (bzs->avail_out != 0)
1981 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
1982 BZ2_bzCompressEnd(bzs);
1984 return ret;
1987 PyDoc_STRVAR(bz2_decompress__doc__,
1988 "decompress(data) -> decompressed data\n\
1990 Decompress data in one shot. If you want to decompress data sequentially,\n\
1991 use an instance of BZ2Decompressor instead.\n\
1994 static PyObject *
1995 bz2_decompress(PyObject *self, PyObject *args)
1997 char *data;
1998 int datasize;
1999 int bufsize = SMALLCHUNK;
2000 PyObject *ret;
2001 bz_stream _bzs;
2002 bz_stream *bzs = &_bzs;
2003 int bzerror;
2005 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2006 return NULL;
2008 if (datasize == 0)
2009 return PyString_FromString("");
2011 ret = PyString_FromStringAndSize(NULL, bufsize);
2012 if (!ret)
2013 return NULL;
2015 memset(bzs, 0, sizeof(bz_stream));
2017 bzs->next_in = data;
2018 bzs->avail_in = datasize;
2019 bzs->next_out = BUF(ret);
2020 bzs->avail_out = bufsize;
2022 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2023 if (bzerror != BZ_OK) {
2024 Util_CatchBZ2Error(bzerror);
2025 Py_DECREF(ret);
2026 return NULL;
2029 for (;;) {
2030 Py_BEGIN_ALLOW_THREADS
2031 bzerror = BZ2_bzDecompress(bzs);
2032 Py_END_ALLOW_THREADS
2033 if (bzerror == BZ_STREAM_END) {
2034 break;
2035 } else if (bzerror != BZ_OK) {
2036 BZ2_bzDecompressEnd(bzs);
2037 Util_CatchBZ2Error(bzerror);
2038 Py_DECREF(ret);
2039 return NULL;
2041 if (bzs->avail_out == 0) {
2042 bufsize = Util_NewBufferSize(bufsize);
2043 if (_PyString_Resize(&ret, bufsize) < 0) {
2044 BZ2_bzDecompressEnd(bzs);
2045 Py_DECREF(ret);
2046 return NULL;
2048 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2049 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2050 } else if (bzs->avail_in == 0) {
2051 BZ2_bzDecompressEnd(bzs);
2052 PyErr_SetString(PyExc_ValueError,
2053 "couldn't find end of stream");
2054 Py_DECREF(ret);
2055 return NULL;
2059 if (bzs->avail_out != 0)
2060 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2061 BZ2_bzDecompressEnd(bzs);
2063 return ret;
/* Module-level function table: compress() accepts positional and keyword
 * arguments, decompress() positional arguments only. */
2066 static PyMethodDef bz2_methods[] = {
2067 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2068 bz2_compress__doc__},
2069 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2070 bz2_decompress__doc__},
2071 {NULL, NULL} /* sentinel */
2074 /* ===================================================================== */
2075 /* Initialization function. */
2077 PyDoc_STRVAR(bz2__doc__,
2078 "The python bz2 module provides a comprehensive interface for\n\
2079 the bz2 compression library. It implements a complete file\n\
2080 interface, one shot (de)compression functions, and types for\n\
2081 sequential (de)compression.\n\
2084 DL_EXPORT(void)
2085 initbz2(void)
2087 PyObject *m;
2089 BZ2File_Type.ob_type = &PyType_Type;
2090 BZ2File_Type.tp_base = &PyFile_Type;
2091 BZ2File_Type.tp_new = PyFile_Type.tp_new;
2092 BZ2File_Type.tp_getattro = PyObject_GenericGetAttr;
2093 BZ2File_Type.tp_setattro = PyObject_GenericSetAttr;
2094 BZ2File_Type.tp_alloc = PyType_GenericAlloc;
2095 BZ2File_Type.tp_free = _PyObject_Del;
2097 BZ2Comp_Type.ob_type = &PyType_Type;
2098 BZ2Comp_Type.tp_getattro = PyObject_GenericGetAttr;
2099 BZ2Comp_Type.tp_setattro = PyObject_GenericSetAttr;
2100 BZ2Comp_Type.tp_alloc = PyType_GenericAlloc;
2101 BZ2Comp_Type.tp_new = PyType_GenericNew;
2102 BZ2Comp_Type.tp_free = _PyObject_Del;
2104 BZ2Decomp_Type.ob_type = &PyType_Type;
2105 BZ2Decomp_Type.tp_getattro = PyObject_GenericGetAttr;
2106 BZ2Decomp_Type.tp_setattro = PyObject_GenericSetAttr;
2107 BZ2Decomp_Type.tp_alloc = PyType_GenericAlloc;
2108 BZ2Decomp_Type.tp_new = PyType_GenericNew;
2109 BZ2Decomp_Type.tp_free = _PyObject_Del;
2111 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2113 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2115 Py_INCREF(&BZ2File_Type);
2116 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2118 Py_INCREF(&BZ2Comp_Type);
2119 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2121 Py_INCREF(&BZ2Decomp_Type);
2122 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);