Move setting of ioready 'wait' earlier in call chain, to
[python/dscho.git] / Modules / bz2module.c
blob114070fab1044514aae50b799e7a785f44d2b46c
1 /*
3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
8 */
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* Raw character buffer of a Python string object.  The argument is
 * parenthesised so that any pointer expression can be passed. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* States stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type test against BZ2File_Type (compares ob_type directly). */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
/* Total number of output bytes recorded in a bz_stream, combining the
 * high and low 32-bit counters when a 64-bit integer type is available
 * and falling back to the low word otherwise.  Every branch expands to
 * a single parenthesised expression (no trailing semicolon), so the
 * macro can be used inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif
/* Per-object locking helpers.  With thread support they take/release
 * obj->lock (acquire is called with waitflag 1); without it they expand
 * to nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
#ifdef WITH_UNIVERSAL_NEWLINES
/* Bit flags OR-ed together in f_newlinetypes to record which line
 * terminators have been encountered. */
#define NEWLINE_UNKNOWN 0   /* nothing seen so far */
#define NEWLINE_CR      1   /* a lone \r was seen */
#define NEWLINE_LF      2   /* a lone \n was seen */
#define NEWLINE_CRLF    4   /* a \r\n pair was seen */
#endif
61 /* ===================================================================== */
62 /* Structure definitions. */
64 typedef struct {
65 PyObject_HEAD
66 PyObject *file;
68 char* f_buf; /* Allocated readahead buffer */
69 char* f_bufend; /* Points after last occupied position */
70 char* f_bufptr; /* Current buffer position */
72 int f_softspace; /* Flag used by 'print' command */
74 #ifdef WITH_UNIVERSAL_NEWLINES
75 int f_univ_newline; /* Handle any newline convention */
76 int f_newlinetypes; /* Types of newlines seen */
77 int f_skipnextlf; /* Skip next \n */
78 #endif
80 BZFILE *fp;
81 int mode;
82 long pos;
83 long size;
84 #ifdef WITH_THREAD
85 PyThread_type_lock lock;
86 #endif
87 } BZ2FileObject;
89 typedef struct {
90 PyObject_HEAD
91 bz_stream bzs;
92 int running;
93 #ifdef WITH_THREAD
94 PyThread_type_lock lock;
95 #endif
96 } BZ2CompObject;
98 typedef struct {
99 PyObject_HEAD
100 bz_stream bzs;
101 int running;
102 PyObject *unused_data;
103 #ifdef WITH_THREAD
104 PyThread_type_lock lock;
105 #endif
106 } BZ2DecompObject;
108 /* ===================================================================== */
109 /* Utility functions. */
111 static int
112 Util_CatchBZ2Error(int bzerror)
114 int ret = 0;
115 switch(bzerror) {
116 case BZ_OK:
117 case BZ_STREAM_END:
118 break;
120 case BZ_CONFIG_ERROR:
121 PyErr_SetString(PyExc_SystemError,
122 "the bz2 library was not compiled "
123 "correctly");
124 ret = 1;
125 break;
127 case BZ_PARAM_ERROR:
128 PyErr_SetString(PyExc_ValueError,
129 "the bz2 library has received wrong "
130 "parameters");
131 ret = 1;
132 break;
134 case BZ_MEM_ERROR:
135 PyErr_NoMemory();
136 ret = 1;
137 break;
139 case BZ_DATA_ERROR:
140 case BZ_DATA_ERROR_MAGIC:
141 PyErr_SetString(PyExc_IOError, "invalid data stream");
142 ret = 1;
143 break;
145 case BZ_IO_ERROR:
146 PyErr_SetString(PyExc_IOError, "unknown IO error");
147 ret = 1;
148 break;
150 case BZ_UNEXPECTED_EOF:
151 PyErr_SetString(PyExc_EOFError,
152 "compressed file ended before the "
153 "logical end-of-stream was detected");
154 ret = 1;
155 break;
157 case BZ_SEQUENCE_ERROR:
158 PyErr_SetString(PyExc_RuntimeError,
159 "wrong sequence of bz2 library "
160 "commands used");
161 ret = 1;
162 break;
164 return ret;
/* Growth step used while a buffer is still small: at least 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Growth step used once a buffer has become large. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Returns the next size to use for a buffer that currently holds
 * `currentsize` bytes:
 *   - up to SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK.
 * If adding BIGCHUNK would wrap around size_t, the maximum size_t value
 * is returned instead of a small wrapped number, so that callers' own
 * size checks or allocations fail cleanly. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize > SMALLCHUNK) {
        /* Keep doubling until we reach BIGCHUNK;
           then keep adding BIGCHUNK. */
        if (currentsize <= BIGCHUNK)
            return currentsize + currentsize;
        if (currentsize > (size_t)-1 - BIGCHUNK)
            return (size_t)-1;
        return currentsize + BIGCHUNK;
    }
    return currentsize + SMALLCHUNK;
}
194 /* This is a hacked version of Python's fileobject.c:get_line(). */
195 static PyObject *
196 Util_GetLine(BZ2FileObject *f, int n)
198 char c;
199 char *buf, *end;
200 size_t total_v_size; /* total # of slots in buffer */
201 size_t used_v_size; /* # used slots in buffer */
202 size_t increment; /* amount to increment the buffer */
203 PyObject *v;
204 int bzerror;
205 #ifdef WITH_UNIVERSAL_NEWLINES
206 int newlinetypes = f->f_newlinetypes;
207 int skipnextlf = f->f_skipnextlf;
208 int univ_newline = f->f_univ_newline;
209 #endif
211 total_v_size = n > 0 ? n : 100;
212 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
213 if (v == NULL)
214 return NULL;
216 buf = BUF(v);
217 end = buf + total_v_size;
219 for (;;) {
220 Py_BEGIN_ALLOW_THREADS
221 #ifdef WITH_UNIVERSAL_NEWLINES
222 if (univ_newline) {
223 while (1) {
224 BZ2_bzRead(&bzerror, f->fp, &c, 1);
225 f->pos++;
226 if (bzerror != BZ_OK || buf == end)
227 break;
228 if (skipnextlf) {
229 skipnextlf = 0;
230 if (c == '\n') {
231 /* Seeing a \n here with
232 * skipnextlf true means we
233 * saw a \r before.
235 newlinetypes |= NEWLINE_CRLF;
236 BZ2_bzRead(&bzerror, f->fp,
237 &c, 1);
238 if (bzerror != BZ_OK)
239 break;
240 } else {
241 newlinetypes |= NEWLINE_CR;
244 if (c == '\r') {
245 skipnextlf = 1;
246 c = '\n';
247 } else if ( c == '\n')
248 newlinetypes |= NEWLINE_LF;
249 *buf++ = c;
250 if (c == '\n') break;
252 if (bzerror == BZ_STREAM_END && skipnextlf)
253 newlinetypes |= NEWLINE_CR;
254 } else /* If not universal newlines use the normal loop */
255 #endif
256 do {
257 BZ2_bzRead(&bzerror, f->fp, &c, 1);
258 f->pos++;
259 *buf++ = c;
260 } while (bzerror == BZ_OK && c != '\n' && buf != end);
261 Py_END_ALLOW_THREADS
262 #ifdef WITH_UNIVERSAL_NEWLINES
263 f->f_newlinetypes = newlinetypes;
264 f->f_skipnextlf = skipnextlf;
265 #endif
266 if (bzerror == BZ_STREAM_END) {
267 f->size = f->pos;
268 f->mode = MODE_READ_EOF;
269 break;
270 } else if (bzerror != BZ_OK) {
271 Util_CatchBZ2Error(bzerror);
272 Py_DECREF(v);
273 return NULL;
275 if (c == '\n')
276 break;
277 /* Must be because buf == end */
278 if (n > 0)
279 break;
280 used_v_size = total_v_size;
281 increment = total_v_size >> 2; /* mild exponential growth */
282 total_v_size += increment;
283 if (total_v_size > INT_MAX) {
284 PyErr_SetString(PyExc_OverflowError,
285 "line is longer than a Python string can hold");
286 Py_DECREF(v);
287 return NULL;
289 if (_PyString_Resize(&v, total_v_size) < 0)
290 return NULL;
291 buf = BUF(v) + used_v_size;
292 end = BUF(v) + total_v_size;
295 used_v_size = buf - BUF(v);
296 if (used_v_size != total_v_size)
297 _PyString_Resize(&v, used_v_size);
298 return v;
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* This is a hacked version of Python's
 * fileobject.c:Py_UniversalNewlineFread().
 *
 * Reads up to n bytes from `stream` into `buf`.  When f->f_univ_newline
 * is unset this is a plain BZ2_bzRead().  Otherwise every \r and \r\n
 * is stored as a single \n, the newline kinds encountered are recorded
 * in f->f_newlinetypes, and a trailing \r is remembered in
 * f->f_skipnextlf so that a \n starting the next read is dropped.
 * Reading continues until n translated bytes are stored or a read comes
 * back short.  Returns the number of bytes stored; *bzerror holds the
 * status of the last BZ2_bzRead() call. */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
                     char* buf, size_t n, BZ2FileObject *f)
{
    char *out = buf;
    int seen;
    int after_cr;

    assert(buf != NULL);
    assert(stream != NULL);

    if (!f->f_univ_newline)
        return BZ2_bzRead(bzerror, stream, buf, n);

    seen = f->f_newlinetypes;
    after_cr = f->f_skipnextlf;

    /* Invariant:  n is the number of bytes remaining to be filled
     * in the buffer.
     */
    while (n) {
        char *in = out;
        size_t got;
        int incomplete;

        got = BZ2_bzRead(bzerror, stream, out, n);
        assert(got <= n);
        n -= got;               /* one byte out per byte in, for now */
        incomplete = (n != 0);  /* true iff EOF or error */

        /* Translate in place; `out` never overtakes `in`. */
        for (; got > 0; got--) {
            char ch = *in++;

            if (ch == '\r') {
                /* Store as LF; a directly following LF is dropped. */
                *out++ = '\n';
                after_cr = 1;
                continue;
            }
            if (after_cr && ch == '\n') {
                /* Second half of CR LF: drop it, reclaim the slot. */
                after_cr = 0;
                seen |= NEWLINE_CRLF;
                ++n;
                continue;
            }
            /* Ordinary byte.  Record a bare LF, or the bare CR that
             * preceded this byte.
             */
            if (ch == '\n')
                seen |= NEWLINE_LF;
            else if (after_cr)
                seen |= NEWLINE_CR;
            *out++ = ch;
            after_cr = 0;
        }

        if (incomplete) {
            /* A CR pending at end of stream was a bare CR. */
            if (after_cr && *bzerror == BZ_STREAM_END)
                seen |= NEWLINE_CR;
            break;
        }
    }

    f->f_newlinetypes = seen;
    f->f_skipnextlf = after_cr;
    return out - buf;
}
#endif
373 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
374 static void
375 Util_DropReadAhead(BZ2FileObject *f)
377 if (f->f_buf != NULL) {
378 PyMem_Free(f->f_buf);
379 f->f_buf = NULL;
383 /* This is a hacked version of Python's fileobject.c:readahead(). */
384 static int
385 Util_ReadAhead(BZ2FileObject *f, int bufsize)
387 int chunksize;
388 int bzerror;
390 if (f->f_buf != NULL) {
391 if((f->f_bufend - f->f_bufptr) >= 1)
392 return 0;
393 else
394 Util_DropReadAhead(f);
396 if (f->mode == MODE_READ_EOF) {
397 return -1;
399 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
400 return -1;
402 Py_BEGIN_ALLOW_THREADS
403 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
404 bufsize, f);
405 Py_END_ALLOW_THREADS
406 f->pos += chunksize;
407 if (bzerror == BZ_STREAM_END) {
408 f->size = f->pos;
409 f->mode = MODE_READ_EOF;
410 } else if (bzerror != BZ_OK) {
411 Util_CatchBZ2Error(bzerror);
412 Util_DropReadAhead(f);
413 return -1;
415 f->f_bufptr = f->f_buf;
416 f->f_bufend = f->f_buf + chunksize;
417 return 0;
420 /* This is a hacked version of Python's
421 * fileobject.c:readahead_get_line_skip(). */
422 static PyStringObject *
423 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
425 PyStringObject* s;
426 char *bufptr;
427 char *buf;
428 int len;
430 if (f->f_buf == NULL)
431 if (Util_ReadAhead(f, bufsize) < 0)
432 return NULL;
434 len = f->f_bufend - f->f_bufptr;
435 if (len == 0)
436 return (PyStringObject *)
437 PyString_FromStringAndSize(NULL, skip);
438 bufptr = memchr(f->f_bufptr, '\n', len);
439 if (bufptr != NULL) {
440 bufptr++; /* Count the '\n' */
441 len = bufptr - f->f_bufptr;
442 s = (PyStringObject *)
443 PyString_FromStringAndSize(NULL, skip+len);
444 if (s == NULL)
445 return NULL;
446 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
447 f->f_bufptr = bufptr;
448 if (bufptr == f->f_bufend)
449 Util_DropReadAhead(f);
450 } else {
451 bufptr = f->f_bufptr;
452 buf = f->f_buf;
453 f->f_buf = NULL; /* Force new readahead buffer */
454 s = Util_ReadAheadGetLineSkip(f, skip+len,
455 bufsize + (bufsize>>2));
456 if (s == NULL) {
457 PyMem_Free(buf);
458 return NULL;
460 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
461 PyMem_Free(buf);
463 return s;
466 /* ===================================================================== */
467 /* Methods of BZ2File. */
469 PyDoc_STRVAR(BZ2File_read__doc__,
470 "read([size]) -> string\n\
472 Read at most size uncompressed bytes, returned as a string. If the size\n\
473 argument is negative or omitted, read until EOF is reached.\n\
476 /* This is a hacked version of Python's fileobject.c:file_read(). */
477 static PyObject *
478 BZ2File_read(BZ2FileObject *self, PyObject *args)
480 long bytesrequested = -1;
481 size_t bytesread, buffersize, chunksize;
482 int bzerror;
483 PyObject *ret = NULL;
485 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
486 return NULL;
488 ACQUIRE_LOCK(self);
489 switch (self->mode) {
490 case MODE_READ:
491 break;
492 case MODE_READ_EOF:
493 ret = PyString_FromString("");
494 goto cleanup;
495 case MODE_CLOSED:
496 PyErr_SetString(PyExc_ValueError,
497 "I/O operation on closed file");
498 goto cleanup;
499 default:
500 PyErr_SetString(PyExc_IOError,
501 "file is not ready for reading");
502 goto cleanup;
505 if (bytesrequested < 0)
506 buffersize = Util_NewBufferSize((size_t)0);
507 else
508 buffersize = bytesrequested;
509 if (buffersize > INT_MAX) {
510 PyErr_SetString(PyExc_OverflowError,
511 "requested number of bytes is "
512 "more than a Python string can hold");
513 goto cleanup;
515 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
516 if (ret == NULL)
517 goto cleanup;
518 bytesread = 0;
520 for (;;) {
521 Py_BEGIN_ALLOW_THREADS
522 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
523 BUF(ret)+bytesread,
524 buffersize-bytesread,
525 self);
526 self->pos += chunksize;
527 Py_END_ALLOW_THREADS
528 bytesread += chunksize;
529 if (bzerror == BZ_STREAM_END) {
530 self->size = self->pos;
531 self->mode = MODE_READ_EOF;
532 break;
533 } else if (bzerror != BZ_OK) {
534 Util_CatchBZ2Error(bzerror);
535 Py_DECREF(ret);
536 ret = NULL;
537 goto cleanup;
539 if (bytesrequested < 0) {
540 buffersize = Util_NewBufferSize(buffersize);
541 if (_PyString_Resize(&ret, buffersize) < 0)
542 goto cleanup;
543 } else {
544 break;
547 if (bytesread != buffersize)
548 _PyString_Resize(&ret, bytesread);
550 cleanup:
551 RELEASE_LOCK(self);
552 return ret;
555 PyDoc_STRVAR(BZ2File_readline__doc__,
556 "readline([size]) -> string\n\
558 Return the next line from the file, as a string, retaining newline.\n\
559 A non-negative size argument will limit the maximum number of bytes to\n\
560 return (an incomplete line may be returned then). Return an empty\n\
561 string at EOF.\n\
564 static PyObject *
565 BZ2File_readline(BZ2FileObject *self, PyObject *args)
567 PyObject *ret = NULL;
568 int sizehint = -1;
570 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
571 return NULL;
573 ACQUIRE_LOCK(self);
574 switch (self->mode) {
575 case MODE_READ:
576 break;
577 case MODE_READ_EOF:
578 ret = PyString_FromString("");
579 goto cleanup;
580 case MODE_CLOSED:
581 PyErr_SetString(PyExc_ValueError,
582 "I/O operation on closed file");
583 goto cleanup;
584 default:
585 PyErr_SetString(PyExc_IOError,
586 "file is not ready for reading");
587 goto cleanup;
590 if (sizehint == 0)
591 ret = PyString_FromString("");
592 else
593 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
595 cleanup:
596 RELEASE_LOCK(self);
597 return ret;
600 PyDoc_STRVAR(BZ2File_readlines__doc__,
601 "readlines([size]) -> list\n\
603 Call readline() repeatedly and return a list of lines read.\n\
604 The optional size argument, if given, is an approximate bound on the\n\
605 total number of bytes in the lines returned.\n\
608 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
609 static PyObject *
610 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
612 long sizehint = 0;
613 PyObject *list = NULL;
614 PyObject *line;
615 char small_buffer[SMALLCHUNK];
616 char *buffer = small_buffer;
617 size_t buffersize = SMALLCHUNK;
618 PyObject *big_buffer = NULL;
619 size_t nfilled = 0;
620 size_t nread;
621 size_t totalread = 0;
622 char *p, *q, *end;
623 int err;
624 int shortread = 0;
625 int bzerror;
627 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
628 return NULL;
630 ACQUIRE_LOCK(self);
631 switch (self->mode) {
632 case MODE_READ:
633 break;
634 case MODE_READ_EOF:
635 list = PyList_New(0);
636 goto cleanup;
637 case MODE_CLOSED:
638 PyErr_SetString(PyExc_ValueError,
639 "I/O operation on closed file");
640 goto cleanup;
641 default:
642 PyErr_SetString(PyExc_IOError,
643 "file is not ready for reading");
644 goto cleanup;
647 if ((list = PyList_New(0)) == NULL)
648 goto cleanup;
650 for (;;) {
651 Py_BEGIN_ALLOW_THREADS
652 nread = Util_UnivNewlineRead(&bzerror, self->fp,
653 buffer+nfilled,
654 buffersize-nfilled, self);
655 self->pos += nread;
656 Py_END_ALLOW_THREADS
657 if (bzerror == BZ_STREAM_END) {
658 self->size = self->pos;
659 self->mode = MODE_READ_EOF;
660 if (nread == 0) {
661 sizehint = 0;
662 break;
664 shortread = 1;
665 } else if (bzerror != BZ_OK) {
666 Util_CatchBZ2Error(bzerror);
667 error:
668 Py_DECREF(list);
669 list = NULL;
670 goto cleanup;
672 totalread += nread;
673 p = memchr(buffer+nfilled, '\n', nread);
674 if (p == NULL) {
675 /* Need a larger buffer to fit this line */
676 nfilled += nread;
677 buffersize *= 2;
678 if (buffersize > INT_MAX) {
679 PyErr_SetString(PyExc_OverflowError,
680 "line is longer than a Python string can hold");
681 goto error;
683 if (big_buffer == NULL) {
684 /* Create the big buffer */
685 big_buffer = PyString_FromStringAndSize(
686 NULL, buffersize);
687 if (big_buffer == NULL)
688 goto error;
689 buffer = PyString_AS_STRING(big_buffer);
690 memcpy(buffer, small_buffer, nfilled);
692 else {
693 /* Grow the big buffer */
694 _PyString_Resize(&big_buffer, buffersize);
695 buffer = PyString_AS_STRING(big_buffer);
697 continue;
699 end = buffer+nfilled+nread;
700 q = buffer;
701 do {
702 /* Process complete lines */
703 p++;
704 line = PyString_FromStringAndSize(q, p-q);
705 if (line == NULL)
706 goto error;
707 err = PyList_Append(list, line);
708 Py_DECREF(line);
709 if (err != 0)
710 goto error;
711 q = p;
712 p = memchr(q, '\n', end-q);
713 } while (p != NULL);
714 /* Move the remaining incomplete line to the start */
715 nfilled = end-q;
716 memmove(buffer, q, nfilled);
717 if (sizehint > 0)
718 if (totalread >= (size_t)sizehint)
719 break;
720 if (shortread) {
721 sizehint = 0;
722 break;
725 if (nfilled != 0) {
726 /* Partial last line */
727 line = PyString_FromStringAndSize(buffer, nfilled);
728 if (line == NULL)
729 goto error;
730 if (sizehint > 0) {
731 /* Need to complete the last line */
732 PyObject *rest = Util_GetLine(self, 0);
733 if (rest == NULL) {
734 Py_DECREF(line);
735 goto error;
737 PyString_Concat(&line, rest);
738 Py_DECREF(rest);
739 if (line == NULL)
740 goto error;
742 err = PyList_Append(list, line);
743 Py_DECREF(line);
744 if (err != 0)
745 goto error;
748 cleanup:
749 RELEASE_LOCK(self);
750 if (big_buffer) {
751 Py_DECREF(big_buffer);
753 return list;
756 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
757 "xreadlines() -> self\n\
759 For backward compatibility. BZ2File objects now include the performance\n\
760 optimizations previously implemented in the xreadlines module.\n\
763 PyDoc_STRVAR(BZ2File_write__doc__,
764 "write(data) -> None\n\
766 Write the 'data' string to file. Note that due to buffering, close() may\n\
767 be needed before the file on disk reflects the data written.\n\
770 /* This is a hacked version of Python's fileobject.c:file_write(). */
771 static PyObject *
772 BZ2File_write(BZ2FileObject *self, PyObject *args)
774 PyObject *ret = NULL;
775 char *buf;
776 int len;
777 int bzerror;
779 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
780 return NULL;
782 ACQUIRE_LOCK(self);
783 switch (self->mode) {
784 case MODE_WRITE:
785 break;
787 case MODE_CLOSED:
788 PyErr_SetString(PyExc_ValueError,
789 "I/O operation on closed file");
790 goto cleanup;;
792 default:
793 PyErr_SetString(PyExc_IOError,
794 "file is not ready for writing");
795 goto cleanup;;
798 self->f_softspace = 0;
800 Py_BEGIN_ALLOW_THREADS
801 BZ2_bzWrite (&bzerror, self->fp, buf, len);
802 self->pos += len;
803 Py_END_ALLOW_THREADS
805 if (bzerror != BZ_OK) {
806 Util_CatchBZ2Error(bzerror);
807 goto cleanup;
810 Py_INCREF(Py_None);
811 ret = Py_None;
813 cleanup:
814 RELEASE_LOCK(self);
815 return ret;
818 PyDoc_STRVAR(BZ2File_writelines__doc__,
819 "writelines(sequence_of_strings) -> None\n\
821 Write the sequence of strings to the file. Note that newlines are not\n\
822 added. The sequence can be any iterable object producing strings. This is\n\
823 equivalent to calling write() for each string.\n\
826 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
827 static PyObject *
828 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
830 #define CHUNKSIZE 1000
831 PyObject *list = NULL;
832 PyObject *iter = NULL;
833 PyObject *ret = NULL;
834 PyObject *line;
835 int i, j, index, len, islist;
836 int bzerror;
838 ACQUIRE_LOCK(self);
839 islist = PyList_Check(seq);
840 if (!islist) {
841 iter = PyObject_GetIter(seq);
842 if (iter == NULL) {
843 PyErr_SetString(PyExc_TypeError,
844 "writelines() requires an iterable argument");
845 goto error;
847 list = PyList_New(CHUNKSIZE);
848 if (list == NULL)
849 goto error;
852 /* Strategy: slurp CHUNKSIZE lines into a private list,
853 checking that they are all strings, then write that list
854 without holding the interpreter lock, then come back for more. */
855 for (index = 0; ; index += CHUNKSIZE) {
856 if (islist) {
857 Py_XDECREF(list);
858 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
859 if (list == NULL)
860 goto error;
861 j = PyList_GET_SIZE(list);
863 else {
864 for (j = 0; j < CHUNKSIZE; j++) {
865 line = PyIter_Next(iter);
866 if (line == NULL) {
867 if (PyErr_Occurred())
868 goto error;
869 break;
871 PyList_SetItem(list, j, line);
874 if (j == 0)
875 break;
877 /* Check that all entries are indeed strings. If not,
878 apply the same rules as for file.write() and
879 convert the rets to strings. This is slow, but
880 seems to be the only way since all conversion APIs
881 could potentially execute Python code. */
882 for (i = 0; i < j; i++) {
883 PyObject *v = PyList_GET_ITEM(list, i);
884 if (!PyString_Check(v)) {
885 const char *buffer;
886 int len;
887 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
888 PyErr_SetString(PyExc_TypeError,
889 "writelines() "
890 "argument must be "
891 "a sequence of "
892 "strings");
893 goto error;
895 line = PyString_FromStringAndSize(buffer,
896 len);
897 if (line == NULL)
898 goto error;
899 Py_DECREF(v);
900 PyList_SET_ITEM(list, i, line);
904 self->f_softspace = 0;
906 /* Since we are releasing the global lock, the
907 following code may *not* execute Python code. */
908 Py_BEGIN_ALLOW_THREADS
909 for (i = 0; i < j; i++) {
910 line = PyList_GET_ITEM(list, i);
911 len = PyString_GET_SIZE(line);
912 BZ2_bzWrite (&bzerror, self->fp,
913 PyString_AS_STRING(line), len);
914 if (bzerror != BZ_OK) {
915 Py_BLOCK_THREADS
916 Util_CatchBZ2Error(bzerror);
917 goto error;
920 Py_END_ALLOW_THREADS
922 if (j < CHUNKSIZE)
923 break;
926 Py_INCREF(Py_None);
927 ret = Py_None;
929 error:
930 RELEASE_LOCK(self);
931 Py_XDECREF(list);
932 Py_XDECREF(iter);
933 return ret;
934 #undef CHUNKSIZE
937 PyDoc_STRVAR(BZ2File_seek__doc__,
938 "seek(offset [, whence]) -> None\n\
940 Move to new file position. Argument offset is a byte count. Optional\n\
941 argument whence defaults to 0 (offset from start of file, offset\n\
942 should be >= 0); other values are 1 (move relative to current position,\n\
943 positive or negative), and 2 (move relative to end of file, usually\n\
944 negative, although many platforms allow seeking beyond the end of a file).\n\
946 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
947 the operation may be extremely slow.\n\
950 static PyObject *
951 BZ2File_seek(BZ2FileObject *self, PyObject *args)
953 int where = 0;
954 long offset;
955 char small_buffer[SMALLCHUNK];
956 char *buffer = small_buffer;
957 size_t buffersize = SMALLCHUNK;
958 int bytesread = 0;
959 int readsize;
960 int chunksize;
961 int bzerror;
962 int rewind = 0;
963 PyObject *ret = NULL;
965 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
966 return NULL;
968 ACQUIRE_LOCK(self);
969 Util_DropReadAhead(self);
970 switch (self->mode) {
971 case MODE_READ:
972 case MODE_READ_EOF:
973 break;
975 case MODE_CLOSED:
976 PyErr_SetString(PyExc_ValueError,
977 "I/O operation on closed file");
978 goto cleanup;;
980 default:
981 PyErr_SetString(PyExc_IOError,
982 "seek works only while reading");
983 goto cleanup;;
986 if (offset < 0) {
987 if (where == 1) {
988 offset = self->pos + offset;
989 rewind = 1;
990 } else if (where == 2) {
991 if (self->size == -1) {
992 assert(self->mode != MODE_READ_EOF);
993 for (;;) {
994 Py_BEGIN_ALLOW_THREADS
995 chunksize = Util_UnivNewlineRead(
996 &bzerror, self->fp,
997 buffer, buffersize,
998 self);
999 self->pos += chunksize;
1000 Py_END_ALLOW_THREADS
1002 bytesread += chunksize;
1003 if (bzerror == BZ_STREAM_END) {
1004 break;
1005 } else if (bzerror != BZ_OK) {
1006 Util_CatchBZ2Error(bzerror);
1007 goto cleanup;
1010 self->mode = MODE_READ_EOF;
1011 self->size = self->pos;
1012 bytesread = 0;
1014 offset = self->size + offset;
1015 if (offset >= self->pos)
1016 offset -= self->pos;
1017 else
1018 rewind = 1;
1020 if (offset < 0)
1021 offset = 0;
1022 } else if (where == 0) {
1023 if (offset >= self->pos)
1024 offset -= self->pos;
1025 else
1026 rewind = 1;
1029 if (rewind) {
1030 BZ2_bzReadClose(&bzerror, self->fp);
1031 if (bzerror != BZ_OK) {
1032 Util_CatchBZ2Error(bzerror);
1033 goto cleanup;
1035 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1036 if (!ret)
1037 goto cleanup;
1038 Py_DECREF(ret);
1039 ret = NULL;
1040 self->pos = 0;
1041 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1042 0, 0, NULL, 0);
1043 if (bzerror != BZ_OK) {
1044 Util_CatchBZ2Error(bzerror);
1045 goto cleanup;
1047 self->mode = MODE_READ;
1048 } else if (self->mode == MODE_READ_EOF) {
1049 goto exit;
1052 if (offset == 0)
1053 goto exit;
1055 /* Before getting here, offset must be set to the number of bytes
1056 * to walk forward. */
1057 for (;;) {
1058 if ((size_t)offset-bytesread > buffersize)
1059 readsize = buffersize;
1060 else
1061 readsize = offset-bytesread;
1062 Py_BEGIN_ALLOW_THREADS
1063 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1064 buffer, readsize, self);
1065 self->pos += chunksize;
1066 Py_END_ALLOW_THREADS
1067 bytesread += chunksize;
1068 if (bzerror == BZ_STREAM_END) {
1069 self->size = self->pos;
1070 self->mode = MODE_READ_EOF;
1071 break;
1072 } else if (bzerror != BZ_OK) {
1073 Util_CatchBZ2Error(bzerror);
1074 goto cleanup;
1076 if (bytesread == offset)
1077 break;
1080 exit:
1081 Py_INCREF(Py_None);
1082 ret = Py_None;
1084 cleanup:
1085 RELEASE_LOCK(self);
1086 return ret;
1089 PyDoc_STRVAR(BZ2File_tell__doc__,
1090 "tell() -> int\n\
1092 Return the current file position, an integer (may be a long integer).\n\
1095 static PyObject *
1096 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1098 PyObject *ret = NULL;
1100 if (self->mode == MODE_CLOSED) {
1101 PyErr_SetString(PyExc_ValueError,
1102 "I/O operation on closed file");
1103 goto cleanup;
1106 ret = PyInt_FromLong(self->pos);
1108 cleanup:
1109 return ret;
1112 PyDoc_STRVAR(BZ2File_close__doc__,
1113 "close() -> None or (perhaps) an integer\n\
1115 Close the file. Sets data attribute .closed to true. A closed file\n\
1116 cannot be used for further I/O operations. close() may be called more\n\
1117 than once without error.\n\
1120 static PyObject *
1121 BZ2File_close(BZ2FileObject *self)
1123 PyObject *ret = NULL;
1124 int bzerror = BZ_OK;
1126 ACQUIRE_LOCK(self);
1127 switch (self->mode) {
1128 case MODE_READ:
1129 case MODE_READ_EOF:
1130 BZ2_bzReadClose(&bzerror, self->fp);
1131 break;
1132 case MODE_WRITE:
1133 BZ2_bzWriteClose(&bzerror, self->fp,
1134 0, NULL, NULL);
1135 break;
1137 self->mode = MODE_CLOSED;
1138 ret = PyObject_CallMethod(self->file, "close", NULL);
1139 if (bzerror != BZ_OK) {
1140 Util_CatchBZ2Error(bzerror);
1141 Py_XDECREF(ret);
1142 ret = NULL;
1145 RELEASE_LOCK(self);
1146 return ret;
1149 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1151 static PyMethodDef BZ2File_methods[] = {
1152 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1153 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1154 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1155 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1156 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1157 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1158 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1159 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1160 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1161 {NULL, NULL} /* sentinel */
1165 /* ===================================================================== */
1166 /* Getters and setters of BZ2File. */
#ifdef WITH_UNIVERSAL_NEWLINES
/* This is a hacked version of Python's fileobject.c:get_newlines().
 *
 * Getter for the 'newlines' attribute.  Maps the NEWLINE_* bits in
 * f_newlinetypes to None (nothing seen), a single string (one kind
 * seen) or a tuple of strings (several kinds seen).  Any other value
 * raises SystemError. */
static PyObject *
BZ2File_get_newlines(BZ2FileObject *self, void *closure)
{
    switch (self->f_newlinetypes) {
    /* nothing seen yet */
    case NEWLINE_UNKNOWN:
        Py_INCREF(Py_None);
        return Py_None;

    /* exactly one kind */
    case NEWLINE_CR:
        return PyString_FromString("\r");
    case NEWLINE_LF:
        return PyString_FromString("\n");
    case NEWLINE_CRLF:
        return PyString_FromString("\r\n");

    /* two kinds */
    case NEWLINE_CR|NEWLINE_LF:
        return Py_BuildValue("(ss)", "\r", "\n");
    case NEWLINE_CR|NEWLINE_CRLF:
        return Py_BuildValue("(ss)", "\r", "\r\n");
    case NEWLINE_LF|NEWLINE_CRLF:
        return Py_BuildValue("(ss)", "\n", "\r\n");

    /* all three */
    case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");

    default:
        break;
    }
    PyErr_Format(PyExc_SystemError,
                 "Unknown newlines value 0x%x\n",
                 self->f_newlinetypes);
    return NULL;
}
#endif
1200 static PyObject *
1201 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1203 return PyInt_FromLong(self->mode == MODE_CLOSED);
1206 static PyObject *
1207 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1209 return PyObject_GetAttrString(self->file, "mode");
1212 static PyObject *
1213 BZ2File_get_name(BZ2FileObject *self, void *closure)
1215 return PyObject_GetAttrString(self->file, "name");
1218 static PyGetSetDef BZ2File_getset[] = {
1219 {"closed", (getter)BZ2File_get_closed, NULL,
1220 "True if the file is closed"},
1221 #ifdef WITH_UNIVERSAL_NEWLINES
1222 {"newlines", (getter)BZ2File_get_newlines, NULL,
1223 "end-of-line convention used in this file"},
1224 #endif
1225 {"mode", (getter)BZ2File_get_mode, NULL,
1226 "file mode ('r', 'w', or 'U')"},
1227 {"name", (getter)BZ2File_get_name, NULL,
1228 "file name"},
1229 {NULL} /* Sentinel */
1233 /* ===================================================================== */
1234 /* Members of BZ2File_Type. */
1236 #undef OFF
1237 #define OFF(x) offsetof(BZ2FileObject, x)
1239 static PyMemberDef BZ2File_members[] = {
1240 {"softspace", T_INT, OFF(f_softspace), 0,
1241 "flag indicating that a space needs to be printed; used by print"},
1242 {NULL} /* Sentinel */
1245 /* ===================================================================== */
1246 /* Slot definitions for BZ2File_Type. */
1248 static int
1249 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1251 static char *kwlist[] = {"filename", "mode", "buffering",
1252 "compresslevel", 0};
1253 PyObject *name;
1254 char *mode = "r";
1255 int buffering = -1;
1256 int compresslevel = 9;
1257 int bzerror;
1258 int mode_char = 0;
1260 self->size = -1;
1262 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1263 kwlist, &name, &mode, &buffering,
1264 &compresslevel))
1265 return -1;
1267 if (compresslevel < 1 || compresslevel > 9) {
1268 PyErr_SetString(PyExc_ValueError,
1269 "compresslevel must be between 1 and 9");
1270 return -1;
1273 for (;;) {
1274 int error = 0;
1275 switch (*mode) {
1276 case 'r':
1277 case 'w':
1278 if (mode_char)
1279 error = 1;
1280 mode_char = *mode;
1281 break;
1283 case 'b':
1284 break;
1286 case 'U':
1287 self->f_univ_newline = 1;
1288 break;
1290 default:
1291 error = 1;
1292 break;
1294 if (error) {
1295 PyErr_Format(PyExc_ValueError,
1296 "invalid mode char %c", *mode);
1297 return -1;
1299 mode++;
1300 if (*mode == '\0')
1301 break;
1304 mode = (mode_char == 'r') ? "rb" : "wb";
1306 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1307 name, mode, buffering);
1308 if (self->file == NULL)
1309 return -1;
1311 /* From now on, we have stuff to dealloc, so jump to error label
1312 * instead of returning */
1314 #ifdef WITH_THREAD
1315 self->lock = PyThread_allocate_lock();
1316 if (!self->lock)
1317 goto error;
1318 #endif
1320 if (mode_char == 'r')
1321 self->fp = BZ2_bzReadOpen(&bzerror,
1322 PyFile_AsFile(self->file),
1323 0, 0, NULL, 0);
1324 else
1325 self->fp = BZ2_bzWriteOpen(&bzerror,
1326 PyFile_AsFile(self->file),
1327 compresslevel, 0, 0);
1329 if (bzerror != BZ_OK) {
1330 Util_CatchBZ2Error(bzerror);
1331 goto error;
1334 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1336 return 0;
1338 error:
1339 Py_DECREF(self->file);
1340 #ifdef WITH_THREAD
1341 if (self->lock)
1342 PyThread_free_lock(self->lock);
1343 #endif
1344 return -1;
1347 static void
1348 BZ2File_dealloc(BZ2FileObject *self)
1350 int bzerror;
1351 #ifdef WITH_THREAD
1352 if (self->lock)
1353 PyThread_free_lock(self->lock);
1354 #endif
1355 switch (self->mode) {
1356 case MODE_READ:
1357 case MODE_READ_EOF:
1358 BZ2_bzReadClose(&bzerror, self->fp);
1359 break;
1360 case MODE_WRITE:
1361 BZ2_bzWriteClose(&bzerror, self->fp,
1362 0, NULL, NULL);
1363 break;
1365 Util_DropReadAhead(self);
1366 Py_DECREF(self->file);
1367 self->ob_type->tp_free((PyObject *)self);
1370 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1371 static PyObject *
1372 BZ2File_getiter(BZ2FileObject *self)
1374 if (self->mode == MODE_CLOSED) {
1375 PyErr_SetString(PyExc_ValueError,
1376 "I/O operation on closed file");
1377 return NULL;
1379 Py_INCREF((PyObject*)self);
1380 return (PyObject *)self;
1383 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1384 #define READAHEAD_BUFSIZE 8192
1385 static PyObject *
1386 BZ2File_iternext(BZ2FileObject *self)
1388 PyStringObject* ret;
1389 ACQUIRE_LOCK(self);
1390 if (self->mode == MODE_CLOSED) {
1391 PyErr_SetString(PyExc_ValueError,
1392 "I/O operation on closed file");
1393 return NULL;
1395 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1396 RELEASE_LOCK(self);
1397 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1398 Py_XDECREF(ret);
1399 return NULL;
1401 return (PyObject *)ret;
1404 /* ===================================================================== */
1405 /* BZ2File_Type definition. */
1407 PyDoc_VAR(BZ2File__doc__) =
1408 PyDoc_STR(
1409 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1411 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1412 writing. When opened for writing, the file will be created if it doesn't\n\
1413 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1414 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1415 is given, must be a number between 1 and 9.\n\
1417 #ifdef WITH_UNIVERSAL_NEWLINES
1418 PyDoc_STR(
1419 "\n\
1420 Add a 'U' to mode to open the file for input with universal newline\n\
1421 support. Any line ending in the input file will be seen as a '\\n' in\n\
1422 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1423 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1424 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1425 newlines are available only when reading.\n\
1427 #endif
1430 static PyTypeObject BZ2File_Type = {
1431 PyObject_HEAD_INIT(NULL)
1432 0, /*ob_size*/
1433 "bz2.BZ2File", /*tp_name*/
1434 sizeof(BZ2FileObject), /*tp_basicsize*/
1435 0, /*tp_itemsize*/
1436 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1437 0, /*tp_print*/
1438 0, /*tp_getattr*/
1439 0, /*tp_setattr*/
1440 0, /*tp_compare*/
1441 0, /*tp_repr*/
1442 0, /*tp_as_number*/
1443 0, /*tp_as_sequence*/
1444 0, /*tp_as_mapping*/
1445 0, /*tp_hash*/
1446 0, /*tp_call*/
1447 0, /*tp_str*/
1448 PyObject_GenericGetAttr,/*tp_getattro*/
1449 PyObject_GenericSetAttr,/*tp_setattro*/
1450 0, /*tp_as_buffer*/
1451 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1452 BZ2File__doc__, /*tp_doc*/
1453 0, /*tp_traverse*/
1454 0, /*tp_clear*/
1455 0, /*tp_richcompare*/
1456 0, /*tp_weaklistoffset*/
1457 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1458 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1459 BZ2File_methods, /*tp_methods*/
1460 BZ2File_members, /*tp_members*/
1461 BZ2File_getset, /*tp_getset*/
1462 0, /*tp_base*/
1463 0, /*tp_dict*/
1464 0, /*tp_descr_get*/
1465 0, /*tp_descr_set*/
1466 0, /*tp_dictoffset*/
1467 (initproc)BZ2File_init, /*tp_init*/
1468 PyType_GenericAlloc, /*tp_alloc*/
1469 PyType_GenericNew, /*tp_new*/
1470 _PyObject_Del, /*tp_free*/
1471 0, /*tp_is_gc*/
1475 /* ===================================================================== */
1476 /* Methods of BZ2Comp. */
1478 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1479 "compress(data) -> string\n\
1481 Provide more data to the compressor object. It will return chunks of\n\
1482 compressed data whenever possible. When you've finished providing data\n\
1483 to compress, call the flush() method to finish the compression process,\n\
1484 and return what is left in the internal buffers.\n\
1487 static PyObject *
1488 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1490 char *data;
1491 int datasize;
1492 int bufsize = SMALLCHUNK;
1493 LONG_LONG totalout;
1494 PyObject *ret = NULL;
1495 bz_stream *bzs = &self->bzs;
1496 int bzerror;
1498 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1499 return NULL;
1501 ACQUIRE_LOCK(self);
1502 if (!self->running) {
1503 PyErr_SetString(PyExc_ValueError,
1504 "this object was already flushed");
1505 goto error;
1508 ret = PyString_FromStringAndSize(NULL, bufsize);
1509 if (!ret)
1510 goto error;
1512 bzs->next_in = data;
1513 bzs->avail_in = datasize;
1514 bzs->next_out = BUF(ret);
1515 bzs->avail_out = bufsize;
1517 totalout = BZS_TOTAL_OUT(bzs);
1519 for (;;) {
1520 Py_BEGIN_ALLOW_THREADS
1521 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1522 Py_END_ALLOW_THREADS
1523 if (bzerror != BZ_RUN_OK) {
1524 Util_CatchBZ2Error(bzerror);
1525 goto error;
1527 if (bzs->avail_out == 0) {
1528 bufsize = Util_NewBufferSize(bufsize);
1529 if (_PyString_Resize(&ret, bufsize) < 0) {
1530 BZ2_bzCompressEnd(bzs);
1531 goto error;
1533 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1534 - totalout);
1535 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1536 } else if (bzs->avail_in == 0) {
1537 break;
1541 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1543 RELEASE_LOCK(self);
1544 return ret;
1546 error:
1547 RELEASE_LOCK(self);
1548 Py_XDECREF(ret);
1549 return NULL;
1552 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1553 "flush() -> string\n\
1555 Finish the compression process and return what is left in internal buffers.\n\
1556 You must not use the compressor object after calling this method.\n\
1559 static PyObject *
1560 BZ2Comp_flush(BZ2CompObject *self)
1562 int bufsize = SMALLCHUNK;
1563 PyObject *ret = NULL;
1564 bz_stream *bzs = &self->bzs;
1565 LONG_LONG totalout;
1566 int bzerror;
1568 ACQUIRE_LOCK(self);
1569 if (!self->running) {
1570 PyErr_SetString(PyExc_ValueError, "object was already "
1571 "flushed");
1572 goto error;
1574 self->running = 0;
1576 ret = PyString_FromStringAndSize(NULL, bufsize);
1577 if (!ret)
1578 goto error;
1580 bzs->next_out = BUF(ret);
1581 bzs->avail_out = bufsize;
1583 totalout = BZS_TOTAL_OUT(bzs);
1585 for (;;) {
1586 Py_BEGIN_ALLOW_THREADS
1587 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1588 Py_END_ALLOW_THREADS
1589 if (bzerror == BZ_STREAM_END) {
1590 break;
1591 } else if (bzerror != BZ_FINISH_OK) {
1592 Util_CatchBZ2Error(bzerror);
1593 goto error;
1595 if (bzs->avail_out == 0) {
1596 bufsize = Util_NewBufferSize(bufsize);
1597 if (_PyString_Resize(&ret, bufsize) < 0)
1598 goto error;
1599 bzs->next_out = BUF(ret);
1600 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1601 - totalout);
1602 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1606 if (bzs->avail_out != 0)
1607 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1609 RELEASE_LOCK(self);
1610 return ret;
1612 error:
1613 RELEASE_LOCK(self);
1614 Py_XDECREF(ret);
1615 return NULL;
1618 static PyMethodDef BZ2Comp_methods[] = {
1619 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1620 BZ2Comp_compress__doc__},
1621 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1622 BZ2Comp_flush__doc__},
1623 {NULL, NULL} /* sentinel */
1627 /* ===================================================================== */
1628 /* Slot definitions for BZ2Comp_Type. */
1630 static int
1631 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1633 int compresslevel = 9;
1634 int bzerror;
1635 static char *kwlist[] = {"compresslevel", 0};
1637 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1638 kwlist, &compresslevel))
1639 return -1;
1641 if (compresslevel < 1 || compresslevel > 9) {
1642 PyErr_SetString(PyExc_ValueError,
1643 "compresslevel must be between 1 and 9");
1644 goto error;
1647 #ifdef WITH_THREAD
1648 self->lock = PyThread_allocate_lock();
1649 if (!self->lock)
1650 goto error;
1651 #endif
1653 memset(&self->bzs, 0, sizeof(bz_stream));
1654 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1655 if (bzerror != BZ_OK) {
1656 Util_CatchBZ2Error(bzerror);
1657 goto error;
1660 self->running = 1;
1662 return 0;
1663 error:
1664 #ifdef WITH_THREAD
1665 if (self->lock)
1666 PyThread_free_lock(self->lock);
1667 #endif
1668 return -1;
1671 static void
1672 BZ2Comp_dealloc(BZ2CompObject *self)
1674 #ifdef WITH_THREAD
1675 if (self->lock)
1676 PyThread_free_lock(self->lock);
1677 #endif
1678 BZ2_bzCompressEnd(&self->bzs);
1679 self->ob_type->tp_free((PyObject *)self);
1683 /* ===================================================================== */
1684 /* BZ2Comp_Type definition. */
1686 PyDoc_STRVAR(BZ2Comp__doc__,
1687 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1689 Create a new compressor object. This object may be used to compress\n\
1690 data sequentially. If you want to compress data in one shot, use the\n\
1691 compress() function instead. The compresslevel parameter, if given,\n\
1692 must be a number between 1 and 9.\n\
1695 static PyTypeObject BZ2Comp_Type = {
1696 PyObject_HEAD_INIT(NULL)
1697 0, /*ob_size*/
1698 "bz2.BZ2Compressor", /*tp_name*/
1699 sizeof(BZ2CompObject), /*tp_basicsize*/
1700 0, /*tp_itemsize*/
1701 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1702 0, /*tp_print*/
1703 0, /*tp_getattr*/
1704 0, /*tp_setattr*/
1705 0, /*tp_compare*/
1706 0, /*tp_repr*/
1707 0, /*tp_as_number*/
1708 0, /*tp_as_sequence*/
1709 0, /*tp_as_mapping*/
1710 0, /*tp_hash*/
1711 0, /*tp_call*/
1712 0, /*tp_str*/
1713 PyObject_GenericGetAttr,/*tp_getattro*/
1714 PyObject_GenericSetAttr,/*tp_setattro*/
1715 0, /*tp_as_buffer*/
1716 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1717 BZ2Comp__doc__, /*tp_doc*/
1718 0, /*tp_traverse*/
1719 0, /*tp_clear*/
1720 0, /*tp_richcompare*/
1721 0, /*tp_weaklistoffset*/
1722 0, /*tp_iter*/
1723 0, /*tp_iternext*/
1724 BZ2Comp_methods, /*tp_methods*/
1725 0, /*tp_members*/
1726 0, /*tp_getset*/
1727 0, /*tp_base*/
1728 0, /*tp_dict*/
1729 0, /*tp_descr_get*/
1730 0, /*tp_descr_set*/
1731 0, /*tp_dictoffset*/
1732 (initproc)BZ2Comp_init, /*tp_init*/
1733 PyType_GenericAlloc, /*tp_alloc*/
1734 PyType_GenericNew, /*tp_new*/
1735 _PyObject_Del, /*tp_free*/
1736 0, /*tp_is_gc*/
1740 /* ===================================================================== */
1741 /* Members of BZ2Decomp. */
1743 #undef OFF
1744 #define OFF(x) offsetof(BZ2DecompObject, x)
1746 static PyMemberDef BZ2Decomp_members[] = {
1747 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1748 {NULL} /* Sentinel */
1752 /* ===================================================================== */
1753 /* Methods of BZ2Decomp. */
1755 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1756 "decompress(data) -> string\n\
1758 Provide more data to the decompressor object. It will return chunks\n\
1759 of decompressed data whenever possible. If you try to decompress data\n\
1760 after the end of stream is found, EOFError will be raised. If any data\n\
1761 was found after the end of stream, it'll be ignored and saved in\n\
1762 unused_data attribute.\n\
1765 static PyObject *
1766 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1768 char *data;
1769 int datasize;
1770 int bufsize = SMALLCHUNK;
1771 LONG_LONG totalout;
1772 PyObject *ret = NULL;
1773 bz_stream *bzs = &self->bzs;
1774 int bzerror;
1776 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1777 return NULL;
1779 ACQUIRE_LOCK(self);
1780 if (!self->running) {
1781 PyErr_SetString(PyExc_EOFError, "end of stream was "
1782 "already found");
1783 goto error;
1786 ret = PyString_FromStringAndSize(NULL, bufsize);
1787 if (!ret)
1788 goto error;
1790 bzs->next_in = data;
1791 bzs->avail_in = datasize;
1792 bzs->next_out = BUF(ret);
1793 bzs->avail_out = bufsize;
1795 totalout = BZS_TOTAL_OUT(bzs);
1797 for (;;) {
1798 Py_BEGIN_ALLOW_THREADS
1799 bzerror = BZ2_bzDecompress(bzs);
1800 Py_END_ALLOW_THREADS
1801 if (bzerror == BZ_STREAM_END) {
1802 if (bzs->avail_in != 0) {
1803 Py_DECREF(self->unused_data);
1804 self->unused_data =
1805 PyString_FromStringAndSize(bzs->next_in,
1806 bzs->avail_in);
1808 self->running = 0;
1809 break;
1811 if (bzerror != BZ_OK) {
1812 Util_CatchBZ2Error(bzerror);
1813 goto error;
1815 if (bzs->avail_out == 0) {
1816 bufsize = Util_NewBufferSize(bufsize);
1817 if (_PyString_Resize(&ret, bufsize) < 0) {
1818 BZ2_bzDecompressEnd(bzs);
1819 goto error;
1821 bzs->next_out = BUF(ret);
1822 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1823 - totalout);
1824 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1825 } else if (bzs->avail_in == 0) {
1826 break;
1830 if (bzs->avail_out != 0)
1831 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1833 RELEASE_LOCK(self);
1834 return ret;
1836 error:
1837 RELEASE_LOCK(self);
1838 Py_XDECREF(ret);
1839 return NULL;
1842 static PyMethodDef BZ2Decomp_methods[] = {
1843 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1844 {NULL, NULL} /* sentinel */
1848 /* ===================================================================== */
1849 /* Slot definitions for BZ2Decomp_Type. */
1851 static int
1852 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1854 int bzerror;
1856 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1857 return -1;
1859 #ifdef WITH_THREAD
1860 self->lock = PyThread_allocate_lock();
1861 if (!self->lock)
1862 goto error;
1863 #endif
1865 self->unused_data = PyString_FromString("");
1866 if (!self->unused_data)
1867 goto error;
1869 memset(&self->bzs, 0, sizeof(bz_stream));
1870 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1871 if (bzerror != BZ_OK) {
1872 Util_CatchBZ2Error(bzerror);
1873 goto error;
1876 self->running = 1;
1878 return 0;
1880 error:
1881 #ifdef WITH_THREAD
1882 if (self->lock)
1883 PyThread_free_lock(self->lock);
1884 #endif
1885 Py_XDECREF(self->unused_data);
1886 return -1;
1889 static void
1890 BZ2Decomp_dealloc(BZ2DecompObject *self)
1892 #ifdef WITH_THREAD
1893 if (self->lock)
1894 PyThread_free_lock(self->lock);
1895 #endif
1896 Py_XDECREF(self->unused_data);
1897 BZ2_bzDecompressEnd(&self->bzs);
1898 self->ob_type->tp_free((PyObject *)self);
1902 /* ===================================================================== */
1903 /* BZ2Decomp_Type definition. */
1905 PyDoc_STRVAR(BZ2Decomp__doc__,
1906 "BZ2Decompressor() -> decompressor object\n\
1908 Create a new decompressor object. This object may be used to decompress\n\
1909 data sequentially. If you want to decompress data in one shot, use the\n\
1910 decompress() function instead.\n\
1913 static PyTypeObject BZ2Decomp_Type = {
1914 PyObject_HEAD_INIT(NULL)
1915 0, /*ob_size*/
1916 "bz2.BZ2Decompressor", /*tp_name*/
1917 sizeof(BZ2DecompObject), /*tp_basicsize*/
1918 0, /*tp_itemsize*/
1919 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1920 0, /*tp_print*/
1921 0, /*tp_getattr*/
1922 0, /*tp_setattr*/
1923 0, /*tp_compare*/
1924 0, /*tp_repr*/
1925 0, /*tp_as_number*/
1926 0, /*tp_as_sequence*/
1927 0, /*tp_as_mapping*/
1928 0, /*tp_hash*/
1929 0, /*tp_call*/
1930 0, /*tp_str*/
1931 PyObject_GenericGetAttr,/*tp_getattro*/
1932 PyObject_GenericSetAttr,/*tp_setattro*/
1933 0, /*tp_as_buffer*/
1934 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1935 BZ2Decomp__doc__, /*tp_doc*/
1936 0, /*tp_traverse*/
1937 0, /*tp_clear*/
1938 0, /*tp_richcompare*/
1939 0, /*tp_weaklistoffset*/
1940 0, /*tp_iter*/
1941 0, /*tp_iternext*/
1942 BZ2Decomp_methods, /*tp_methods*/
1943 BZ2Decomp_members, /*tp_members*/
1944 0, /*tp_getset*/
1945 0, /*tp_base*/
1946 0, /*tp_dict*/
1947 0, /*tp_descr_get*/
1948 0, /*tp_descr_set*/
1949 0, /*tp_dictoffset*/
1950 (initproc)BZ2Decomp_init, /*tp_init*/
1951 PyType_GenericAlloc, /*tp_alloc*/
1952 PyType_GenericNew, /*tp_new*/
1953 _PyObject_Del, /*tp_free*/
1954 0, /*tp_is_gc*/
1958 /* ===================================================================== */
1959 /* Module functions. */
1961 PyDoc_STRVAR(bz2_compress__doc__,
1962 "compress(data [, compresslevel=9]) -> string\n\
1964 Compress data in one shot. If you want to compress data sequentially,\n\
1965 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1966 given, must be a number between 1 and 9.\n\
1969 static PyObject *
1970 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1972 int compresslevel=9;
1973 char *data;
1974 int datasize;
1975 int bufsize;
1976 PyObject *ret = NULL;
1977 bz_stream _bzs;
1978 bz_stream *bzs = &_bzs;
1979 int bzerror;
1980 static char *kwlist[] = {"data", "compresslevel", 0};
1982 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1983 kwlist, &data, &datasize,
1984 &compresslevel))
1985 return NULL;
1987 if (compresslevel < 1 || compresslevel > 9) {
1988 PyErr_SetString(PyExc_ValueError,
1989 "compresslevel must be between 1 and 9");
1990 return NULL;
1993 /* Conforming to bz2 manual, this is large enough to fit compressed
1994 * data in one shot. We will check it later anyway. */
1995 bufsize = datasize + (datasize/100+1) + 600;
1997 ret = PyString_FromStringAndSize(NULL, bufsize);
1998 if (!ret)
1999 return NULL;
2001 memset(bzs, 0, sizeof(bz_stream));
2003 bzs->next_in = data;
2004 bzs->avail_in = datasize;
2005 bzs->next_out = BUF(ret);
2006 bzs->avail_out = bufsize;
2008 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2009 if (bzerror != BZ_OK) {
2010 Util_CatchBZ2Error(bzerror);
2011 Py_DECREF(ret);
2012 return NULL;
2015 for (;;) {
2016 Py_BEGIN_ALLOW_THREADS
2017 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2018 Py_END_ALLOW_THREADS
2019 if (bzerror == BZ_STREAM_END) {
2020 break;
2021 } else if (bzerror != BZ_FINISH_OK) {
2022 BZ2_bzCompressEnd(bzs);
2023 Util_CatchBZ2Error(bzerror);
2024 Py_DECREF(ret);
2025 return NULL;
2027 if (bzs->avail_out == 0) {
2028 bufsize = Util_NewBufferSize(bufsize);
2029 if (_PyString_Resize(&ret, bufsize) < 0) {
2030 BZ2_bzCompressEnd(bzs);
2031 Py_DECREF(ret);
2032 return NULL;
2034 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2035 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2039 if (bzs->avail_out != 0)
2040 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2041 BZ2_bzCompressEnd(bzs);
2043 return ret;
2046 PyDoc_STRVAR(bz2_decompress__doc__,
2047 "decompress(data) -> decompressed data\n\
2049 Decompress data in one shot. If you want to decompress data sequentially,\n\
2050 use an instance of BZ2Decompressor instead.\n\
2053 static PyObject *
2054 bz2_decompress(PyObject *self, PyObject *args)
2056 char *data;
2057 int datasize;
2058 int bufsize = SMALLCHUNK;
2059 PyObject *ret;
2060 bz_stream _bzs;
2061 bz_stream *bzs = &_bzs;
2062 int bzerror;
2064 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2065 return NULL;
2067 if (datasize == 0)
2068 return PyString_FromString("");
2070 ret = PyString_FromStringAndSize(NULL, bufsize);
2071 if (!ret)
2072 return NULL;
2074 memset(bzs, 0, sizeof(bz_stream));
2076 bzs->next_in = data;
2077 bzs->avail_in = datasize;
2078 bzs->next_out = BUF(ret);
2079 bzs->avail_out = bufsize;
2081 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2082 if (bzerror != BZ_OK) {
2083 Util_CatchBZ2Error(bzerror);
2084 Py_DECREF(ret);
2085 return NULL;
2088 for (;;) {
2089 Py_BEGIN_ALLOW_THREADS
2090 bzerror = BZ2_bzDecompress(bzs);
2091 Py_END_ALLOW_THREADS
2092 if (bzerror == BZ_STREAM_END) {
2093 break;
2094 } else if (bzerror != BZ_OK) {
2095 BZ2_bzDecompressEnd(bzs);
2096 Util_CatchBZ2Error(bzerror);
2097 Py_DECREF(ret);
2098 return NULL;
2100 if (bzs->avail_out == 0) {
2101 bufsize = Util_NewBufferSize(bufsize);
2102 if (_PyString_Resize(&ret, bufsize) < 0) {
2103 BZ2_bzDecompressEnd(bzs);
2104 Py_DECREF(ret);
2105 return NULL;
2107 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2108 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2109 } else if (bzs->avail_in == 0) {
2110 BZ2_bzDecompressEnd(bzs);
2111 PyErr_SetString(PyExc_ValueError,
2112 "couldn't find end of stream");
2113 Py_DECREF(ret);
2114 return NULL;
2118 if (bzs->avail_out != 0)
2119 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2120 BZ2_bzDecompressEnd(bzs);
2122 return ret;
2125 static PyMethodDef bz2_methods[] = {
2126 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2127 bz2_compress__doc__},
2128 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2129 bz2_decompress__doc__},
2130 {NULL, NULL} /* sentinel */
2133 /* ===================================================================== */
2134 /* Initialization function. */
2136 PyDoc_STRVAR(bz2__doc__,
2137 "The python bz2 module provides a comprehensive interface for\n\
2138 the bz2 compression library. It implements a complete file\n\
2139 interface, one shot (de)compression functions, and types for\n\
2140 sequential (de)compression.\n\
2143 DL_EXPORT(void)
2144 initbz2(void)
2146 PyObject *m;
2148 BZ2File_Type.ob_type = &PyType_Type;
2149 BZ2Comp_Type.ob_type = &PyType_Type;
2150 BZ2Decomp_Type.ob_type = &PyType_Type;
2152 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2154 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2156 Py_INCREF(&BZ2File_Type);
2157 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2159 Py_INCREF(&BZ2Comp_Type);
2160 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2162 Py_INCREF(&BZ2Decomp_Type);
2163 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);