3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* Raw character buffer of a Python string object.  The argument is
 * parenthesized so that an expression (not just a plain identifier) can be
 * passed safely through the cast. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
29 #define MODE_READ_EOF 2
32 #define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)
35 #define BZS_TOTAL_OUT(bzs) \
36 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
37 #elif SIZEOF_LONG_LONG >= 8
38 #define BZS_TOTAL_OUT(bzs) \
39 (((LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
41 #define BZS_TOTAL_OUT(bzs) \
46 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
47 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
49 #define ACQUIRE_LOCK(obj)
50 #define RELEASE_LOCK(obj)
53 #ifdef WITH_UNIVERSAL_NEWLINES
54 /* Bits in f_newlinetypes */
55 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
56 #define NEWLINE_CR 1 /* \r newline seen */
57 #define NEWLINE_LF 2 /* \n newline seen */
58 #define NEWLINE_CRLF 4 /* \r\n newline seen */
61 /* ===================================================================== */
62 /* Structure definitions. */
68 char* f_buf
; /* Allocated readahead buffer */
69 char* f_bufend
; /* Points after last occupied position */
70 char* f_bufptr
; /* Current buffer position */
72 int f_softspace
; /* Flag used by 'print' command */
74 #ifdef WITH_UNIVERSAL_NEWLINES
75 int f_univ_newline
; /* Handle any newline convention */
76 int f_newlinetypes
; /* Types of newlines seen */
77 int f_skipnextlf
; /* Skip next \n */
85 PyThread_type_lock lock
;
94 PyThread_type_lock lock
;
102 PyObject
*unused_data
;
104 PyThread_type_lock lock
;
108 /* ===================================================================== */
109 /* Utility functions. */
112 Util_CatchBZ2Error(int bzerror
)
120 case BZ_CONFIG_ERROR
:
121 PyErr_SetString(PyExc_SystemError
,
122 "the bz2 library was not compiled "
128 PyErr_SetString(PyExc_ValueError
,
129 "the bz2 library has received wrong "
140 case BZ_DATA_ERROR_MAGIC
:
141 PyErr_SetString(PyExc_IOError
, "invalid data stream");
146 PyErr_SetString(PyExc_IOError
, "unknown IO error");
150 case BZ_UNEXPECTED_EOF
:
151 PyErr_SetString(PyExc_EOFError
,
152 "compressed file ended before the "
153 "logical end-of-stream was detected");
157 case BZ_SEQUENCE_ERROR
:
158 PyErr_SetString(PyExc_RuntimeError
,
159 "wrong sequence of bz2 library "
/* Buffer growth steps.  SMALLCHUNK is the first allocation size and the
 * linear step for small buffers; it is never below 8192 bytes.  BIGCHUNK is
 * the upper bound of the doubling phase and the linear step after it. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize(). */
/* Return the size a read buffer should be grown to, given its current size.
 *
 *   currentsize <= SMALLCHUNK             -> currentsize + SMALLCHUNK
 *   SMALLCHUNK < currentsize <= BIGCHUNK  -> currentsize * 2
 *   currentsize > BIGCHUNK                -> currentsize + BIGCHUNK
 *
 * No overflow check is done here; callers compare the result against
 * INT_MAX before resizing. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	if (currentsize > SMALLCHUNK) {
		/* Keep doubling until we reach BIGCHUNK;
		   then keep adding BIGCHUNK. */
		if (currentsize <= BIGCHUNK)
			return currentsize + currentsize;
		else
			return currentsize + BIGCHUNK;
	}
	return currentsize + SMALLCHUNK;
}
194 /* This is a hacked version of Python's fileobject.c:get_line(). */
196 Util_GetLine(BZ2FileObject
*f
, int n
)
200 size_t total_v_size
; /* total # of slots in buffer */
201 size_t used_v_size
; /* # used slots in buffer */
202 size_t increment
; /* amount to increment the buffer */
205 #ifdef WITH_UNIVERSAL_NEWLINES
206 int newlinetypes
= f
->f_newlinetypes
;
207 int skipnextlf
= f
->f_skipnextlf
;
208 int univ_newline
= f
->f_univ_newline
;
211 total_v_size
= n
> 0 ? n
: 100;
212 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
217 end
= buf
+ total_v_size
;
220 Py_BEGIN_ALLOW_THREADS
221 #ifdef WITH_UNIVERSAL_NEWLINES
224 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
226 if (bzerror
!= BZ_OK
|| buf
== end
)
231 /* Seeing a \n here with
232 * skipnextlf true means we
235 newlinetypes
|= NEWLINE_CRLF
;
236 BZ2_bzRead(&bzerror
, f
->fp
,
238 if (bzerror
!= BZ_OK
)
241 newlinetypes
|= NEWLINE_CR
;
247 } else if ( c
== '\n')
248 newlinetypes
|= NEWLINE_LF
;
250 if (c
== '\n') break;
252 if (bzerror
== BZ_STREAM_END
&& skipnextlf
)
253 newlinetypes
|= NEWLINE_CR
;
254 } else /* If not universal newlines use the normal loop */
257 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
260 } while (bzerror
== BZ_OK
&& c
!= '\n' && buf
!= end
);
262 #ifdef WITH_UNIVERSAL_NEWLINES
263 f
->f_newlinetypes
= newlinetypes
;
264 f
->f_skipnextlf
= skipnextlf
;
266 if (bzerror
== BZ_STREAM_END
) {
268 f
->mode
= MODE_READ_EOF
;
270 } else if (bzerror
!= BZ_OK
) {
271 Util_CatchBZ2Error(bzerror
);
277 /* Must be because buf == end */
280 used_v_size
= total_v_size
;
281 increment
= total_v_size
>> 2; /* mild exponential growth */
282 total_v_size
+= increment
;
283 if (total_v_size
> INT_MAX
) {
284 PyErr_SetString(PyExc_OverflowError
,
285 "line is longer than a Python string can hold");
289 if (_PyString_Resize(&v
, total_v_size
) < 0)
291 buf
= BUF(v
) + used_v_size
;
292 end
= BUF(v
) + total_v_size
;
295 used_v_size
= buf
- BUF(v
);
296 if (used_v_size
!= total_v_size
)
297 _PyString_Resize(&v
, used_v_size
);
301 #ifndef WITH_UNIVERSAL_NEWLINES
302 #define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
304 /* This is a hacked version of Python's
305 * fileobject.c:Py_UniversalNewlineFread(). */
307 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
308 char* buf
, size_t n
, BZ2FileObject
*f
)
311 int newlinetypes
, skipnextlf
;
314 assert(stream
!= NULL
);
316 if (!f
->f_univ_newline
)
317 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
319 newlinetypes
= f
->f_newlinetypes
;
320 skipnextlf
= f
->f_skipnextlf
;
322 /* Invariant: n is the number of bytes remaining to be filled
330 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
332 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
333 shortread
= n
!= 0; /* true iff EOF or error */
337 /* Save as LF and set flag to skip next LF. */
341 else if (skipnextlf
&& c
== '\n') {
342 /* Skip LF, and remember we saw CR LF. */
344 newlinetypes
|= NEWLINE_CRLF
;
348 /* Normal char to be stored in buffer. Also
349 * update the newlinetypes flag if either this
350 * is an LF or the previous char was a CR.
353 newlinetypes
|= NEWLINE_LF
;
355 newlinetypes
|= NEWLINE_CR
;
361 /* If this is EOF, update type flags. */
362 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
363 newlinetypes
|= NEWLINE_CR
;
367 f
->f_newlinetypes
= newlinetypes
;
368 f
->f_skipnextlf
= skipnextlf
;
373 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
375 Util_DropReadAhead(BZ2FileObject
*f
)
377 if (f
->f_buf
!= NULL
) {
378 PyMem_Free(f
->f_buf
);
383 /* This is a hacked version of Python's fileobject.c:readahead(). */
385 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
390 if (f
->f_buf
!= NULL
) {
391 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
394 Util_DropReadAhead(f
);
396 if (f
->mode
== MODE_READ_EOF
) {
399 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
402 Py_BEGIN_ALLOW_THREADS
403 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
407 if (bzerror
== BZ_STREAM_END
) {
409 f
->mode
= MODE_READ_EOF
;
410 } else if (bzerror
!= BZ_OK
) {
411 Util_CatchBZ2Error(bzerror
);
412 Util_DropReadAhead(f
);
415 f
->f_bufptr
= f
->f_buf
;
416 f
->f_bufend
= f
->f_buf
+ chunksize
;
420 /* This is a hacked version of Python's
421 * fileobject.c:readahead_get_line_skip(). */
422 static PyStringObject
*
423 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
430 if (f
->f_buf
== NULL
)
431 if (Util_ReadAhead(f
, bufsize
) < 0)
434 len
= f
->f_bufend
- f
->f_bufptr
;
436 return (PyStringObject
*)
437 PyString_FromStringAndSize(NULL
, skip
);
438 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
439 if (bufptr
!= NULL
) {
440 bufptr
++; /* Count the '\n' */
441 len
= bufptr
- f
->f_bufptr
;
442 s
= (PyStringObject
*)
443 PyString_FromStringAndSize(NULL
, skip
+len
);
446 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
447 f
->f_bufptr
= bufptr
;
448 if (bufptr
== f
->f_bufend
)
449 Util_DropReadAhead(f
);
451 bufptr
= f
->f_bufptr
;
453 f
->f_buf
= NULL
; /* Force new readahead buffer */
454 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
455 bufsize
+ (bufsize
>>2));
460 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
466 /* ===================================================================== */
467 /* Methods of BZ2File. */
469 PyDoc_STRVAR(BZ2File_read__doc__
,
470 "read([size]) -> string\n\
472 Read at most size uncompressed bytes, returned as a string. If the size\n\
473 argument is negative or omitted, read until EOF is reached.\n\
476 /* This is a hacked version of Python's fileobject.c:file_read(). */
478 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
480 long bytesrequested
= -1;
481 size_t bytesread
, buffersize
, chunksize
;
483 PyObject
*ret
= NULL
;
485 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
489 switch (self
->mode
) {
493 ret
= PyString_FromString("");
496 PyErr_SetString(PyExc_ValueError
,
497 "I/O operation on closed file");
500 PyErr_SetString(PyExc_IOError
,
501 "file is not ready for reading");
505 if (bytesrequested
< 0)
506 buffersize
= Util_NewBufferSize((size_t)0);
508 buffersize
= bytesrequested
;
509 if (buffersize
> INT_MAX
) {
510 PyErr_SetString(PyExc_OverflowError
,
511 "requested number of bytes is "
512 "more than a Python string can hold");
515 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
521 Py_BEGIN_ALLOW_THREADS
522 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
524 buffersize
-bytesread
,
526 self
->pos
+= chunksize
;
528 bytesread
+= chunksize
;
529 if (bzerror
== BZ_STREAM_END
) {
530 self
->size
= self
->pos
;
531 self
->mode
= MODE_READ_EOF
;
533 } else if (bzerror
!= BZ_OK
) {
534 Util_CatchBZ2Error(bzerror
);
539 if (bytesrequested
< 0) {
540 buffersize
= Util_NewBufferSize(buffersize
);
541 if (_PyString_Resize(&ret
, buffersize
) < 0)
547 if (bytesread
!= buffersize
)
548 _PyString_Resize(&ret
, bytesread
);
555 PyDoc_STRVAR(BZ2File_readline__doc__
,
556 "readline([size]) -> string\n\
558 Return the next line from the file, as a string, retaining newline.\n\
559 A non-negative size argument will limit the maximum number of bytes to\n\
560 return (an incomplete line may be returned then). Return an empty\n\
565 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
567 PyObject
*ret
= NULL
;
570 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
574 switch (self
->mode
) {
578 ret
= PyString_FromString("");
581 PyErr_SetString(PyExc_ValueError
,
582 "I/O operation on closed file");
585 PyErr_SetString(PyExc_IOError
,
586 "file is not ready for reading");
591 ret
= PyString_FromString("");
593 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
600 PyDoc_STRVAR(BZ2File_readlines__doc__
,
601 "readlines([size]) -> list\n\
603 Call readline() repeatedly and return a list of lines read.\n\
604 The optional size argument, if given, is an approximate bound on the\n\
605 total number of bytes in the lines returned.\n\
608 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
610 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
613 PyObject
*list
= NULL
;
615 char small_buffer
[SMALLCHUNK
];
616 char *buffer
= small_buffer
;
617 size_t buffersize
= SMALLCHUNK
;
618 PyObject
*big_buffer
= NULL
;
621 size_t totalread
= 0;
627 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
631 switch (self
->mode
) {
635 list
= PyList_New(0);
638 PyErr_SetString(PyExc_ValueError
,
639 "I/O operation on closed file");
642 PyErr_SetString(PyExc_IOError
,
643 "file is not ready for reading");
647 if ((list
= PyList_New(0)) == NULL
)
651 Py_BEGIN_ALLOW_THREADS
652 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
654 buffersize
-nfilled
, self
);
657 if (bzerror
== BZ_STREAM_END
) {
658 self
->size
= self
->pos
;
659 self
->mode
= MODE_READ_EOF
;
665 } else if (bzerror
!= BZ_OK
) {
666 Util_CatchBZ2Error(bzerror
);
673 p
= memchr(buffer
+nfilled
, '\n', nread
);
675 /* Need a larger buffer to fit this line */
678 if (buffersize
> INT_MAX
) {
679 PyErr_SetString(PyExc_OverflowError
,
680 "line is longer than a Python string can hold");
683 if (big_buffer
== NULL
) {
684 /* Create the big buffer */
685 big_buffer
= PyString_FromStringAndSize(
687 if (big_buffer
== NULL
)
689 buffer
= PyString_AS_STRING(big_buffer
);
690 memcpy(buffer
, small_buffer
, nfilled
);
693 /* Grow the big buffer */
694 _PyString_Resize(&big_buffer
, buffersize
);
695 buffer
= PyString_AS_STRING(big_buffer
);
699 end
= buffer
+nfilled
+nread
;
702 /* Process complete lines */
704 line
= PyString_FromStringAndSize(q
, p
-q
);
707 err
= PyList_Append(list
, line
);
712 p
= memchr(q
, '\n', end
-q
);
714 /* Move the remaining incomplete line to the start */
716 memmove(buffer
, q
, nfilled
);
718 if (totalread
>= (size_t)sizehint
)
726 /* Partial last line */
727 line
= PyString_FromStringAndSize(buffer
, nfilled
);
731 /* Need to complete the last line */
732 PyObject
*rest
= Util_GetLine(self
, 0);
737 PyString_Concat(&line
, rest
);
742 err
= PyList_Append(list
, line
);
751 Py_DECREF(big_buffer
);
756 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
757 "xreadlines() -> self\n\
759 For backward compatibility. BZ2File objects now include the performance\n\
760 optimizations previously implemented in the xreadlines module.\n\
763 PyDoc_STRVAR(BZ2File_write__doc__
,
764 "write(data) -> None\n\
766 Write the 'data' string to file. Note that due to buffering, close() may\n\
767 be needed before the file on disk reflects the data written.\n\
770 /* This is a hacked version of Python's fileobject.c:file_write(). */
772 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
774 PyObject
*ret
= NULL
;
779 if (!PyArg_ParseTuple(args
, "s#", &buf
, &len
))
783 switch (self
->mode
) {
788 PyErr_SetString(PyExc_ValueError
,
789 "I/O operation on closed file");
793 PyErr_SetString(PyExc_IOError
,
794 "file is not ready for writing");
798 self
->f_softspace
= 0;
800 Py_BEGIN_ALLOW_THREADS
801 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
805 if (bzerror
!= BZ_OK
) {
806 Util_CatchBZ2Error(bzerror
);
818 PyDoc_STRVAR(BZ2File_writelines__doc__
,
819 "writelines(sequence_of_strings) -> None\n\
821 Write the sequence of strings to the file. Note that newlines are not\n\
822 added. The sequence can be any iterable object producing strings. This is\n\
823 equivalent to calling write() for each string.\n\
826 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
828 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
830 #define CHUNKSIZE 1000
831 PyObject
*list
= NULL
;
832 PyObject
*iter
= NULL
;
833 PyObject
*ret
= NULL
;
835 int i
, j
, index
, len
, islist
;
839 islist
= PyList_Check(seq
);
841 iter
= PyObject_GetIter(seq
);
843 PyErr_SetString(PyExc_TypeError
,
844 "writelines() requires an iterable argument");
847 list
= PyList_New(CHUNKSIZE
);
852 /* Strategy: slurp CHUNKSIZE lines into a private list,
853 checking that they are all strings, then write that list
854 without holding the interpreter lock, then come back for more. */
855 for (index
= 0; ; index
+= CHUNKSIZE
) {
858 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
861 j
= PyList_GET_SIZE(list
);
864 for (j
= 0; j
< CHUNKSIZE
; j
++) {
865 line
= PyIter_Next(iter
);
867 if (PyErr_Occurred())
871 PyList_SetItem(list
, j
, line
);
877 /* Check that all entries are indeed strings. If not,
878 apply the same rules as for file.write() and
879 convert the rets to strings. This is slow, but
880 seems to be the only way since all conversion APIs
881 could potentially execute Python code. */
882 for (i
= 0; i
< j
; i
++) {
883 PyObject
*v
= PyList_GET_ITEM(list
, i
);
884 if (!PyString_Check(v
)) {
887 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
888 PyErr_SetString(PyExc_TypeError
,
895 line
= PyString_FromStringAndSize(buffer
,
900 PyList_SET_ITEM(list
, i
, line
);
904 self
->f_softspace
= 0;
906 /* Since we are releasing the global lock, the
907 following code may *not* execute Python code. */
908 Py_BEGIN_ALLOW_THREADS
909 for (i
= 0; i
< j
; i
++) {
910 line
= PyList_GET_ITEM(list
, i
);
911 len
= PyString_GET_SIZE(line
);
912 BZ2_bzWrite (&bzerror
, self
->fp
,
913 PyString_AS_STRING(line
), len
);
914 if (bzerror
!= BZ_OK
) {
916 Util_CatchBZ2Error(bzerror
);
937 PyDoc_STRVAR(BZ2File_seek__doc__
,
938 "seek(offset [, whence]) -> None\n\
940 Move to new file position. Argument offset is a byte count. Optional\n\
941 argument whence defaults to 0 (offset from start of file, offset\n\
942 should be >= 0); other values are 1 (move relative to current position,\n\
943 positive or negative), and 2 (move relative to end of file, usually\n\
944 negative, although many platforms allow seeking beyond the end of a file).\n\
946 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
947 the operation may be extremely slow.\n\
951 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
955 char small_buffer
[SMALLCHUNK
];
956 char *buffer
= small_buffer
;
957 size_t buffersize
= SMALLCHUNK
;
963 PyObject
*ret
= NULL
;
965 if (!PyArg_ParseTuple(args
, "l|i:seek", &offset
, &where
))
969 Util_DropReadAhead(self
);
970 switch (self
->mode
) {
976 PyErr_SetString(PyExc_ValueError
,
977 "I/O operation on closed file");
981 PyErr_SetString(PyExc_IOError
,
982 "seek works only while reading");
988 offset
= self
->pos
+ offset
;
990 } else if (where
== 2) {
991 if (self
->size
== -1) {
992 assert(self
->mode
!= MODE_READ_EOF
);
994 Py_BEGIN_ALLOW_THREADS
995 chunksize
= Util_UnivNewlineRead(
999 self
->pos
+= chunksize
;
1000 Py_END_ALLOW_THREADS
1002 bytesread
+= chunksize
;
1003 if (bzerror
== BZ_STREAM_END
) {
1005 } else if (bzerror
!= BZ_OK
) {
1006 Util_CatchBZ2Error(bzerror
);
1010 self
->mode
= MODE_READ_EOF
;
1011 self
->size
= self
->pos
;
1014 offset
= self
->size
+ offset
;
1015 if (offset
>= self
->pos
)
1016 offset
-= self
->pos
;
1022 } else if (where
== 0) {
1023 if (offset
>= self
->pos
)
1024 offset
-= self
->pos
;
1030 BZ2_bzReadClose(&bzerror
, self
->fp
);
1031 if (bzerror
!= BZ_OK
) {
1032 Util_CatchBZ2Error(bzerror
);
1035 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1041 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1043 if (bzerror
!= BZ_OK
) {
1044 Util_CatchBZ2Error(bzerror
);
1047 self
->mode
= MODE_READ
;
1048 } else if (self
->mode
== MODE_READ_EOF
) {
1055 /* Before getting here, offset must be set to the number of bytes
1056 * to walk forward. */
1058 if ((size_t)offset
-bytesread
> buffersize
)
1059 readsize
= buffersize
;
1061 readsize
= offset
-bytesread
;
1062 Py_BEGIN_ALLOW_THREADS
1063 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1064 buffer
, readsize
, self
);
1065 self
->pos
+= chunksize
;
1066 Py_END_ALLOW_THREADS
1067 bytesread
+= chunksize
;
1068 if (bzerror
== BZ_STREAM_END
) {
1069 self
->size
= self
->pos
;
1070 self
->mode
= MODE_READ_EOF
;
1072 } else if (bzerror
!= BZ_OK
) {
1073 Util_CatchBZ2Error(bzerror
);
1076 if (bytesread
== offset
)
1089 PyDoc_STRVAR(BZ2File_tell__doc__
,
1092 Return the current file position, an integer (may be a long integer).\n\
1096 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1098 PyObject
*ret
= NULL
;
1100 if (self
->mode
== MODE_CLOSED
) {
1101 PyErr_SetString(PyExc_ValueError
,
1102 "I/O operation on closed file");
1106 ret
= PyInt_FromLong(self
->pos
);
1112 PyDoc_STRVAR(BZ2File_close__doc__
,
1113 "close() -> None or (perhaps) an integer\n\
1115 Close the file. Sets data attribute .closed to true. A closed file\n\
1116 cannot be used for further I/O operations. close() may be called more\n\
1117 than once without error.\n\
1121 BZ2File_close(BZ2FileObject
*self
)
1123 PyObject
*ret
= NULL
;
1124 int bzerror
= BZ_OK
;
1127 switch (self
->mode
) {
1130 BZ2_bzReadClose(&bzerror
, self
->fp
);
1133 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1137 self
->mode
= MODE_CLOSED
;
1138 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1139 if (bzerror
!= BZ_OK
) {
1140 Util_CatchBZ2Error(bzerror
);
1149 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1151 static PyMethodDef BZ2File_methods
[] = {
1152 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1153 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1154 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1155 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1156 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1157 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1158 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1159 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1160 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1161 {NULL
, NULL
} /* sentinel */
1165 /* ===================================================================== */
1166 /* Getters and setters of BZ2File. */
#ifdef WITH_UNIVERSAL_NEWLINES
/* This is a hacked version of Python's fileobject.c:get_newlines(). */
/* Getter for the `newlines` attribute.
 *
 * Translates the NEWLINE_* bit set stored in self->f_newlinetypes into a
 * Python object: None when no newline has been seen yet, a single string
 * when exactly one convention has been seen, or a tuple of strings when
 * several have.  Any other bit pattern raises SystemError and returns NULL.
 * `closure` is unused. */
static PyObject *
BZ2File_get_newlines(BZ2FileObject *self, void *closure)
{
	switch (self->f_newlinetypes) {
	case NEWLINE_UNKNOWN:
		Py_INCREF(Py_None);
		return Py_None;
	case NEWLINE_CR:
		return PyString_FromString("\r");
	case NEWLINE_LF:
		return PyString_FromString("\n");
	case NEWLINE_CR|NEWLINE_LF:
		return Py_BuildValue("(ss)", "\r", "\n");
	case NEWLINE_CRLF:
		return PyString_FromString("\r\n");
	case NEWLINE_CR|NEWLINE_CRLF:
		return Py_BuildValue("(ss)", "\r", "\r\n");
	case NEWLINE_LF|NEWLINE_CRLF:
		return Py_BuildValue("(ss)", "\n", "\r\n");
	case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
		return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
	default:
		PyErr_Format(PyExc_SystemError,
			     "Unknown newlines value 0x%x\n",
			     self->f_newlinetypes);
		return NULL;
	}
}
#endif
1201 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1203 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
1207 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1209 return PyObject_GetAttrString(self
->file
, "mode");
1213 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1215 return PyObject_GetAttrString(self
->file
, "name");
1218 static PyGetSetDef BZ2File_getset
[] = {
1219 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1220 "True if the file is closed"},
1221 #ifdef WITH_UNIVERSAL_NEWLINES
1222 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1223 "end-of-line convention used in this file"},
1225 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1226 "file mode ('r', 'w', or 'U')"},
1227 {"name", (getter
)BZ2File_get_name
, NULL
,
1229 {NULL
} /* Sentinel */
1233 /* ===================================================================== */
1234 /* Members of BZ2File_Type. */
1237 #define OFF(x) offsetof(BZ2FileObject, x)
1239 static PyMemberDef BZ2File_members
[] = {
1240 {"softspace", T_INT
, OFF(f_softspace
), 0,
1241 "flag indicating that a space needs to be printed; used by print"},
1242 {NULL
} /* Sentinel */
1245 /* ===================================================================== */
1246 /* Slot definitions for BZ2File_Type. */
1249 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1251 static char *kwlist
[] = {"filename", "mode", "buffering",
1252 "compresslevel", 0};
1256 int compresslevel
= 9;
1262 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1263 kwlist
, &name
, &mode
, &buffering
,
1267 if (compresslevel
< 1 || compresslevel
> 9) {
1268 PyErr_SetString(PyExc_ValueError
,
1269 "compresslevel must be between 1 and 9");
1287 self
->f_univ_newline
= 1;
1295 PyErr_Format(PyExc_ValueError
,
1296 "invalid mode char %c", *mode
);
1304 mode
= (mode_char
== 'r') ? "rb" : "wb";
1306 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1307 name
, mode
, buffering
);
1308 if (self
->file
== NULL
)
1311 /* From now on, we have stuff to dealloc, so jump to error label
1312 * instead of returning */
1315 self
->lock
= PyThread_allocate_lock();
1320 if (mode_char
== 'r')
1321 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1322 PyFile_AsFile(self
->file
),
1325 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1326 PyFile_AsFile(self
->file
),
1327 compresslevel
, 0, 0);
1329 if (bzerror
!= BZ_OK
) {
1330 Util_CatchBZ2Error(bzerror
);
1334 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1339 Py_DECREF(self
->file
);
1342 PyThread_free_lock(self
->lock
);
1348 BZ2File_dealloc(BZ2FileObject
*self
)
1353 PyThread_free_lock(self
->lock
);
1355 switch (self
->mode
) {
1358 BZ2_bzReadClose(&bzerror
, self
->fp
);
1361 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1365 Util_DropReadAhead(self
);
1366 Py_DECREF(self
->file
);
1367 self
->ob_type
->tp_free((PyObject
*)self
);
1370 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1372 BZ2File_getiter(BZ2FileObject
*self
)
1374 if (self
->mode
== MODE_CLOSED
) {
1375 PyErr_SetString(PyExc_ValueError
,
1376 "I/O operation on closed file");
1379 Py_INCREF((PyObject
*)self
);
1380 return (PyObject
*)self
;
1383 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1384 #define READAHEAD_BUFSIZE 8192
1386 BZ2File_iternext(BZ2FileObject
*self
)
1388 PyStringObject
* ret
;
1390 if (self
->mode
== MODE_CLOSED
) {
1391 PyErr_SetString(PyExc_ValueError
,
1392 "I/O operation on closed file");
1395 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1397 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1401 return (PyObject
*)ret
;
1404 /* ===================================================================== */
1405 /* BZ2File_Type definition. */
1407 PyDoc_VAR(BZ2File__doc__
) =
1409 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1411 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1412 writing. When opened for writing, the file will be created if it doesn't\n\
1413 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1414 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1415 is given, must be a number between 1 and 9.\n\
1417 #ifdef WITH_UNIVERSAL_NEWLINES
1420 Add a 'U' to mode to open the file for input with universal newline\n\
1421 support. Any line ending in the input file will be seen as a '\\n' in\n\
1422 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1423 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1424 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1425 newlines are available only when reading.\n\
1430 static PyTypeObject BZ2File_Type
= {
1431 PyObject_HEAD_INIT(NULL
)
1433 "bz2.BZ2File", /*tp_name*/
1434 sizeof(BZ2FileObject
), /*tp_basicsize*/
1436 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1443 0, /*tp_as_sequence*/
1444 0, /*tp_as_mapping*/
1448 PyObject_GenericGetAttr
,/*tp_getattro*/
1449 PyObject_GenericSetAttr
,/*tp_setattro*/
1451 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1452 BZ2File__doc__
, /*tp_doc*/
1455 0, /*tp_richcompare*/
1456 0, /*tp_weaklistoffset*/
1457 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1458 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1459 BZ2File_methods
, /*tp_methods*/
1460 BZ2File_members
, /*tp_members*/
1461 BZ2File_getset
, /*tp_getset*/
1466 0, /*tp_dictoffset*/
1467 (initproc
)BZ2File_init
, /*tp_init*/
1468 PyType_GenericAlloc
, /*tp_alloc*/
1469 PyType_GenericNew
, /*tp_new*/
1470 _PyObject_Del
, /*tp_free*/
1475 /* ===================================================================== */
1476 /* Methods of BZ2Comp. */
1478 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1479 "compress(data) -> string\n\
1481 Provide more data to the compressor object. It will return chunks of\n\
1482 compressed data whenever possible. When you've finished providing data\n\
1483 to compress, call the flush() method to finish the compression process,\n\
1484 and return what is left in the internal buffers.\n\
1488 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1492 int bufsize
= SMALLCHUNK
;
1494 PyObject
*ret
= NULL
;
1495 bz_stream
*bzs
= &self
->bzs
;
1498 if (!PyArg_ParseTuple(args
, "s#", &data
, &datasize
))
1502 if (!self
->running
) {
1503 PyErr_SetString(PyExc_ValueError
,
1504 "this object was already flushed");
1508 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1512 bzs
->next_in
= data
;
1513 bzs
->avail_in
= datasize
;
1514 bzs
->next_out
= BUF(ret
);
1515 bzs
->avail_out
= bufsize
;
1517 totalout
= BZS_TOTAL_OUT(bzs
);
1520 Py_BEGIN_ALLOW_THREADS
1521 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1522 Py_END_ALLOW_THREADS
1523 if (bzerror
!= BZ_RUN_OK
) {
1524 Util_CatchBZ2Error(bzerror
);
1527 if (bzs
->avail_out
== 0) {
1528 bufsize
= Util_NewBufferSize(bufsize
);
1529 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1530 BZ2_bzCompressEnd(bzs
);
1533 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1535 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1536 } else if (bzs
->avail_in
== 0) {
1541 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
1552 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1553 "flush() -> string\n\
1555 Finish the compression process and return what is left in internal buffers.\n\
1556 You must not use the compressor object after calling this method.\n\
1560 BZ2Comp_flush(BZ2CompObject
*self
)
1562 int bufsize
= SMALLCHUNK
;
1563 PyObject
*ret
= NULL
;
1564 bz_stream
*bzs
= &self
->bzs
;
1569 if (!self
->running
) {
1570 PyErr_SetString(PyExc_ValueError
, "object was already "
1576 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1580 bzs
->next_out
= BUF(ret
);
1581 bzs
->avail_out
= bufsize
;
1583 totalout
= BZS_TOTAL_OUT(bzs
);
1586 Py_BEGIN_ALLOW_THREADS
1587 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1588 Py_END_ALLOW_THREADS
1589 if (bzerror
== BZ_STREAM_END
) {
1591 } else if (bzerror
!= BZ_FINISH_OK
) {
1592 Util_CatchBZ2Error(bzerror
);
1595 if (bzs
->avail_out
== 0) {
1596 bufsize
= Util_NewBufferSize(bufsize
);
1597 if (_PyString_Resize(&ret
, bufsize
) < 0)
1599 bzs
->next_out
= BUF(ret
);
1600 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1602 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1606 if (bzs
->avail_out
!= 0)
1607 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
1618 static PyMethodDef BZ2Comp_methods
[] = {
1619 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1620 BZ2Comp_compress__doc__
},
1621 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1622 BZ2Comp_flush__doc__
},
1623 {NULL
, NULL
} /* sentinel */
1627 /* ===================================================================== */
1628 /* Slot definitions for BZ2Comp_Type. */
1631 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1633 int compresslevel
= 9;
1635 static char *kwlist
[] = {"compresslevel", 0};
1637 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1638 kwlist
, &compresslevel
))
1641 if (compresslevel
< 1 || compresslevel
> 9) {
1642 PyErr_SetString(PyExc_ValueError
,
1643 "compresslevel must be between 1 and 9");
1648 self
->lock
= PyThread_allocate_lock();
1653 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1654 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
1655 if (bzerror
!= BZ_OK
) {
1656 Util_CatchBZ2Error(bzerror
);
1666 PyThread_free_lock(self
->lock
);
1672 BZ2Comp_dealloc(BZ2CompObject
*self
)
1676 PyThread_free_lock(self
->lock
);
1678 BZ2_bzCompressEnd(&self
->bzs
);
1679 self
->ob_type
->tp_free((PyObject
*)self
);
1683 /* ===================================================================== */
1684 /* BZ2Comp_Type definition. */
/* Docstring of the compressor type; used as tp_doc in BZ2Comp_Type. */
1686 PyDoc_STRVAR(BZ2Comp__doc__
,
1687 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1689 Create a new compressor object. This object may be used to compress\n\
1690 data sequentially. If you want to compress data in one shot, use the\n\
1691 compress() function instead. The compresslevel parameter, if given,\n\
1692 must be a number between 1 and 9.\n\
1695 static PyTypeObject BZ2Comp_Type
= {
/* Type object behind bz2.BZ2Compressor. The slots visible in this
 * listing install BZ2Comp_dealloc as destructor, generic attribute
 * access, BZ2Comp__doc__, the BZ2Comp_methods table and BZ2Comp_init.
 * Py_TPFLAGS_BASETYPE allows the type to be subclassed.
 * NOTE(review): slots on original lines that this listing omits (the
 * embedded numbering has gaps) are not described here. */
1696 PyObject_HEAD_INIT(NULL
)
1698 "bz2.BZ2Compressor", /*tp_name*/
1699 sizeof(BZ2CompObject
), /*tp_basicsize*/
1701 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1708 0, /*tp_as_sequence*/
1709 0, /*tp_as_mapping*/
1713 PyObject_GenericGetAttr
,/*tp_getattro*/
1714 PyObject_GenericSetAttr
,/*tp_setattro*/
1716 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1717 BZ2Comp__doc__
, /*tp_doc*/
1720 0, /*tp_richcompare*/
1721 0, /*tp_weaklistoffset*/
1724 BZ2Comp_methods
, /*tp_methods*/
1731 0, /*tp_dictoffset*/
1732 (initproc
)BZ2Comp_init
, /*tp_init*/
1733 PyType_GenericAlloc
, /*tp_alloc*/
1734 PyType_GenericNew
, /*tp_new*/
1735 _PyObject_Del
, /*tp_free*/
1740 /* ===================================================================== */
1741 /* Members of BZ2Decomp. */
/* OFF(x) yields the byte offset of field x inside BZ2DecompObject. */
1744 #define OFF(x) offsetof(BZ2DecompObject, x)
/* Member table for the decompressor type. It exposes the unused_data
 * field as a read-only (RO) object attribute of the same name. The
 * NULL entry terminates the table. */
1746 static PyMemberDef BZ2Decomp_members
[] = {
1747 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1748 {NULL
} /* Sentinel */
1752 /* ===================================================================== */
1753 /* Methods of BZ2Decomp. */
/* Docstring of the decompress method; referenced from the
 * BZ2Decomp_methods table. */
1755 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1756 "decompress(data) -> string\n\
1758 Provide more data to the decompressor object. It will return chunks\n\
1759 of decompressed data whenever possible. If you try to decompress data\n\
1760 after the end of stream is found, EOFError will be raised. If any data\n\
1761 was found after the end of stream, it'll be ignored and saved in\n\
1762 unused_data attribute.\n\
1766 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
/* decompress(data) method of the decompressor object. It feeds the
 * bytes parsed from args into the bz_stream owned by self and collects
 * the output in a Python string that starts at SMALLCHUNK bytes and
 * grows on demand.
 * NOTE(review): this listing omits some original lines (the embedded
 * numbering has gaps), so some declarations, any enclosing loop and
 * the error exits are not visible here. */
1770 int bufsize
= SMALLCHUNK
;
1772 PyObject
*ret
= NULL
;
1773 bz_stream
*bzs
= &self
->bzs
;
/* One string or read-buffer argument, returned as pointer and length. */
1776 if (!PyArg_ParseTuple(args
, "s#", &data
, &datasize
))
/* When self->running is false an EOFError is raised instead of
 * decompressing. The docstring says this happens once the end of the
 * stream has been found. */
1780 if (!self
->running
) {
1781 PyErr_SetString(PyExc_EOFError
, "end of stream was "
/* Output buffer: a Python string of bufsize bytes with no initial
 * contents. */
1786 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Point the stream at the input bytes and at the start of the output
 * buffer. */
1790 bzs
->next_in
= data
;
1791 bzs
->avail_in
= datasize
;
1792 bzs
->next_out
= BUF(ret
);
1793 bzs
->avail_out
= bufsize
;
/* Remember the stream output counter before this call so that the
 * number of bytes produced by this call can be computed at the end. */
1795 totalout
= BZS_TOTAL_OUT(bzs
);
/* Run the library call with the interpreter lock released. */
1798 Py_BEGIN_ALLOW_THREADS
1799 bzerror
= BZ2_bzDecompress(bzs
);
1800 Py_END_ALLOW_THREADS
1801 if (bzerror
== BZ_STREAM_END
) {
/* End of stream with input left over: drop the old unused_data
 * reference and build a string from the remaining input. The target of
 * that new string is on a line not visible here, presumably
 * self->unused_data - confirm. */
1802 if (bzs
->avail_in
!= 0) {
1803 Py_DECREF(self
->unused_data
);
1805 PyString_FromStringAndSize(bzs
->next_in
,
/* Any other status than BZ_OK is reported through Util_CatchBZ2Error. */
1811 if (bzerror
!= BZ_OK
) {
1812 Util_CatchBZ2Error(bzerror
);
/* Output buffer is full: grow it and re-point the stream into the
 * resized string. */
1815 if (bzs
->avail_out
== 0) {
1816 bufsize
= Util_NewBufferSize(bufsize
);
1817 if (_PyString_Resize(&ret
, bufsize
) < 0) {
/* Resize failed: the stream is shut down here.
 * NOTE(review): BZ2Decomp_dealloc also calls BZ2_bzDecompressEnd on
 * this stream - confirm that the second call is harmless. */
1818 BZ2_bzDecompressEnd(bzs
);
/* NOTE(review): next_out is assigned on two consecutive original lines
 * (1821 and 1822). The first value is overwritten at once and looks
 * redundant. */
1821 bzs
->next_out
= BUF(ret
);
1822 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
/* Space left is the new size minus what is already in the buffer. */
1824 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Output space remains and all input has been consumed.
 * NOTE(review): the body of this branch is not visible here. */
1825 } else if (bzs
->avail_in
== 0) {
/* Trim the result to the bytes produced by this call. The cast to int
 * assumes that this count fits in an int - TODO confirm. */
1830 if (bzs
->avail_out
!= 0)
1831 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
/* Method table for the decompressor type. Its single entry maps the
 * Python-visible method name decompress to BZ2Decomp_decompress, which
 * is called with a positional argument tuple (METH_VARARGS). The NULL
 * entry terminates the table. */
1842 static PyMethodDef BZ2Decomp_methods
[] = {
1843 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1844 {NULL
, NULL
} /* sentinel */
1848 /* ===================================================================== */
1849 /* Slot definitions for BZ2Decomp_Type. */
/* Initializer for decompressor objects (installed as tp_init in
 * BZ2Decomp_Type). Takes no arguments, allocates the per-object lock,
 * sets unused_data to an empty string and initializes the bz_stream
 * embedded in self.
 * NOTE(review): this listing omits some original lines (the embedded
 * numbering has gaps), so the return statements and error exits are
 * not visible here. */
1852 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
/* The empty format accepts no positional arguments.
 * NOTE(review): kwargs is not passed to this parser call, so keyword
 * arguments look unchecked here - confirm. */
1856 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
1860 self
->lock
= PyThread_allocate_lock();
/* unused_data starts out as an empty string and is checked for
 * allocation failure. */
1865 self
->unused_data
= PyString_FromString("");
1866 if (!self
->unused_data
)
/* Zero the stream structure before handing it to the library. The two
 * trailing zeros passed to BZ2_bzDecompressInit are its verbosity and
 * small arguments (see the bzip2 manual). */
1869 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1870 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
/* A failed init is reported through Util_CatchBZ2Error. */
1871 if (bzerror
!= BZ_OK
) {
1872 Util_CatchBZ2Error(bzerror
);
/* NOTE(review): presumably the error exit, which releases the lock and
 * the unused_data reference acquired above - the surrounding lines are
 * not visible. */
1883 PyThread_free_lock(self
->lock
);
1885 Py_XDECREF(self
->unused_data
);
/* Destructor for decompressor objects (installed as tp_dealloc in
 * BZ2Decomp_Type). Releases the lock, drops the unused_data reference,
 * shuts down the decompression stream and then frees the object
 * through the tp_free slot of its type. */
1890 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
1894 PyThread_free_lock(self
->lock
);
/* Py_XDECREF tolerates a NULL unused_data pointer. */
1896 Py_XDECREF(self
->unused_data
);
/* Let the library release the resources held by the stream. */
1897 BZ2_bzDecompressEnd(&self
->bzs
);
/* Go through ob_type so that subclasses get their own tp_free. */
1898 self
->ob_type
->tp_free((PyObject
*)self
);
1902 /* ===================================================================== */
1903 /* BZ2Decomp_Type definition. */
/* Docstring of the decompressor type; used as tp_doc in
 * BZ2Decomp_Type. */
1905 PyDoc_STRVAR(BZ2Decomp__doc__
,
1906 "BZ2Decompressor() -> decompressor object\n\
1908 Create a new decompressor object. This object may be used to decompress\n\
1909 data sequentially. If you want to decompress data in one shot, use the\n\
1910 decompress() function instead.\n\
1913 static PyTypeObject BZ2Decomp_Type
= {
/* Type object behind bz2.BZ2Decompressor. The slots visible in this
 * listing install BZ2Decomp_dealloc as destructor, generic attribute
 * access, BZ2Decomp__doc__, the BZ2Decomp_methods and
 * BZ2Decomp_members tables and BZ2Decomp_init.
 * Py_TPFLAGS_BASETYPE allows the type to be subclassed.
 * NOTE(review): slots on original lines that this listing omits (the
 * embedded numbering has gaps) are not described here. */
1914 PyObject_HEAD_INIT(NULL
)
1916 "bz2.BZ2Decompressor", /*tp_name*/
1917 sizeof(BZ2DecompObject
), /*tp_basicsize*/
1919 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
1926 0, /*tp_as_sequence*/
1927 0, /*tp_as_mapping*/
1931 PyObject_GenericGetAttr
,/*tp_getattro*/
1932 PyObject_GenericSetAttr
,/*tp_setattro*/
1934 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1935 BZ2Decomp__doc__
, /*tp_doc*/
1938 0, /*tp_richcompare*/
1939 0, /*tp_weaklistoffset*/
1942 BZ2Decomp_methods
, /*tp_methods*/
1943 BZ2Decomp_members
, /*tp_members*/
1949 0, /*tp_dictoffset*/
1950 (initproc
)BZ2Decomp_init
, /*tp_init*/
1951 PyType_GenericAlloc
, /*tp_alloc*/
1952 PyType_GenericNew
, /*tp_new*/
1953 _PyObject_Del
, /*tp_free*/
1958 /* ===================================================================== */
1959 /* Module functions. */
/* Docstring of the module-level compress function; referenced from the
 * bz2_methods table. */
1961 PyDoc_STRVAR(bz2_compress__doc__
,
1962 "compress(data [, compresslevel=9]) -> string\n\
1964 Compress data in one shot. If you want to compress data sequentially,\n\
1965 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1966 given, must be a number between 1 and 9.\n\
1970 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
/* Module-level compress(data [, compresslevel]) function. It compresses
 * the whole input in one shot with a private bz_stream and returns the
 * result as a Python string.
 * NOTE(review): this listing omits some original lines (the embedded
 * numbering has gaps), so some declarations, any enclosing loop and
 * the error exits are not visible here. */
1972 int compresslevel
=9;
1976 PyObject
*ret
= NULL
;
/* bzs points at _bzs, whose declaration is on a line not visible here;
 * presumably a local bz_stream - confirm. */
1978 bz_stream
*bzs
= &_bzs
;
1980 static char *kwlist
[] = {"data", "compresslevel", 0};
/* Required string or read-buffer argument plus an optional integer
 * level, both accepted positionally or by keyword. */
1982 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s#|i",
1983 kwlist
, &data
, &datasize
,
/* Reject levels outside 1..9 with a ValueError. */
1987 if (compresslevel
< 1 || compresslevel
> 9) {
1988 PyErr_SetString(PyExc_ValueError
,
1989 "compresslevel must be between 1 and 9");
1993 /* Conforming to bz2 manual, this is large enough to fit compressed
1994 * data in one shot. We will check it later anyway. */
/* That is: the input size plus about one percent plus 600 bytes. */
1995 bufsize
= datasize
+ (datasize
/100+1) + 600;
1997 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Zero the stream structure, then point it at the input bytes and at
 * the start of the output buffer. */
2001 memset(bzs
, 0, sizeof(bz_stream
));
2003 bzs
->next_in
= data
;
2004 bzs
->avail_in
= datasize
;
2005 bzs
->next_out
= BUF(ret
);
2006 bzs
->avail_out
= bufsize
;
/* The two trailing zeros passed to BZ2_bzCompressInit are its
 * verbosity and workFactor arguments (see the bzip2 manual). */
2008 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2009 if (bzerror
!= BZ_OK
) {
2010 Util_CatchBZ2Error(bzerror
);
/* Ask the library to finish the stream, with the interpreter lock
 * released during the call. */
2016 Py_BEGIN_ALLOW_THREADS
2017 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2018 Py_END_ALLOW_THREADS
/* BZ_STREAM_END means all output has been produced (the body of that
 * branch is not visible here). Any status other than BZ_FINISH_OK is
 * an error: the stream is shut down and the error is reported. */
2019 if (bzerror
== BZ_STREAM_END
) {
2021 } else if (bzerror
!= BZ_FINISH_OK
) {
2022 BZ2_bzCompressEnd(bzs
);
2023 Util_CatchBZ2Error(bzerror
);
/* Output buffer is full: grow it, shutting the stream down if the
 * resize fails. */
2027 if (bzs
->avail_out
== 0) {
2028 bufsize
= Util_NewBufferSize(bufsize
);
2029 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2030 BZ2_bzCompressEnd(bzs
);
/* Re-point the stream into the resized string. The stream is private
 * to this call, so its total output count is also the write offset. */
2034 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2035 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Trim the result to the bytes actually produced. The cast to int
 * assumes that this count fits in an int - TODO confirm. */
2039 if (bzs
->avail_out
!= 0)
2040 _PyString_Resize(&ret
, (int)BZS_TOTAL_OUT(bzs
));
/* Let the library release the resources held by the stream. */
2041 BZ2_bzCompressEnd(bzs
);
/* Docstring of the module-level decompress function; referenced from
 * the bz2_methods table. */
2046 PyDoc_STRVAR(bz2_decompress__doc__
,
2047 "decompress(data) -> decompressed data\n\
2049 Decompress data in one shot. If you want to decompress data sequentially,\n\
2050 use an instance of BZ2Decompressor instead.\n\
2054 bz2_decompress(PyObject
*self
, PyObject
*args
)
/* Module-level decompress(data) function. It decompresses the whole
 * input in one shot with a private bz_stream and returns the result as
 * a Python string that starts at SMALLCHUNK bytes and grows on demand.
 * NOTE(review): this listing omits some original lines (the embedded
 * numbering has gaps), so some declarations, any enclosing loop and
 * the error exits are not visible here. */
2058 int bufsize
= SMALLCHUNK
;
/* bzs points at _bzs, whose declaration is on a line not visible here;
 * presumably a local bz_stream - confirm. */
2061 bz_stream
*bzs
= &_bzs
;
/* One string or read-buffer argument, returned as pointer and length. */
2064 if (!PyArg_ParseTuple(args
, "s#", &data
, &datasize
))
/* Early exit with an empty string. NOTE(review): the guarding condition
 * is on a line not visible here; presumably the empty input case. */
2068 return PyString_FromString("");
2070 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Zero the stream structure, then point it at the input bytes and at
 * the start of the output buffer. */
2074 memset(bzs
, 0, sizeof(bz_stream
));
2076 bzs
->next_in
= data
;
2077 bzs
->avail_in
= datasize
;
2078 bzs
->next_out
= BUF(ret
);
2079 bzs
->avail_out
= bufsize
;
/* The two trailing zeros passed to BZ2_bzDecompressInit are its
 * verbosity and small arguments (see the bzip2 manual). */
2081 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2082 if (bzerror
!= BZ_OK
) {
2083 Util_CatchBZ2Error(bzerror
);
/* Run the library call with the interpreter lock released. */
2089 Py_BEGIN_ALLOW_THREADS
2090 bzerror
= BZ2_bzDecompress(bzs
);
2091 Py_END_ALLOW_THREADS
/* BZ_STREAM_END means the stream is complete (the body of that branch
 * is not visible here). Any status other than BZ_OK is an error: the
 * stream is shut down and the error is reported. */
2092 if (bzerror
== BZ_STREAM_END
) {
2094 } else if (bzerror
!= BZ_OK
) {
2095 BZ2_bzDecompressEnd(bzs
);
2096 Util_CatchBZ2Error(bzerror
);
/* Output buffer is full: grow it, shutting the stream down if the
 * resize fails. */
2100 if (bzs
->avail_out
== 0) {
2101 bufsize
= Util_NewBufferSize(bufsize
);
2102 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2103 BZ2_bzDecompressEnd(bzs
);
/* Re-point the stream into the resized string. The stream is private
 * to this call, so its total output count is also the write offset. */
2107 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2108 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Output space remains but the input ran out before the end of the
 * stream was seen: the input is truncated, so raise ValueError. */
2109 } else if (bzs
->avail_in
== 0) {
2110 BZ2_bzDecompressEnd(bzs
);
2111 PyErr_SetString(PyExc_ValueError
,
2112 "couldn't find end of stream");
/* Trim the result to the bytes actually produced. The cast to int
 * assumes that this count fits in an int - TODO confirm. */
2118 if (bzs
->avail_out
!= 0)
2119 _PyString_Resize(&ret
, (int)BZS_TOTAL_OUT(bzs
));
/* Let the library release the resources held by the stream. */
2120 BZ2_bzDecompressEnd(bzs
);
/* Module-level function table passed to Py_InitModule3. compress
 * accepts positional and keyword arguments
 * (METH_VARARGS|METH_KEYWORDS); decompress accepts positional
 * arguments only (METH_VARARGS). The NULL entry terminates the table. */
2125 static PyMethodDef bz2_methods
[] = {
2126 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2127 bz2_compress__doc__
},
2128 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2129 bz2_decompress__doc__
},
2130 {NULL
, NULL
} /* sentinel */
2133 /* ===================================================================== */
2134 /* Initialization function. */
/* Module docstring; passed to Py_InitModule3 when the module is
 * created. */
2136 PyDoc_STRVAR(bz2__doc__
,
2137 "The python bz2 module provides a comprehensive interface for\n\
2138 the bz2 compression library. It implements a complete file\n\
2139 interface, one shot (de)compression functions, and types for\n\
2140 sequential (de)compression.\n\
2148 BZ2File_Type
.ob_type
= &PyType_Type
;
2149 BZ2Comp_Type
.ob_type
= &PyType_Type
;
2150 BZ2Decomp_Type
.ob_type
= &PyType_Type
;
2152 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
2154 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
2156 Py_INCREF(&BZ2File_Type
);
2157 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2159 Py_INCREF(&BZ2Comp_Type
);
2160 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2162 Py_INCREF(&BZ2Decomp_Type
);
2163 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);