3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* BUF(v): pointer to the character data of the string object v.
   The argument is parenthesized so that an expression argument is cast
   as a whole instead of having the cast bind to its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
29 #define MODE_READ_EOF 2
32 #define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)
35 #ifdef BZ_CONFIG_ERROR
38 #define BZS_TOTAL_OUT(bzs) \
39 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
40 #elif SIZEOF_LONG_LONG >= 8
41 #define BZS_TOTAL_OUT(bzs) \
42 (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
44 #define BZS_TOTAL_OUT(bzs) \
48 #else /* ! BZ_CONFIG_ERROR */
50 #define BZ2_bzRead bzRead
51 #define BZ2_bzReadOpen bzReadOpen
52 #define BZ2_bzReadClose bzReadClose
53 #define BZ2_bzWrite bzWrite
54 #define BZ2_bzWriteOpen bzWriteOpen
55 #define BZ2_bzWriteClose bzWriteClose
56 #define BZ2_bzCompress bzCompress
57 #define BZ2_bzCompressInit bzCompressInit
58 #define BZ2_bzCompressEnd bzCompressEnd
59 #define BZ2_bzDecompress bzDecompress
60 #define BZ2_bzDecompressInit bzDecompressInit
61 #define BZ2_bzDecompressEnd bzDecompressEnd
63 #define BZS_TOTAL_OUT(bzs) bzs->total_out
65 #endif /* ! BZ_CONFIG_ERROR */
/* Take / release the per-object lock stored in obj->lock.  The second
   argument of PyThread_acquire_lock (1) asks it to wait for the lock.
   obj is parenthesized so that any pointer expression may be passed. */
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
72 #define ACQUIRE_LOCK(obj)
73 #define RELEASE_LOCK(obj)
76 #ifdef WITH_UNIVERSAL_NEWLINES
77 /* Bits in f_newlinetypes */
78 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
79 #define NEWLINE_CR 1 /* \r newline seen */
80 #define NEWLINE_LF 2 /* \n newline seen */
81 #define NEWLINE_CRLF 4 /* \r\n newline seen */
84 /* ===================================================================== */
85 /* Structure definitions. */
91 char* f_buf
; /* Allocated readahead buffer */
92 char* f_bufend
; /* Points after last occupied position */
93 char* f_bufptr
; /* Current buffer position */
95 int f_softspace
; /* Flag used by 'print' command */
97 #ifdef WITH_UNIVERSAL_NEWLINES
98 int f_univ_newline
; /* Handle any newline convention */
99 int f_newlinetypes
; /* Types of newlines seen */
100 int f_skipnextlf
; /* Skip next \n */
108 PyThread_type_lock lock
;
117 PyThread_type_lock lock
;
125 PyObject
*unused_data
;
127 PyThread_type_lock lock
;
131 /* ===================================================================== */
132 /* Utility functions. */
135 Util_CatchBZ2Error(int bzerror
)
143 #ifdef BZ_CONFIG_ERROR
144 case BZ_CONFIG_ERROR
:
145 PyErr_SetString(PyExc_SystemError
,
146 "the bz2 library was not compiled "
153 PyErr_SetString(PyExc_ValueError
,
154 "the bz2 library has received wrong "
165 case BZ_DATA_ERROR_MAGIC
:
166 PyErr_SetString(PyExc_IOError
, "invalid data stream");
171 PyErr_SetString(PyExc_IOError
, "unknown IO error");
175 case BZ_UNEXPECTED_EOF
:
176 PyErr_SetString(PyExc_EOFError
,
177 "compressed file ended before the "
178 "logical end-of-stream was detected");
182 case BZ_SEQUENCE_ERROR
:
183 PyErr_SetString(PyExc_RuntimeError
,
184 "wrong sequence of bz2 library "
193 #define SMALLCHUNK 8192
195 #define SMALLCHUNK BUFSIZ
199 #define BIGCHUNK (512 * 32)
201 #define BIGCHUNK (512 * 1024)
204 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
206 Util_NewBufferSize(size_t currentsize
)
208 if (currentsize
> SMALLCHUNK
) {
209 /* Keep doubling until we reach BIGCHUNK;
210 then keep adding BIGCHUNK. */
211 if (currentsize
<= BIGCHUNK
)
212 return currentsize
+ currentsize
;
214 return currentsize
+ BIGCHUNK
;
216 return currentsize
+ SMALLCHUNK
;
219 /* This is a hacked version of Python's fileobject.c:get_line(). */
221 Util_GetLine(BZ2FileObject
*f
, int n
)
225 size_t total_v_size
; /* total # of slots in buffer */
226 size_t used_v_size
; /* # used slots in buffer */
227 size_t increment
; /* amount to increment the buffer */
230 #ifdef WITH_UNIVERSAL_NEWLINES
231 int newlinetypes
= f
->f_newlinetypes
;
232 int skipnextlf
= f
->f_skipnextlf
;
233 int univ_newline
= f
->f_univ_newline
;
236 total_v_size
= n
> 0 ? n
: 100;
237 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
242 end
= buf
+ total_v_size
;
245 Py_BEGIN_ALLOW_THREADS
246 #ifdef WITH_UNIVERSAL_NEWLINES
249 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
251 if (bzerror
!= BZ_OK
|| buf
== end
)
256 /* Seeing a \n here with
257 * skipnextlf true means we
260 newlinetypes
|= NEWLINE_CRLF
;
261 BZ2_bzRead(&bzerror
, f
->fp
,
263 if (bzerror
!= BZ_OK
)
266 newlinetypes
|= NEWLINE_CR
;
272 } else if ( c
== '\n')
273 newlinetypes
|= NEWLINE_LF
;
275 if (c
== '\n') break;
277 if (bzerror
== BZ_STREAM_END
&& skipnextlf
)
278 newlinetypes
|= NEWLINE_CR
;
279 } else /* If not universal newlines use the normal loop */
282 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
285 } while (bzerror
== BZ_OK
&& c
!= '\n' && buf
!= end
);
287 #ifdef WITH_UNIVERSAL_NEWLINES
288 f
->f_newlinetypes
= newlinetypes
;
289 f
->f_skipnextlf
= skipnextlf
;
291 if (bzerror
== BZ_STREAM_END
) {
293 f
->mode
= MODE_READ_EOF
;
295 } else if (bzerror
!= BZ_OK
) {
296 Util_CatchBZ2Error(bzerror
);
302 /* Must be because buf == end */
305 used_v_size
= total_v_size
;
306 increment
= total_v_size
>> 2; /* mild exponential growth */
307 total_v_size
+= increment
;
308 if (total_v_size
> INT_MAX
) {
309 PyErr_SetString(PyExc_OverflowError
,
310 "line is longer than a Python string can hold");
314 if (_PyString_Resize(&v
, total_v_size
) < 0)
316 buf
= BUF(v
) + used_v_size
;
317 end
= BUF(v
) + total_v_size
;
320 used_v_size
= buf
- BUF(v
);
321 if (used_v_size
!= total_v_size
)
322 _PyString_Resize(&v
, used_v_size
);
326 #ifndef WITH_UNIVERSAL_NEWLINES
327 #define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
329 /* This is a hacked version of Python's
330 * fileobject.c:Py_UniversalNewlineFread(). */
332 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
333 char* buf
, size_t n
, BZ2FileObject
*f
)
336 int newlinetypes
, skipnextlf
;
339 assert(stream
!= NULL
);
341 if (!f
->f_univ_newline
)
342 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
344 newlinetypes
= f
->f_newlinetypes
;
345 skipnextlf
= f
->f_skipnextlf
;
347 /* Invariant: n is the number of bytes remaining to be filled
355 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
357 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
358 shortread
= n
!= 0; /* true iff EOF or error */
362 /* Save as LF and set flag to skip next LF. */
366 else if (skipnextlf
&& c
== '\n') {
367 /* Skip LF, and remember we saw CR LF. */
369 newlinetypes
|= NEWLINE_CRLF
;
373 /* Normal char to be stored in buffer. Also
374 * update the newlinetypes flag if either this
375 * is an LF or the previous char was a CR.
378 newlinetypes
|= NEWLINE_LF
;
380 newlinetypes
|= NEWLINE_CR
;
386 /* If this is EOF, update type flags. */
387 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
388 newlinetypes
|= NEWLINE_CR
;
392 f
->f_newlinetypes
= newlinetypes
;
393 f
->f_skipnextlf
= skipnextlf
;
398 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
400 Util_DropReadAhead(BZ2FileObject
*f
)
402 if (f
->f_buf
!= NULL
) {
403 PyMem_Free(f
->f_buf
);
408 /* This is a hacked version of Python's fileobject.c:readahead(). */
410 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
415 if (f
->f_buf
!= NULL
) {
416 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
419 Util_DropReadAhead(f
);
421 if (f
->mode
== MODE_READ_EOF
) {
424 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
427 Py_BEGIN_ALLOW_THREADS
428 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
432 if (bzerror
== BZ_STREAM_END
) {
434 f
->mode
= MODE_READ_EOF
;
435 } else if (bzerror
!= BZ_OK
) {
436 Util_CatchBZ2Error(bzerror
);
437 Util_DropReadAhead(f
);
440 f
->f_bufptr
= f
->f_buf
;
441 f
->f_bufend
= f
->f_buf
+ chunksize
;
445 /* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447 static PyStringObject
*
448 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
455 if (f
->f_buf
== NULL
)
456 if (Util_ReadAhead(f
, bufsize
) < 0)
459 len
= f
->f_bufend
- f
->f_bufptr
;
461 return (PyStringObject
*)
462 PyString_FromStringAndSize(NULL
, skip
);
463 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
464 if (bufptr
!= NULL
) {
465 bufptr
++; /* Count the '\n' */
466 len
= bufptr
- f
->f_bufptr
;
467 s
= (PyStringObject
*)
468 PyString_FromStringAndSize(NULL
, skip
+len
);
471 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
472 f
->f_bufptr
= bufptr
;
473 if (bufptr
== f
->f_bufend
)
474 Util_DropReadAhead(f
);
476 bufptr
= f
->f_bufptr
;
478 f
->f_buf
= NULL
; /* Force new readahead buffer */
479 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
480 bufsize
+ (bufsize
>>2));
485 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
491 /* ===================================================================== */
492 /* Methods of BZ2File. */
494 PyDoc_STRVAR(BZ2File_read__doc__
,
495 "read([size]) -> string\n\
497 Read at most size uncompressed bytes, returned as a string. If the size\n\
498 argument is negative or omitted, read until EOF is reached.\n\
501 /* This is a hacked version of Python's fileobject.c:file_read(). */
503 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
505 long bytesrequested
= -1;
506 size_t bytesread
, buffersize
, chunksize
;
508 PyObject
*ret
= NULL
;
510 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
514 switch (self
->mode
) {
518 ret
= PyString_FromString("");
521 PyErr_SetString(PyExc_ValueError
,
522 "I/O operation on closed file");
525 PyErr_SetString(PyExc_IOError
,
526 "file is not ready for reading");
530 if (bytesrequested
< 0)
531 buffersize
= Util_NewBufferSize((size_t)0);
533 buffersize
= bytesrequested
;
534 if (buffersize
> INT_MAX
) {
535 PyErr_SetString(PyExc_OverflowError
,
536 "requested number of bytes is "
537 "more than a Python string can hold");
540 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
546 Py_BEGIN_ALLOW_THREADS
547 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
549 buffersize
-bytesread
,
551 self
->pos
+= chunksize
;
553 bytesread
+= chunksize
;
554 if (bzerror
== BZ_STREAM_END
) {
555 self
->size
= self
->pos
;
556 self
->mode
= MODE_READ_EOF
;
558 } else if (bzerror
!= BZ_OK
) {
559 Util_CatchBZ2Error(bzerror
);
564 if (bytesrequested
< 0) {
565 buffersize
= Util_NewBufferSize(buffersize
);
566 if (_PyString_Resize(&ret
, buffersize
) < 0)
572 if (bytesread
!= buffersize
)
573 _PyString_Resize(&ret
, bytesread
);
580 PyDoc_STRVAR(BZ2File_readline__doc__
,
581 "readline([size]) -> string\n\
583 Return the next line from the file, as a string, retaining newline.\n\
584 A non-negative size argument will limit the maximum number of bytes to\n\
585 return (an incomplete line may be returned then). Return an empty\n\
590 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
592 PyObject
*ret
= NULL
;
595 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
599 switch (self
->mode
) {
603 ret
= PyString_FromString("");
606 PyErr_SetString(PyExc_ValueError
,
607 "I/O operation on closed file");
610 PyErr_SetString(PyExc_IOError
,
611 "file is not ready for reading");
616 ret
= PyString_FromString("");
618 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
625 PyDoc_STRVAR(BZ2File_readlines__doc__
,
626 "readlines([size]) -> list\n\
628 Call readline() repeatedly and return a list of lines read.\n\
629 The optional size argument, if given, is an approximate bound on the\n\
630 total number of bytes in the lines returned.\n\
633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
635 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
638 PyObject
*list
= NULL
;
640 char small_buffer
[SMALLCHUNK
];
641 char *buffer
= small_buffer
;
642 size_t buffersize
= SMALLCHUNK
;
643 PyObject
*big_buffer
= NULL
;
646 size_t totalread
= 0;
652 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
656 switch (self
->mode
) {
660 list
= PyList_New(0);
663 PyErr_SetString(PyExc_ValueError
,
664 "I/O operation on closed file");
667 PyErr_SetString(PyExc_IOError
,
668 "file is not ready for reading");
672 if ((list
= PyList_New(0)) == NULL
)
676 Py_BEGIN_ALLOW_THREADS
677 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
679 buffersize
-nfilled
, self
);
682 if (bzerror
== BZ_STREAM_END
) {
683 self
->size
= self
->pos
;
684 self
->mode
= MODE_READ_EOF
;
690 } else if (bzerror
!= BZ_OK
) {
691 Util_CatchBZ2Error(bzerror
);
698 p
= memchr(buffer
+nfilled
, '\n', nread
);
700 /* Need a larger buffer to fit this line */
703 if (buffersize
> INT_MAX
) {
704 PyErr_SetString(PyExc_OverflowError
,
705 "line is longer than a Python string can hold");
708 if (big_buffer
== NULL
) {
709 /* Create the big buffer */
710 big_buffer
= PyString_FromStringAndSize(
712 if (big_buffer
== NULL
)
714 buffer
= PyString_AS_STRING(big_buffer
);
715 memcpy(buffer
, small_buffer
, nfilled
);
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer
, buffersize
);
720 buffer
= PyString_AS_STRING(big_buffer
);
724 end
= buffer
+nfilled
+nread
;
727 /* Process complete lines */
729 line
= PyString_FromStringAndSize(q
, p
-q
);
732 err
= PyList_Append(list
, line
);
737 p
= memchr(q
, '\n', end
-q
);
739 /* Move the remaining incomplete line to the start */
741 memmove(buffer
, q
, nfilled
);
743 if (totalread
>= (size_t)sizehint
)
751 /* Partial last line */
752 line
= PyString_FromStringAndSize(buffer
, nfilled
);
756 /* Need to complete the last line */
757 PyObject
*rest
= Util_GetLine(self
, 0);
762 PyString_Concat(&line
, rest
);
767 err
= PyList_Append(list
, line
);
776 Py_DECREF(big_buffer
);
781 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
782 "xreadlines() -> self\n\
784 For backward compatibility. BZ2File objects now include the performance\n\
785 optimizations previously implemented in the xreadlines module.\n\
788 PyDoc_STRVAR(BZ2File_write__doc__
,
789 "write(data) -> None\n\
791 Write the 'data' string to file. Note that due to buffering, close() may\n\
792 be needed before the file on disk reflects the data written.\n\
795 /* This is a hacked version of Python's fileobject.c:file_write(). */
797 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
799 PyObject
*ret
= NULL
;
804 if (!PyArg_ParseTuple(args
, "s#", &buf
, &len
))
808 switch (self
->mode
) {
813 PyErr_SetString(PyExc_ValueError
,
814 "I/O operation on closed file");
818 PyErr_SetString(PyExc_IOError
,
819 "file is not ready for writing");
823 self
->f_softspace
= 0;
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
830 if (bzerror
!= BZ_OK
) {
831 Util_CatchBZ2Error(bzerror
);
843 PyDoc_STRVAR(BZ2File_writelines__doc__
,
844 "writelines(sequence_of_strings) -> None\n\
846 Write the sequence of strings to the file. Note that newlines are not\n\
847 added. The sequence can be any iterable object producing strings. This is\n\
848 equivalent to calling write() for each string.\n\
851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
853 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
855 #define CHUNKSIZE 1000
856 PyObject
*list
= NULL
;
857 PyObject
*iter
= NULL
;
858 PyObject
*ret
= NULL
;
860 int i
, j
, index
, len
, islist
;
864 islist
= PyList_Check(seq
);
866 iter
= PyObject_GetIter(seq
);
868 PyErr_SetString(PyExc_TypeError
,
869 "writelines() requires an iterable argument");
872 list
= PyList_New(CHUNKSIZE
);
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index
= 0; ; index
+= CHUNKSIZE
) {
883 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
886 j
= PyList_GET_SIZE(list
);
889 for (j
= 0; j
< CHUNKSIZE
; j
++) {
890 line
= PyIter_Next(iter
);
892 if (PyErr_Occurred())
896 PyList_SetItem(list
, j
, line
);
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the results to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i
= 0; i
< j
; i
++) {
908 PyObject
*v
= PyList_GET_ITEM(list
, i
);
909 if (!PyString_Check(v
)) {
912 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
913 PyErr_SetString(PyExc_TypeError
,
920 line
= PyString_FromStringAndSize(buffer
,
925 PyList_SET_ITEM(list
, i
, line
);
929 self
->f_softspace
= 0;
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i
= 0; i
< j
; i
++) {
935 line
= PyList_GET_ITEM(list
, i
);
936 len
= PyString_GET_SIZE(line
);
937 BZ2_bzWrite (&bzerror
, self
->fp
,
938 PyString_AS_STRING(line
), len
);
939 if (bzerror
!= BZ_OK
) {
941 Util_CatchBZ2Error(bzerror
);
962 PyDoc_STRVAR(BZ2File_seek__doc__
,
963 "seek(offset [, whence]) -> None\n\
965 Move to new file position. Argument offset is a byte count. Optional\n\
966 argument whence defaults to 0 (offset from start of file, offset\n\
967 should be >= 0); other values are 1 (move relative to current position,\n\
968 positive or negative), and 2 (move relative to end of file, usually\n\
969 negative, although many platforms allow seeking beyond the end of a file).\n\
971 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972 the operation may be extremely slow.\n\
976 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
980 char small_buffer
[SMALLCHUNK
];
981 char *buffer
= small_buffer
;
982 size_t buffersize
= SMALLCHUNK
;
988 PyObject
*ret
= NULL
;
990 if (!PyArg_ParseTuple(args
, "l|i:seek", &offset
, &where
))
994 Util_DropReadAhead(self
);
995 switch (self
->mode
) {
1001 PyErr_SetString(PyExc_ValueError
,
1002 "I/O operation on closed file");
1006 PyErr_SetString(PyExc_IOError
,
1007 "seek works only while reading");
1013 offset
= self
->pos
+ offset
;
1015 } else if (where
== 2) {
1016 if (self
->size
== -1) {
1017 assert(self
->mode
!= MODE_READ_EOF
);
1019 Py_BEGIN_ALLOW_THREADS
1020 chunksize
= Util_UnivNewlineRead(
1024 self
->pos
+= chunksize
;
1025 Py_END_ALLOW_THREADS
1027 bytesread
+= chunksize
;
1028 if (bzerror
== BZ_STREAM_END
) {
1030 } else if (bzerror
!= BZ_OK
) {
1031 Util_CatchBZ2Error(bzerror
);
1035 self
->mode
= MODE_READ_EOF
;
1036 self
->size
= self
->pos
;
1039 offset
= self
->size
+ offset
;
1040 if (offset
>= self
->pos
)
1041 offset
-= self
->pos
;
1047 } else if (where
== 0) {
1048 if (offset
>= self
->pos
)
1049 offset
-= self
->pos
;
1055 BZ2_bzReadClose(&bzerror
, self
->fp
);
1056 if (bzerror
!= BZ_OK
) {
1057 Util_CatchBZ2Error(bzerror
);
1060 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1066 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1068 if (bzerror
!= BZ_OK
) {
1069 Util_CatchBZ2Error(bzerror
);
1072 self
->mode
= MODE_READ
;
1073 } else if (self
->mode
== MODE_READ_EOF
) {
1080 /* Before getting here, offset must be set to the number of bytes
1081 * to walk forward. */
1083 if ((size_t)offset
-bytesread
> buffersize
)
1084 readsize
= buffersize
;
1086 readsize
= offset
-bytesread
;
1087 Py_BEGIN_ALLOW_THREADS
1088 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1089 buffer
, readsize
, self
);
1090 self
->pos
+= chunksize
;
1091 Py_END_ALLOW_THREADS
1092 bytesread
+= chunksize
;
1093 if (bzerror
== BZ_STREAM_END
) {
1094 self
->size
= self
->pos
;
1095 self
->mode
= MODE_READ_EOF
;
1097 } else if (bzerror
!= BZ_OK
) {
1098 Util_CatchBZ2Error(bzerror
);
1101 if (bytesread
== offset
)
1114 PyDoc_STRVAR(BZ2File_tell__doc__
,
1117 Return the current file position, an integer (may be a long integer).\n\
1121 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1123 PyObject
*ret
= NULL
;
1125 if (self
->mode
== MODE_CLOSED
) {
1126 PyErr_SetString(PyExc_ValueError
,
1127 "I/O operation on closed file");
1131 ret
= PyInt_FromLong(self
->pos
);
1137 PyDoc_STRVAR(BZ2File_close__doc__
,
1138 "close() -> None or (perhaps) an integer\n\
1140 Close the file. Sets data attribute .closed to true. A closed file\n\
1141 cannot be used for further I/O operations. close() may be called more\n\
1142 than once without error.\n\
1146 BZ2File_close(BZ2FileObject
*self
)
1148 PyObject
*ret
= NULL
;
1149 int bzerror
= BZ_OK
;
1152 switch (self
->mode
) {
1155 BZ2_bzReadClose(&bzerror
, self
->fp
);
1158 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1162 self
->mode
= MODE_CLOSED
;
1163 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1164 if (bzerror
!= BZ_OK
) {
1165 Util_CatchBZ2Error(bzerror
);
1174 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1176 static PyMethodDef BZ2File_methods
[] = {
1177 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1178 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1179 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1180 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1181 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1182 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1183 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1184 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1185 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1186 {NULL
, NULL
} /* sentinel */
1190 /* ===================================================================== */
1191 /* Getters and setters of BZ2File. */
1193 #ifdef WITH_UNIVERSAL_NEWLINES
1194 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1196 BZ2File_get_newlines(BZ2FileObject
*self
, void *closure
)
1198 switch (self
->f_newlinetypes
) {
1199 case NEWLINE_UNKNOWN
:
1203 return PyString_FromString("\r");
1205 return PyString_FromString("\n");
1206 case NEWLINE_CR
|NEWLINE_LF
:
1207 return Py_BuildValue("(ss)", "\r", "\n");
1209 return PyString_FromString("\r\n");
1210 case NEWLINE_CR
|NEWLINE_CRLF
:
1211 return Py_BuildValue("(ss)", "\r", "\r\n");
1212 case NEWLINE_LF
|NEWLINE_CRLF
:
1213 return Py_BuildValue("(ss)", "\n", "\r\n");
1214 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1215 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1217 PyErr_Format(PyExc_SystemError
,
1218 "Unknown newlines value 0x%x\n",
1219 self
->f_newlinetypes
);
1226 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1228 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
1232 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1234 return PyObject_GetAttrString(self
->file
, "mode");
1238 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1240 return PyObject_GetAttrString(self
->file
, "name");
1243 static PyGetSetDef BZ2File_getset
[] = {
1244 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1245 "True if the file is closed"},
1246 #ifdef WITH_UNIVERSAL_NEWLINES
1247 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1248 "end-of-line convention used in this file"},
1250 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1251 "file mode ('r', 'w', or 'U')"},
1252 {"name", (getter
)BZ2File_get_name
, NULL
,
1254 {NULL
} /* Sentinel */
1258 /* ===================================================================== */
1259 /* Members of BZ2File_Type. */
1262 #define OFF(x) offsetof(BZ2FileObject, x)
1264 static PyMemberDef BZ2File_members
[] = {
1265 {"softspace", T_INT
, OFF(f_softspace
), 0,
1266 "flag indicating that a space needs to be printed; used by print"},
1267 {NULL
} /* Sentinel */
1270 /* ===================================================================== */
1271 /* Slot definitions for BZ2File_Type. */
1274 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1276 static char *kwlist
[] = {"filename", "mode", "buffering",
1277 "compresslevel", 0};
1281 int compresslevel
= 9;
1287 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1288 kwlist
, &name
, &mode
, &buffering
,
1292 if (compresslevel
< 1 || compresslevel
> 9) {
1293 PyErr_SetString(PyExc_ValueError
,
1294 "compresslevel must be between 1 and 9");
1312 #ifdef WITH_UNIVERSAL_NEWLINES
1313 self
->f_univ_newline
= 1;
1322 PyErr_Format(PyExc_ValueError
,
1323 "invalid mode char %c", *mode
);
1331 mode
= (mode_char
== 'r') ? "rb" : "wb";
1333 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1334 name
, mode
, buffering
);
1335 if (self
->file
== NULL
)
1338 /* From now on, we have stuff to dealloc, so jump to error label
1339 * instead of returning */
1342 self
->lock
= PyThread_allocate_lock();
1347 if (mode_char
== 'r')
1348 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1349 PyFile_AsFile(self
->file
),
1352 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1353 PyFile_AsFile(self
->file
),
1354 compresslevel
, 0, 0);
1356 if (bzerror
!= BZ_OK
) {
1357 Util_CatchBZ2Error(bzerror
);
1361 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1366 Py_DECREF(self
->file
);
1369 PyThread_free_lock(self
->lock
);
1375 BZ2File_dealloc(BZ2FileObject
*self
)
1380 PyThread_free_lock(self
->lock
);
1382 switch (self
->mode
) {
1385 BZ2_bzReadClose(&bzerror
, self
->fp
);
1388 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1392 Util_DropReadAhead(self
);
1393 Py_XDECREF(self
->file
);
1394 self
->ob_type
->tp_free((PyObject
*)self
);
1397 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1399 BZ2File_getiter(BZ2FileObject
*self
)
1401 if (self
->mode
== MODE_CLOSED
) {
1402 PyErr_SetString(PyExc_ValueError
,
1403 "I/O operation on closed file");
1406 Py_INCREF((PyObject
*)self
);
1407 return (PyObject
*)self
;
1410 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1411 #define READAHEAD_BUFSIZE 8192
1413 BZ2File_iternext(BZ2FileObject
*self
)
1415 PyStringObject
* ret
;
1417 if (self
->mode
== MODE_CLOSED
) {
1418 PyErr_SetString(PyExc_ValueError
,
1419 "I/O operation on closed file");
1422 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1424 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1428 return (PyObject
*)ret
;
1431 /* ===================================================================== */
1432 /* BZ2File_Type definition. */
1434 PyDoc_VAR(BZ2File__doc__
) =
1436 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1438 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1439 writing. When opened for writing, the file will be created if it doesn't\n\
1440 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1441 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1442 is given, must be a number between 1 and 9.\n\
1444 #ifdef WITH_UNIVERSAL_NEWLINES
1447 Add a 'U' to mode to open the file for input with universal newline\n\
1448 support. Any line ending in the input file will be seen as a '\\n' in\n\
1449 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1450 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1451 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1452 newlines are available only when reading.\n\
1457 static PyTypeObject BZ2File_Type
= {
1458 PyObject_HEAD_INIT(NULL
)
1460 "bz2.BZ2File", /*tp_name*/
1461 sizeof(BZ2FileObject
), /*tp_basicsize*/
1463 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1470 0, /*tp_as_sequence*/
1471 0, /*tp_as_mapping*/
1475 PyObject_GenericGetAttr
,/*tp_getattro*/
1476 PyObject_GenericSetAttr
,/*tp_setattro*/
1478 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1479 BZ2File__doc__
, /*tp_doc*/
1482 0, /*tp_richcompare*/
1483 0, /*tp_weaklistoffset*/
1484 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1485 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1486 BZ2File_methods
, /*tp_methods*/
1487 BZ2File_members
, /*tp_members*/
1488 BZ2File_getset
, /*tp_getset*/
1493 0, /*tp_dictoffset*/
1494 (initproc
)BZ2File_init
, /*tp_init*/
1495 PyType_GenericAlloc
, /*tp_alloc*/
1496 PyType_GenericNew
, /*tp_new*/
1497 _PyObject_Del
, /*tp_free*/
1502 /* ===================================================================== */
1503 /* Methods of BZ2Comp. */
1505 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1506 "compress(data) -> string\n\
1508 Provide more data to the compressor object. It will return chunks of\n\
1509 compressed data whenever possible. When you've finished providing data\n\
1510 to compress, call the flush() method to finish the compression process,\n\
1511 and return what is left in the internal buffers.\n\
1515 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1519 int bufsize
= SMALLCHUNK
;
1520 PY_LONG_LONG totalout
;
1521 PyObject
*ret
= NULL
;
1522 bz_stream
*bzs
= &self
->bzs
;
1525 if (!PyArg_ParseTuple(args
, "s#", &data
, &datasize
))
1529 if (!self
->running
) {
1530 PyErr_SetString(PyExc_ValueError
,
1531 "this object was already flushed");
1535 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1539 bzs
->next_in
= data
;
1540 bzs
->avail_in
= datasize
;
1541 bzs
->next_out
= BUF(ret
);
1542 bzs
->avail_out
= bufsize
;
1544 totalout
= BZS_TOTAL_OUT(bzs
);
1547 Py_BEGIN_ALLOW_THREADS
1548 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1549 Py_END_ALLOW_THREADS
1550 if (bzerror
!= BZ_RUN_OK
) {
1551 Util_CatchBZ2Error(bzerror
);
1554 if (bzs
->avail_out
== 0) {
1555 bufsize
= Util_NewBufferSize(bufsize
);
1556 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1557 BZ2_bzCompressEnd(bzs
);
1560 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1562 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1563 } else if (bzs
->avail_in
== 0) {
1568 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
1579 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1580 "flush() -> string\n\
1582 Finish the compression process and return what is left in internal buffers.\n\
1583 You must not use the compressor object after calling this method.\n\
1587 BZ2Comp_flush(BZ2CompObject
*self
)
1589 int bufsize
= SMALLCHUNK
;
1590 PyObject
*ret
= NULL
;
1591 bz_stream
*bzs
= &self
->bzs
;
1592 PY_LONG_LONG totalout
;
1596 if (!self
->running
) {
1597 PyErr_SetString(PyExc_ValueError
, "object was already "
1603 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1607 bzs
->next_out
= BUF(ret
);
1608 bzs
->avail_out
= bufsize
;
1610 totalout
= BZS_TOTAL_OUT(bzs
);
1613 Py_BEGIN_ALLOW_THREADS
1614 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1615 Py_END_ALLOW_THREADS
1616 if (bzerror
== BZ_STREAM_END
) {
1618 } else if (bzerror
!= BZ_FINISH_OK
) {
1619 Util_CatchBZ2Error(bzerror
);
1622 if (bzs
->avail_out
== 0) {
1623 bufsize
= Util_NewBufferSize(bufsize
);
1624 if (_PyString_Resize(&ret
, bufsize
) < 0)
1626 bzs
->next_out
= BUF(ret
);
1627 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1629 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1633 if (bzs
->avail_out
!= 0)
1634 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
1645 static PyMethodDef BZ2Comp_methods
[] = {
1646 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1647 BZ2Comp_compress__doc__
},
1648 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1649 BZ2Comp_flush__doc__
},
1650 {NULL
, NULL
} /* sentinel */
1654 /* ===================================================================== */
1655 /* Slot definitions for BZ2Comp_Type. */
1658 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1660 int compresslevel
= 9;
1662 static char *kwlist
[] = {"compresslevel", 0};
1664 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1665 kwlist
, &compresslevel
))
1668 if (compresslevel
< 1 || compresslevel
> 9) {
1669 PyErr_SetString(PyExc_ValueError
,
1670 "compresslevel must be between 1 and 9");
1675 self
->lock
= PyThread_allocate_lock();
1680 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1681 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
1682 if (bzerror
!= BZ_OK
) {
1683 Util_CatchBZ2Error(bzerror
);
1693 PyThread_free_lock(self
->lock
);
1699 BZ2Comp_dealloc(BZ2CompObject
*self
)
1703 PyThread_free_lock(self
->lock
);
1705 BZ2_bzCompressEnd(&self
->bzs
);
1706 self
->ob_type
->tp_free((PyObject
*)self
);
1710 /* ===================================================================== */
1711 /* BZ2Comp_Type definition. */
1713 PyDoc_STRVAR(BZ2Comp__doc__
,
1714 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1716 Create a new compressor object. This object may be used to compress\n\
1717 data sequentially. If you want to compress data in one shot, use the\n\
1718 compress() function instead. The compresslevel parameter, if given,\n\
1719 must be a number between 1 and 9.\n\
1722 static PyTypeObject BZ2Comp_Type
= {
1723 PyObject_HEAD_INIT(NULL
)
1725 "bz2.BZ2Compressor", /*tp_name*/
1726 sizeof(BZ2CompObject
), /*tp_basicsize*/
1728 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1735 0, /*tp_as_sequence*/
1736 0, /*tp_as_mapping*/
1740 PyObject_GenericGetAttr
,/*tp_getattro*/
1741 PyObject_GenericSetAttr
,/*tp_setattro*/
1743 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1744 BZ2Comp__doc__
, /*tp_doc*/
1747 0, /*tp_richcompare*/
1748 0, /*tp_weaklistoffset*/
1751 BZ2Comp_methods
, /*tp_methods*/
1758 0, /*tp_dictoffset*/
1759 (initproc
)BZ2Comp_init
, /*tp_init*/
1760 PyType_GenericAlloc
, /*tp_alloc*/
1761 PyType_GenericNew
, /*tp_new*/
1762 _PyObject_Del
, /*tp_free*/
1767 /* ===================================================================== */
1768 /* Members of BZ2Decomp. */
1771 #define OFF(x) offsetof(BZ2DecompObject, x)
1773 static PyMemberDef BZ2Decomp_members
[] = {
1774 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1775 {NULL
} /* Sentinel */
1779 /* ===================================================================== */
1780 /* Methods of BZ2Decomp. */
1782 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1783 "decompress(data) -> string\n\
1785 Provide more data to the decompressor object. It will return chunks\n\
1786 of decompressed data whenever possible. If you try to decompress data\n\
1787 after the end of stream is found, EOFError will be raised. If any data\n\
1788 was found after the end of stream, it'll be ignored and saved in\n\
1789 unused_data attribute.\n\
1793 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
1797 int bufsize
= SMALLCHUNK
;
1798 PY_LONG_LONG totalout
;
1799 PyObject
*ret
= NULL
;
1800 bz_stream
*bzs
= &self
->bzs
;
1803 if (!PyArg_ParseTuple(args
, "s#", &data
, &datasize
))
1807 if (!self
->running
) {
1808 PyErr_SetString(PyExc_EOFError
, "end of stream was "
1813 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1817 bzs
->next_in
= data
;
1818 bzs
->avail_in
= datasize
;
1819 bzs
->next_out
= BUF(ret
);
1820 bzs
->avail_out
= bufsize
;
1822 totalout
= BZS_TOTAL_OUT(bzs
);
1825 Py_BEGIN_ALLOW_THREADS
1826 bzerror
= BZ2_bzDecompress(bzs
);
1827 Py_END_ALLOW_THREADS
1828 if (bzerror
== BZ_STREAM_END
) {
1829 if (bzs
->avail_in
!= 0) {
1830 Py_DECREF(self
->unused_data
);
1832 PyString_FromStringAndSize(bzs
->next_in
,
1838 if (bzerror
!= BZ_OK
) {
1839 Util_CatchBZ2Error(bzerror
);
1842 if (bzs
->avail_out
== 0) {
1843 bufsize
= Util_NewBufferSize(bufsize
);
1844 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1845 BZ2_bzDecompressEnd(bzs
);
1848 bzs
->next_out
= BUF(ret
);
1849 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1851 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1852 } else if (bzs
->avail_in
== 0) {
1857 if (bzs
->avail_out
!= 0)
1858 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
1869 static PyMethodDef BZ2Decomp_methods
[] = {
1870 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1871 {NULL
, NULL
} /* sentinel */
1875 /* ===================================================================== */
1876 /* Slot definitions for BZ2Decomp_Type. */
1879 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1883 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
1887 self
->lock
= PyThread_allocate_lock();
1892 self
->unused_data
= PyString_FromString("");
1893 if (!self
->unused_data
)
1896 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1897 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
1898 if (bzerror
!= BZ_OK
) {
1899 Util_CatchBZ2Error(bzerror
);
1910 PyThread_free_lock(self
->lock
);
1912 Py_XDECREF(self
->unused_data
);
1917 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
1921 PyThread_free_lock(self
->lock
);
1923 Py_XDECREF(self
->unused_data
);
1924 BZ2_bzDecompressEnd(&self
->bzs
);
1925 self
->ob_type
->tp_free((PyObject
*)self
);
1929 /* ===================================================================== */
1930 /* BZ2Decomp_Type definition. */
1932 PyDoc_STRVAR(BZ2Decomp__doc__
,
1933 "BZ2Decompressor() -> decompressor object\n\
1935 Create a new decompressor object. This object may be used to decompress\n\
1936 data sequentially. If you want to decompress data in one shot, use the\n\
1937 decompress() function instead.\n\
1940 static PyTypeObject BZ2Decomp_Type
= {
1941 PyObject_HEAD_INIT(NULL
)
1943 "bz2.BZ2Decompressor", /*tp_name*/
1944 sizeof(BZ2DecompObject
), /*tp_basicsize*/
1946 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
1953 0, /*tp_as_sequence*/
1954 0, /*tp_as_mapping*/
1958 PyObject_GenericGetAttr
,/*tp_getattro*/
1959 PyObject_GenericSetAttr
,/*tp_setattro*/
1961 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1962 BZ2Decomp__doc__
, /*tp_doc*/
1965 0, /*tp_richcompare*/
1966 0, /*tp_weaklistoffset*/
1969 BZ2Decomp_methods
, /*tp_methods*/
1970 BZ2Decomp_members
, /*tp_members*/
1976 0, /*tp_dictoffset*/
1977 (initproc
)BZ2Decomp_init
, /*tp_init*/
1978 PyType_GenericAlloc
, /*tp_alloc*/
1979 PyType_GenericNew
, /*tp_new*/
1980 _PyObject_Del
, /*tp_free*/
1985 /* ===================================================================== */
1986 /* Module functions. */
1988 PyDoc_STRVAR(bz2_compress__doc__
,
1989 "compress(data [, compresslevel=9]) -> string\n\
1991 Compress data in one shot. If you want to compress data sequentially,\n\
1992 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1993 given, must be a number between 1 and 9.\n\
1997 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1999 int compresslevel
=9;
2003 PyObject
*ret
= NULL
;
2005 bz_stream
*bzs
= &_bzs
;
2007 static char *kwlist
[] = {"data", "compresslevel", 0};
2009 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s#|i",
2010 kwlist
, &data
, &datasize
,
2014 if (compresslevel
< 1 || compresslevel
> 9) {
2015 PyErr_SetString(PyExc_ValueError
,
2016 "compresslevel must be between 1 and 9");
2020 /* Conforming to bz2 manual, this is large enough to fit compressed
2021 * data in one shot. We will check it later anyway. */
2022 bufsize
= datasize
+ (datasize
/100+1) + 600;
2024 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2028 memset(bzs
, 0, sizeof(bz_stream
));
2030 bzs
->next_in
= data
;
2031 bzs
->avail_in
= datasize
;
2032 bzs
->next_out
= BUF(ret
);
2033 bzs
->avail_out
= bufsize
;
2035 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2036 if (bzerror
!= BZ_OK
) {
2037 Util_CatchBZ2Error(bzerror
);
2043 Py_BEGIN_ALLOW_THREADS
2044 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2045 Py_END_ALLOW_THREADS
2046 if (bzerror
== BZ_STREAM_END
) {
2048 } else if (bzerror
!= BZ_FINISH_OK
) {
2049 BZ2_bzCompressEnd(bzs
);
2050 Util_CatchBZ2Error(bzerror
);
2054 if (bzs
->avail_out
== 0) {
2055 bufsize
= Util_NewBufferSize(bufsize
);
2056 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2057 BZ2_bzCompressEnd(bzs
);
2061 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2062 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2066 if (bzs
->avail_out
!= 0)
2067 _PyString_Resize(&ret
, (int)BZS_TOTAL_OUT(bzs
));
2068 BZ2_bzCompressEnd(bzs
);
2073 PyDoc_STRVAR(bz2_decompress__doc__
,
2074 "decompress(data) -> decompressed data\n\
2076 Decompress data in one shot. If you want to decompress data sequentially,\n\
2077 use an instance of BZ2Decompressor instead.\n\
2081 bz2_decompress(PyObject
*self
, PyObject
*args
)
2085 int bufsize
= SMALLCHUNK
;
2088 bz_stream
*bzs
= &_bzs
;
2091 if (!PyArg_ParseTuple(args
, "s#", &data
, &datasize
))
2095 return PyString_FromString("");
2097 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2101 memset(bzs
, 0, sizeof(bz_stream
));
2103 bzs
->next_in
= data
;
2104 bzs
->avail_in
= datasize
;
2105 bzs
->next_out
= BUF(ret
);
2106 bzs
->avail_out
= bufsize
;
2108 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2109 if (bzerror
!= BZ_OK
) {
2110 Util_CatchBZ2Error(bzerror
);
2116 Py_BEGIN_ALLOW_THREADS
2117 bzerror
= BZ2_bzDecompress(bzs
);
2118 Py_END_ALLOW_THREADS
2119 if (bzerror
== BZ_STREAM_END
) {
2121 } else if (bzerror
!= BZ_OK
) {
2122 BZ2_bzDecompressEnd(bzs
);
2123 Util_CatchBZ2Error(bzerror
);
2127 if (bzs
->avail_out
== 0) {
2128 bufsize
= Util_NewBufferSize(bufsize
);
2129 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2130 BZ2_bzDecompressEnd(bzs
);
2134 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2135 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2136 } else if (bzs
->avail_in
== 0) {
2137 BZ2_bzDecompressEnd(bzs
);
2138 PyErr_SetString(PyExc_ValueError
,
2139 "couldn't find end of stream");
2145 if (bzs
->avail_out
!= 0)
2146 _PyString_Resize(&ret
, (int)BZS_TOTAL_OUT(bzs
));
2147 BZ2_bzDecompressEnd(bzs
);
2152 static PyMethodDef bz2_methods
[] = {
2153 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2154 bz2_compress__doc__
},
2155 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2156 bz2_decompress__doc__
},
2157 {NULL
, NULL
} /* sentinel */
2160 /* ===================================================================== */
2161 /* Initialization function. */
2163 PyDoc_STRVAR(bz2__doc__
,
2164 "The python bz2 module provides a comprehensive interface for\n\
2165 the bz2 compression library. It implements a complete file\n\
2166 interface, one shot (de)compression functions, and types for\n\
2167 sequential (de)compression.\n\
2175 BZ2File_Type
.ob_type
= &PyType_Type
;
2176 BZ2Comp_Type
.ob_type
= &PyType_Type
;
2177 BZ2Decomp_Type
.ob_type
= &PyType_Type
;
2179 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
2181 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
2183 Py_INCREF(&BZ2File_Type
);
2184 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2186 Py_INCREF(&BZ2Comp_Type
);
2187 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2189 Py_INCREF(&BZ2Decomp_Type
);
2190 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);