3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
25 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
29 #define MODE_READ_EOF 2
32 #define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)
35 #ifdef BZ_CONFIG_ERROR
38 #define BZS_TOTAL_OUT(bzs) \
39 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
40 #elif SIZEOF_LONG_LONG >= 8
41 #define BZS_TOTAL_OUT(bzs) \
42 (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
44 #define BZS_TOTAL_OUT(bzs) \
48 #else /* ! BZ_CONFIG_ERROR */
50 #define BZ2_bzRead bzRead
51 #define BZ2_bzReadOpen bzReadOpen
52 #define BZ2_bzReadClose bzReadClose
53 #define BZ2_bzWrite bzWrite
54 #define BZ2_bzWriteOpen bzWriteOpen
55 #define BZ2_bzWriteClose bzWriteClose
56 #define BZ2_bzCompress bzCompress
57 #define BZ2_bzCompressInit bzCompressInit
58 #define BZ2_bzCompressEnd bzCompressEnd
59 #define BZ2_bzDecompress bzDecompress
60 #define BZ2_bzDecompressInit bzDecompressInit
61 #define BZ2_bzDecompressEnd bzDecompressEnd
63 #define BZS_TOTAL_OUT(bzs) bzs->total_out
65 #endif /* ! BZ_CONFIG_ERROR */
69 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
70 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
72 #define ACQUIRE_LOCK(obj)
73 #define RELEASE_LOCK(obj)
76 /* Bits in f_newlinetypes */
77 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
78 #define NEWLINE_CR 1 /* \r newline seen */
79 #define NEWLINE_LF 2 /* \n newline seen */
80 #define NEWLINE_CRLF 4 /* \r\n newline seen */
82 /* ===================================================================== */
83 /* Structure definitions. */
89 char* f_buf
; /* Allocated readahead buffer */
90 char* f_bufend
; /* Points after last occupied position */
91 char* f_bufptr
; /* Current buffer position */
93 int f_softspace
; /* Flag used by 'print' command */
95 int f_univ_newline
; /* Handle any newline convention */
96 int f_newlinetypes
; /* Types of newlines seen */
97 int f_skipnextlf
; /* Skip next \n */
104 PyThread_type_lock lock
;
113 PyThread_type_lock lock
;
121 PyObject
*unused_data
;
123 PyThread_type_lock lock
;
127 /* ===================================================================== */
128 /* Utility functions. */
131 Util_CatchBZ2Error(int bzerror
)
139 #ifdef BZ_CONFIG_ERROR
140 case BZ_CONFIG_ERROR
:
141 PyErr_SetString(PyExc_SystemError
,
142 "the bz2 library was not compiled "
149 PyErr_SetString(PyExc_ValueError
,
150 "the bz2 library has received wrong "
161 case BZ_DATA_ERROR_MAGIC
:
162 PyErr_SetString(PyExc_IOError
, "invalid data stream");
167 PyErr_SetString(PyExc_IOError
, "unknown IO error");
171 case BZ_UNEXPECTED_EOF
:
172 PyErr_SetString(PyExc_EOFError
,
173 "compressed file ended before the "
174 "logical end-of-stream was detected");
178 case BZ_SEQUENCE_ERROR
:
179 PyErr_SetString(PyExc_RuntimeError
,
180 "wrong sequence of bz2 library "
189 #define SMALLCHUNK 8192
191 #define SMALLCHUNK BUFSIZ
195 #define BIGCHUNK (512 * 32)
197 #define BIGCHUNK (512 * 1024)
200 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
202 Util_NewBufferSize(size_t currentsize
)
204 if (currentsize
> SMALLCHUNK
) {
205 /* Keep doubling until we reach BIGCHUNK;
206 then keep adding BIGCHUNK. */
207 if (currentsize
<= BIGCHUNK
)
208 return currentsize
+ currentsize
;
210 return currentsize
+ BIGCHUNK
;
212 return currentsize
+ SMALLCHUNK
;
215 /* This is a hacked version of Python's fileobject.c:get_line(). */
217 Util_GetLine(BZ2FileObject
*f
, int n
)
221 size_t total_v_size
; /* total # of slots in buffer */
222 size_t used_v_size
; /* # used slots in buffer */
223 size_t increment
; /* amount to increment the buffer */
226 int newlinetypes
= f
->f_newlinetypes
;
227 int skipnextlf
= f
->f_skipnextlf
;
228 int univ_newline
= f
->f_univ_newline
;
230 total_v_size
= n
> 0 ? n
: 100;
231 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
236 end
= buf
+ total_v_size
;
239 Py_BEGIN_ALLOW_THREADS
242 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
244 if (bzerror
!= BZ_OK
|| buf
== end
)
249 /* Seeing a \n here with
250 * skipnextlf true means we
253 newlinetypes
|= NEWLINE_CRLF
;
254 BZ2_bzRead(&bzerror
, f
->fp
,
256 if (bzerror
!= BZ_OK
)
259 newlinetypes
|= NEWLINE_CR
;
265 } else if ( c
== '\n')
266 newlinetypes
|= NEWLINE_LF
;
268 if (c
== '\n') break;
270 if (bzerror
== BZ_STREAM_END
&& skipnextlf
)
271 newlinetypes
|= NEWLINE_CR
;
272 } else /* If not universal newlines use the normal loop */
274 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
277 } while (bzerror
== BZ_OK
&& c
!= '\n' && buf
!= end
);
279 f
->f_newlinetypes
= newlinetypes
;
280 f
->f_skipnextlf
= skipnextlf
;
281 if (bzerror
== BZ_STREAM_END
) {
283 f
->mode
= MODE_READ_EOF
;
285 } else if (bzerror
!= BZ_OK
) {
286 Util_CatchBZ2Error(bzerror
);
292 /* Must be because buf == end */
295 used_v_size
= total_v_size
;
296 increment
= total_v_size
>> 2; /* mild exponential growth */
297 total_v_size
+= increment
;
298 if (total_v_size
> INT_MAX
) {
299 PyErr_SetString(PyExc_OverflowError
,
300 "line is longer than a Python string can hold");
304 if (_PyString_Resize(&v
, total_v_size
) < 0)
306 buf
= BUF(v
) + used_v_size
;
307 end
= BUF(v
) + total_v_size
;
310 used_v_size
= buf
- BUF(v
);
311 if (used_v_size
!= total_v_size
)
312 _PyString_Resize(&v
, used_v_size
);
316 /* This is a hacked version of Python's
317 * fileobject.c:Py_UniversalNewlineFread(). */
319 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
320 char* buf
, size_t n
, BZ2FileObject
*f
)
323 int newlinetypes
, skipnextlf
;
326 assert(stream
!= NULL
);
328 if (!f
->f_univ_newline
)
329 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
331 newlinetypes
= f
->f_newlinetypes
;
332 skipnextlf
= f
->f_skipnextlf
;
334 /* Invariant: n is the number of bytes remaining to be filled
342 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
344 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
345 shortread
= n
!= 0; /* true iff EOF or error */
349 /* Save as LF and set flag to skip next LF. */
353 else if (skipnextlf
&& c
== '\n') {
354 /* Skip LF, and remember we saw CR LF. */
356 newlinetypes
|= NEWLINE_CRLF
;
360 /* Normal char to be stored in buffer. Also
361 * update the newlinetypes flag if either this
362 * is an LF or the previous char was a CR.
365 newlinetypes
|= NEWLINE_LF
;
367 newlinetypes
|= NEWLINE_CR
;
373 /* If this is EOF, update type flags. */
374 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
375 newlinetypes
|= NEWLINE_CR
;
379 f
->f_newlinetypes
= newlinetypes
;
380 f
->f_skipnextlf
= skipnextlf
;
384 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
386 Util_DropReadAhead(BZ2FileObject
*f
)
388 if (f
->f_buf
!= NULL
) {
389 PyMem_Free(f
->f_buf
);
394 /* This is a hacked version of Python's fileobject.c:readahead(). */
396 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
401 if (f
->f_buf
!= NULL
) {
402 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
405 Util_DropReadAhead(f
);
407 if (f
->mode
== MODE_READ_EOF
) {
410 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
413 Py_BEGIN_ALLOW_THREADS
414 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
418 if (bzerror
== BZ_STREAM_END
) {
420 f
->mode
= MODE_READ_EOF
;
421 } else if (bzerror
!= BZ_OK
) {
422 Util_CatchBZ2Error(bzerror
);
423 Util_DropReadAhead(f
);
426 f
->f_bufptr
= f
->f_buf
;
427 f
->f_bufend
= f
->f_buf
+ chunksize
;
431 /* This is a hacked version of Python's
432 * fileobject.c:readahead_get_line_skip(). */
433 static PyStringObject
*
434 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
441 if (f
->f_buf
== NULL
)
442 if (Util_ReadAhead(f
, bufsize
) < 0)
445 len
= f
->f_bufend
- f
->f_bufptr
;
447 return (PyStringObject
*)
448 PyString_FromStringAndSize(NULL
, skip
);
449 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
450 if (bufptr
!= NULL
) {
451 bufptr
++; /* Count the '\n' */
452 len
= bufptr
- f
->f_bufptr
;
453 s
= (PyStringObject
*)
454 PyString_FromStringAndSize(NULL
, skip
+len
);
457 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
458 f
->f_bufptr
= bufptr
;
459 if (bufptr
== f
->f_bufend
)
460 Util_DropReadAhead(f
);
462 bufptr
= f
->f_bufptr
;
464 f
->f_buf
= NULL
; /* Force new readahead buffer */
465 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
466 bufsize
+ (bufsize
>>2));
471 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
477 /* ===================================================================== */
478 /* Methods of BZ2File. */
480 PyDoc_STRVAR(BZ2File_read__doc__
,
481 "read([size]) -> string\n\
483 Read at most size uncompressed bytes, returned as a string. If the size\n\
484 argument is negative or omitted, read until EOF is reached.\n\
487 /* This is a hacked version of Python's fileobject.c:file_read(). */
489 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
491 long bytesrequested
= -1;
492 size_t bytesread
, buffersize
, chunksize
;
494 PyObject
*ret
= NULL
;
496 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
500 switch (self
->mode
) {
504 ret
= PyString_FromString("");
507 PyErr_SetString(PyExc_ValueError
,
508 "I/O operation on closed file");
511 PyErr_SetString(PyExc_IOError
,
512 "file is not ready for reading");
516 if (bytesrequested
< 0)
517 buffersize
= Util_NewBufferSize((size_t)0);
519 buffersize
= bytesrequested
;
520 if (buffersize
> INT_MAX
) {
521 PyErr_SetString(PyExc_OverflowError
,
522 "requested number of bytes is "
523 "more than a Python string can hold");
526 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
532 Py_BEGIN_ALLOW_THREADS
533 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
535 buffersize
-bytesread
,
537 self
->pos
+= chunksize
;
539 bytesread
+= chunksize
;
540 if (bzerror
== BZ_STREAM_END
) {
541 self
->size
= self
->pos
;
542 self
->mode
= MODE_READ_EOF
;
544 } else if (bzerror
!= BZ_OK
) {
545 Util_CatchBZ2Error(bzerror
);
550 if (bytesrequested
< 0) {
551 buffersize
= Util_NewBufferSize(buffersize
);
552 if (_PyString_Resize(&ret
, buffersize
) < 0)
558 if (bytesread
!= buffersize
)
559 _PyString_Resize(&ret
, bytesread
);
566 PyDoc_STRVAR(BZ2File_readline__doc__
,
567 "readline([size]) -> string\n\
569 Return the next line from the file, as a string, retaining newline.\n\
570 A non-negative size argument will limit the maximum number of bytes to\n\
571 return (an incomplete line may be returned then). Return an empty\n\
576 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
578 PyObject
*ret
= NULL
;
581 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
585 switch (self
->mode
) {
589 ret
= PyString_FromString("");
592 PyErr_SetString(PyExc_ValueError
,
593 "I/O operation on closed file");
596 PyErr_SetString(PyExc_IOError
,
597 "file is not ready for reading");
602 ret
= PyString_FromString("");
604 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
611 PyDoc_STRVAR(BZ2File_readlines__doc__
,
612 "readlines([size]) -> list\n\
614 Call readline() repeatedly and return a list of lines read.\n\
615 The optional size argument, if given, is an approximate bound on the\n\
616 total number of bytes in the lines returned.\n\
619 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
621 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
624 PyObject
*list
= NULL
;
626 char small_buffer
[SMALLCHUNK
];
627 char *buffer
= small_buffer
;
628 size_t buffersize
= SMALLCHUNK
;
629 PyObject
*big_buffer
= NULL
;
632 size_t totalread
= 0;
638 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
642 switch (self
->mode
) {
646 list
= PyList_New(0);
649 PyErr_SetString(PyExc_ValueError
,
650 "I/O operation on closed file");
653 PyErr_SetString(PyExc_IOError
,
654 "file is not ready for reading");
658 if ((list
= PyList_New(0)) == NULL
)
662 Py_BEGIN_ALLOW_THREADS
663 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
665 buffersize
-nfilled
, self
);
668 if (bzerror
== BZ_STREAM_END
) {
669 self
->size
= self
->pos
;
670 self
->mode
= MODE_READ_EOF
;
676 } else if (bzerror
!= BZ_OK
) {
677 Util_CatchBZ2Error(bzerror
);
684 p
= memchr(buffer
+nfilled
, '\n', nread
);
686 /* Need a larger buffer to fit this line */
689 if (buffersize
> INT_MAX
) {
690 PyErr_SetString(PyExc_OverflowError
,
691 "line is longer than a Python string can hold");
694 if (big_buffer
== NULL
) {
695 /* Create the big buffer */
696 big_buffer
= PyString_FromStringAndSize(
698 if (big_buffer
== NULL
)
700 buffer
= PyString_AS_STRING(big_buffer
);
701 memcpy(buffer
, small_buffer
, nfilled
);
704 /* Grow the big buffer */
705 _PyString_Resize(&big_buffer
, buffersize
);
706 buffer
= PyString_AS_STRING(big_buffer
);
710 end
= buffer
+nfilled
+nread
;
713 /* Process complete lines */
715 line
= PyString_FromStringAndSize(q
, p
-q
);
718 err
= PyList_Append(list
, line
);
723 p
= memchr(q
, '\n', end
-q
);
725 /* Move the remaining incomplete line to the start */
727 memmove(buffer
, q
, nfilled
);
729 if (totalread
>= (size_t)sizehint
)
737 /* Partial last line */
738 line
= PyString_FromStringAndSize(buffer
, nfilled
);
742 /* Need to complete the last line */
743 PyObject
*rest
= Util_GetLine(self
, 0);
748 PyString_Concat(&line
, rest
);
753 err
= PyList_Append(list
, line
);
762 Py_DECREF(big_buffer
);
767 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
768 "xreadlines() -> self\n\
770 For backward compatibility. BZ2File objects now include the performance\n\
771 optimizations previously implemented in the xreadlines module.\n\
774 PyDoc_STRVAR(BZ2File_write__doc__
,
775 "write(data) -> None\n\
777 Write the 'data' string to file. Note that due to buffering, close() may\n\
778 be needed before the file on disk reflects the data written.\n\
781 /* This is a hacked version of Python's fileobject.c:file_write(). */
783 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
785 PyObject
*ret
= NULL
;
790 if (!PyArg_ParseTuple(args
, "s#:write", &buf
, &len
))
794 switch (self
->mode
) {
799 PyErr_SetString(PyExc_ValueError
,
800 "I/O operation on closed file");
804 PyErr_SetString(PyExc_IOError
,
805 "file is not ready for writing");
809 self
->f_softspace
= 0;
811 Py_BEGIN_ALLOW_THREADS
812 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
816 if (bzerror
!= BZ_OK
) {
817 Util_CatchBZ2Error(bzerror
);
829 PyDoc_STRVAR(BZ2File_writelines__doc__
,
830 "writelines(sequence_of_strings) -> None\n\
832 Write the sequence of strings to the file. Note that newlines are not\n\
833 added. The sequence can be any iterable object producing strings. This is\n\
834 equivalent to calling write() for each string.\n\
837 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
839 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
841 #define CHUNKSIZE 1000
842 PyObject
*list
= NULL
;
843 PyObject
*iter
= NULL
;
844 PyObject
*ret
= NULL
;
846 int i
, j
, index
, len
, islist
;
850 islist
= PyList_Check(seq
);
852 iter
= PyObject_GetIter(seq
);
854 PyErr_SetString(PyExc_TypeError
,
855 "writelines() requires an iterable argument");
858 list
= PyList_New(CHUNKSIZE
);
863 /* Strategy: slurp CHUNKSIZE lines into a private list,
864 checking that they are all strings, then write that list
865 without holding the interpreter lock, then come back for more. */
866 for (index
= 0; ; index
+= CHUNKSIZE
) {
869 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
872 j
= PyList_GET_SIZE(list
);
875 for (j
= 0; j
< CHUNKSIZE
; j
++) {
876 line
= PyIter_Next(iter
);
878 if (PyErr_Occurred())
882 PyList_SetItem(list
, j
, line
);
888 /* Check that all entries are indeed strings. If not,
889 apply the same rules as for file.write() and
890 convert the rets to strings. This is slow, but
891 seems to be the only way since all conversion APIs
892 could potentially execute Python code. */
893 for (i
= 0; i
< j
; i
++) {
894 PyObject
*v
= PyList_GET_ITEM(list
, i
);
895 if (!PyString_Check(v
)) {
898 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
899 PyErr_SetString(PyExc_TypeError
,
906 line
= PyString_FromStringAndSize(buffer
,
911 PyList_SET_ITEM(list
, i
, line
);
915 self
->f_softspace
= 0;
917 /* Since we are releasing the global lock, the
918 following code may *not* execute Python code. */
919 Py_BEGIN_ALLOW_THREADS
920 for (i
= 0; i
< j
; i
++) {
921 line
= PyList_GET_ITEM(list
, i
);
922 len
= PyString_GET_SIZE(line
);
923 BZ2_bzWrite (&bzerror
, self
->fp
,
924 PyString_AS_STRING(line
), len
);
925 if (bzerror
!= BZ_OK
) {
927 Util_CatchBZ2Error(bzerror
);
948 PyDoc_STRVAR(BZ2File_seek__doc__
,
949 "seek(offset [, whence]) -> None\n\
951 Move to new file position. Argument offset is a byte count. Optional\n\
952 argument whence defaults to 0 (offset from start of file, offset\n\
953 should be >= 0); other values are 1 (move relative to current position,\n\
954 positive or negative), and 2 (move relative to end of file, usually\n\
955 negative, although many platforms allow seeking beyond the end of a file).\n\
957 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
958 the operation may be extremely slow.\n\
962 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
966 char small_buffer
[SMALLCHUNK
];
967 char *buffer
= small_buffer
;
968 size_t buffersize
= SMALLCHUNK
;
974 PyObject
*ret
= NULL
;
976 if (!PyArg_ParseTuple(args
, "l|i:seek", &offset
, &where
))
980 Util_DropReadAhead(self
);
981 switch (self
->mode
) {
987 PyErr_SetString(PyExc_ValueError
,
988 "I/O operation on closed file");
992 PyErr_SetString(PyExc_IOError
,
993 "seek works only while reading");
999 offset
= self
->pos
+ offset
;
1001 } else if (where
== 2) {
1002 if (self
->size
== -1) {
1003 assert(self
->mode
!= MODE_READ_EOF
);
1005 Py_BEGIN_ALLOW_THREADS
1006 chunksize
= Util_UnivNewlineRead(
1010 self
->pos
+= chunksize
;
1011 Py_END_ALLOW_THREADS
1013 bytesread
+= chunksize
;
1014 if (bzerror
== BZ_STREAM_END
) {
1016 } else if (bzerror
!= BZ_OK
) {
1017 Util_CatchBZ2Error(bzerror
);
1021 self
->mode
= MODE_READ_EOF
;
1022 self
->size
= self
->pos
;
1025 offset
= self
->size
+ offset
;
1026 if (offset
>= self
->pos
)
1027 offset
-= self
->pos
;
1033 } else if (where
== 0) {
1034 if (offset
>= self
->pos
)
1035 offset
-= self
->pos
;
1041 BZ2_bzReadClose(&bzerror
, self
->fp
);
1042 if (bzerror
!= BZ_OK
) {
1043 Util_CatchBZ2Error(bzerror
);
1046 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1052 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1054 if (bzerror
!= BZ_OK
) {
1055 Util_CatchBZ2Error(bzerror
);
1058 self
->mode
= MODE_READ
;
1059 } else if (self
->mode
== MODE_READ_EOF
) {
1066 /* Before getting here, offset must be set to the number of bytes
1067 * to walk forward. */
1069 if ((size_t)offset
-bytesread
> buffersize
)
1070 readsize
= buffersize
;
1072 readsize
= offset
-bytesread
;
1073 Py_BEGIN_ALLOW_THREADS
1074 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1075 buffer
, readsize
, self
);
1076 self
->pos
+= chunksize
;
1077 Py_END_ALLOW_THREADS
1078 bytesread
+= chunksize
;
1079 if (bzerror
== BZ_STREAM_END
) {
1080 self
->size
= self
->pos
;
1081 self
->mode
= MODE_READ_EOF
;
1083 } else if (bzerror
!= BZ_OK
) {
1084 Util_CatchBZ2Error(bzerror
);
1087 if (bytesread
== offset
)
1100 PyDoc_STRVAR(BZ2File_tell__doc__
,
1103 Return the current file position, an integer (may be a long integer).\n\
1107 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1109 PyObject
*ret
= NULL
;
1111 if (self
->mode
== MODE_CLOSED
) {
1112 PyErr_SetString(PyExc_ValueError
,
1113 "I/O operation on closed file");
1117 ret
= PyInt_FromLong(self
->pos
);
1123 PyDoc_STRVAR(BZ2File_close__doc__
,
1124 "close() -> None or (perhaps) an integer\n\
1126 Close the file. Sets data attribute .closed to true. A closed file\n\
1127 cannot be used for further I/O operations. close() may be called more\n\
1128 than once without error.\n\
1132 BZ2File_close(BZ2FileObject
*self
)
1134 PyObject
*ret
= NULL
;
1135 int bzerror
= BZ_OK
;
1138 switch (self
->mode
) {
1141 BZ2_bzReadClose(&bzerror
, self
->fp
);
1144 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1148 self
->mode
= MODE_CLOSED
;
1149 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1150 if (bzerror
!= BZ_OK
) {
1151 Util_CatchBZ2Error(bzerror
);
1160 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1162 static PyMethodDef BZ2File_methods
[] = {
1163 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1164 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1165 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1166 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1167 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1168 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1169 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1170 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1171 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1172 {NULL
, NULL
} /* sentinel */
1176 /* ===================================================================== */
1177 /* Getters and setters of BZ2File. */
1179 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1181 BZ2File_get_newlines(BZ2FileObject
*self
, void *closure
)
1183 switch (self
->f_newlinetypes
) {
1184 case NEWLINE_UNKNOWN
:
1188 return PyString_FromString("\r");
1190 return PyString_FromString("\n");
1191 case NEWLINE_CR
|NEWLINE_LF
:
1192 return Py_BuildValue("(ss)", "\r", "\n");
1194 return PyString_FromString("\r\n");
1195 case NEWLINE_CR
|NEWLINE_CRLF
:
1196 return Py_BuildValue("(ss)", "\r", "\r\n");
1197 case NEWLINE_LF
|NEWLINE_CRLF
:
1198 return Py_BuildValue("(ss)", "\n", "\r\n");
1199 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1200 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1202 PyErr_Format(PyExc_SystemError
,
1203 "Unknown newlines value 0x%x\n",
1204 self
->f_newlinetypes
);
1210 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1212 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
1216 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1218 return PyObject_GetAttrString(self
->file
, "mode");
1222 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1224 return PyObject_GetAttrString(self
->file
, "name");
1227 static PyGetSetDef BZ2File_getset
[] = {
1228 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1229 "True if the file is closed"},
1230 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1231 "end-of-line convention used in this file"},
1232 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1233 "file mode ('r', 'w', or 'U')"},
1234 {"name", (getter
)BZ2File_get_name
, NULL
,
1236 {NULL
} /* Sentinel */
1240 /* ===================================================================== */
1241 /* Members of BZ2File_Type. */
1244 #define OFF(x) offsetof(BZ2FileObject, x)
1246 static PyMemberDef BZ2File_members
[] = {
1247 {"softspace", T_INT
, OFF(f_softspace
), 0,
1248 "flag indicating that a space needs to be printed; used by print"},
1249 {NULL
} /* Sentinel */
1252 /* ===================================================================== */
1253 /* Slot definitions for BZ2File_Type. */
1256 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1258 static char *kwlist
[] = {"filename", "mode", "buffering",
1259 "compresslevel", 0};
1263 int compresslevel
= 9;
1269 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1270 kwlist
, &name
, &mode
, &buffering
,
1274 if (compresslevel
< 1 || compresslevel
> 9) {
1275 PyErr_SetString(PyExc_ValueError
,
1276 "compresslevel must be between 1 and 9");
1294 self
->f_univ_newline
= 1;
1302 PyErr_Format(PyExc_ValueError
,
1303 "invalid mode char %c", *mode
);
1311 mode
= (mode_char
== 'r') ? "rb" : "wb";
1313 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1314 name
, mode
, buffering
);
1315 if (self
->file
== NULL
)
1318 /* From now on, we have stuff to dealloc, so jump to error label
1319 * instead of returning */
1322 self
->lock
= PyThread_allocate_lock();
1327 if (mode_char
== 'r')
1328 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1329 PyFile_AsFile(self
->file
),
1332 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1333 PyFile_AsFile(self
->file
),
1334 compresslevel
, 0, 0);
1336 if (bzerror
!= BZ_OK
) {
1337 Util_CatchBZ2Error(bzerror
);
1341 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1346 Py_DECREF(self
->file
);
1349 PyThread_free_lock(self
->lock
);
1355 BZ2File_dealloc(BZ2FileObject
*self
)
1360 PyThread_free_lock(self
->lock
);
1362 switch (self
->mode
) {
1365 BZ2_bzReadClose(&bzerror
, self
->fp
);
1368 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1372 Util_DropReadAhead(self
);
1373 Py_XDECREF(self
->file
);
1374 self
->ob_type
->tp_free((PyObject
*)self
);
1377 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1379 BZ2File_getiter(BZ2FileObject
*self
)
1381 if (self
->mode
== MODE_CLOSED
) {
1382 PyErr_SetString(PyExc_ValueError
,
1383 "I/O operation on closed file");
1386 Py_INCREF((PyObject
*)self
);
1387 return (PyObject
*)self
;
1390 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1391 #define READAHEAD_BUFSIZE 8192
1393 BZ2File_iternext(BZ2FileObject
*self
)
1395 PyStringObject
* ret
;
1397 if (self
->mode
== MODE_CLOSED
) {
1398 PyErr_SetString(PyExc_ValueError
,
1399 "I/O operation on closed file");
1402 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1404 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1408 return (PyObject
*)ret
;
1411 /* ===================================================================== */
1412 /* BZ2File_Type definition. */
1414 PyDoc_VAR(BZ2File__doc__
) =
1416 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1418 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1419 writing. When opened for writing, the file will be created if it doesn't\n\
1420 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1421 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1422 is given, must be a number between 1 and 9.\n\
1426 Add a 'U' to mode to open the file for input with universal newline\n\
1427 support. Any line ending in the input file will be seen as a '\\n' in\n\
1428 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1429 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1430 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1431 newlines are available only when reading.\n\
1435 static PyTypeObject BZ2File_Type
= {
1436 PyObject_HEAD_INIT(NULL
)
1438 "bz2.BZ2File", /*tp_name*/
1439 sizeof(BZ2FileObject
), /*tp_basicsize*/
1441 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1448 0, /*tp_as_sequence*/
1449 0, /*tp_as_mapping*/
1453 PyObject_GenericGetAttr
,/*tp_getattro*/
1454 PyObject_GenericSetAttr
,/*tp_setattro*/
1456 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1457 BZ2File__doc__
, /*tp_doc*/
1460 0, /*tp_richcompare*/
1461 0, /*tp_weaklistoffset*/
1462 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1463 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1464 BZ2File_methods
, /*tp_methods*/
1465 BZ2File_members
, /*tp_members*/
1466 BZ2File_getset
, /*tp_getset*/
1471 0, /*tp_dictoffset*/
1472 (initproc
)BZ2File_init
, /*tp_init*/
1473 PyType_GenericAlloc
, /*tp_alloc*/
1474 PyType_GenericNew
, /*tp_new*/
1475 _PyObject_Del
, /*tp_free*/
1480 /* ===================================================================== */
1481 /* Methods of BZ2Comp. */
1483 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1484 "compress(data) -> string\n\
1486 Provide more data to the compressor object. It will return chunks of\n\
1487 compressed data whenever possible. When you've finished providing data\n\
1488 to compress, call the flush() method to finish the compression process,\n\
1489 and return what is left in the internal buffers.\n\
1493 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1497 int bufsize
= SMALLCHUNK
;
1498 PY_LONG_LONG totalout
;
1499 PyObject
*ret
= NULL
;
1500 bz_stream
*bzs
= &self
->bzs
;
1503 if (!PyArg_ParseTuple(args
, "s#:compress", &data
, &datasize
))
1507 return PyString_FromString("");
1510 if (!self
->running
) {
1511 PyErr_SetString(PyExc_ValueError
,
1512 "this object was already flushed");
1516 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1520 bzs
->next_in
= data
;
1521 bzs
->avail_in
= datasize
;
1522 bzs
->next_out
= BUF(ret
);
1523 bzs
->avail_out
= bufsize
;
1525 totalout
= BZS_TOTAL_OUT(bzs
);
1528 Py_BEGIN_ALLOW_THREADS
1529 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1530 Py_END_ALLOW_THREADS
1531 if (bzerror
!= BZ_RUN_OK
) {
1532 Util_CatchBZ2Error(bzerror
);
1535 if (bzs
->avail_out
== 0) {
1536 bufsize
= Util_NewBufferSize(bufsize
);
1537 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1538 BZ2_bzCompressEnd(bzs
);
1541 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1543 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1544 } else if (bzs
->avail_in
== 0) {
1549 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
1560 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1561 "flush() -> string\n\
1563 Finish the compression process and return what is left in internal buffers.\n\
1564 You must not use the compressor object after calling this method.\n\
1568 BZ2Comp_flush(BZ2CompObject
*self
)
1570 int bufsize
= SMALLCHUNK
;
1571 PyObject
*ret
= NULL
;
1572 bz_stream
*bzs
= &self
->bzs
;
1573 PY_LONG_LONG totalout
;
1577 if (!self
->running
) {
1578 PyErr_SetString(PyExc_ValueError
, "object was already "
1584 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1588 bzs
->next_out
= BUF(ret
);
1589 bzs
->avail_out
= bufsize
;
1591 totalout
= BZS_TOTAL_OUT(bzs
);
1594 Py_BEGIN_ALLOW_THREADS
1595 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1596 Py_END_ALLOW_THREADS
1597 if (bzerror
== BZ_STREAM_END
) {
1599 } else if (bzerror
!= BZ_FINISH_OK
) {
1600 Util_CatchBZ2Error(bzerror
);
1603 if (bzs
->avail_out
== 0) {
1604 bufsize
= Util_NewBufferSize(bufsize
);
1605 if (_PyString_Resize(&ret
, bufsize
) < 0)
1607 bzs
->next_out
= BUF(ret
);
1608 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1610 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1614 if (bzs
->avail_out
!= 0)
1615 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
1626 static PyMethodDef BZ2Comp_methods
[] = {
1627 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1628 BZ2Comp_compress__doc__
},
1629 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1630 BZ2Comp_flush__doc__
},
1631 {NULL
, NULL
} /* sentinel */
1635 /* ===================================================================== */
1636 /* Slot definitions for BZ2Comp_Type. */
1639 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1641 int compresslevel
= 9;
1643 static char *kwlist
[] = {"compresslevel", 0};
1645 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1646 kwlist
, &compresslevel
))
1649 if (compresslevel
< 1 || compresslevel
> 9) {
1650 PyErr_SetString(PyExc_ValueError
,
1651 "compresslevel must be between 1 and 9");
1656 self
->lock
= PyThread_allocate_lock();
1661 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1662 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
1663 if (bzerror
!= BZ_OK
) {
1664 Util_CatchBZ2Error(bzerror
);
1674 PyThread_free_lock(self
->lock
);
1680 BZ2Comp_dealloc(BZ2CompObject
*self
)
1684 PyThread_free_lock(self
->lock
);
1686 BZ2_bzCompressEnd(&self
->bzs
);
1687 self
->ob_type
->tp_free((PyObject
*)self
);
1691 /* ===================================================================== */
1692 /* BZ2Comp_Type definition. */
/* Docstring for the compressor type; BZ2Comp_Type refers to it in its
 * tp_doc slot. The text below is runtime data and is left unchanged. */
1694 PyDoc_STRVAR(BZ2Comp__doc__
,
1695 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1697 Create a new compressor object. This object may be used to compress\n\
1698 data sequentially. If you want to compress data in one shot, use the\n\
1699 compress() function instead. The compresslevel parameter, if given,\n\
1700 must be a number between 1 and 9.\n\
/* Type object for bz2.BZ2Compressor.
 *
 * Slots that are filled in: name, instance size, destructor
 * (BZ2Comp_dealloc), generic attribute get/set, flags, docstring,
 * method table (BZ2Comp_methods), initialiser (BZ2Comp_init) and the
 * generic alloc / new / free routines. The sequence, mapping,
 * richcompare, weaklistoffset and dictoffset slots shown here are 0.
 *
 * The header is initialised with NULL; the ob_type field is assigned
 * &PyType_Type later, in the module initialisation code. */
1703 static PyTypeObject BZ2Comp_Type
= {
1704 PyObject_HEAD_INIT(NULL
)
1706 "bz2.BZ2Compressor", /*tp_name*/
1707 sizeof(BZ2CompObject
), /*tp_basicsize*/
1709 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1716 0, /*tp_as_sequence*/
1717 0, /*tp_as_mapping*/
1721 PyObject_GenericGetAttr
,/*tp_getattro*/
1722 PyObject_GenericSetAttr
,/*tp_setattro*/
/* BASETYPE is set in addition to the default flags. */
1724 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1725 BZ2Comp__doc__
, /*tp_doc*/
1728 0, /*tp_richcompare*/
1729 0, /*tp_weaklistoffset*/
1732 BZ2Comp_methods
, /*tp_methods*/
1739 0, /*tp_dictoffset*/
1740 (initproc
)BZ2Comp_init
, /*tp_init*/
1741 PyType_GenericAlloc
, /*tp_alloc*/
1742 PyType_GenericNew
, /*tp_new*/
1743 _PyObject_Del
, /*tp_free*/
1748 /* ===================================================================== */
1749 /* Members of BZ2Decomp. */
/* OFF(x) expands to the byte offset of field x inside BZ2DecompObject;
 * it is used by the member table that follows. */
1752 #define OFF(x) offsetof(BZ2DecompObject, x)
/* Member table for the decompressor type (referenced as tp_members of
 * BZ2Decomp_Type). It exposes a single attribute, unused_data, as an
 * object-valued member flagged RO. The table ends with a NULL entry. */
1754 static PyMemberDef BZ2Decomp_members
[] = {
1755 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1756 {NULL
} /* Sentinel */
1760 /* ===================================================================== */
1761 /* Methods of BZ2Decomp. */
/* Docstring for the decompress method of decompressor objects; the
 * method table BZ2Decomp_methods refers to it. The text below is
 * runtime data and is left unchanged. */
1763 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1764 "decompress(data) -> string\n\
1766 Provide more data to the decompressor object. It will return chunks\n\
1767 of decompressed data whenever possible. If you try to decompress data\n\
1768 after the end of stream is found, EOFError will be raised. If any data\n\
1769 was found after the end of stream, it'll be ignored and saved in\n\
1770 unused_data attribute.\n\
/* Incremental decompression: feed one chunk of compressed bytes to the
 * stream stored in self and collect whatever output it produces.
 *
 * Arguments: args holds a single byte string (pointer and length are
 * parsed into data and datasize).
 * Errors: EOFError is set when self->running is false; any library
 * result other than BZ_OK / BZ_STREAM_END goes to Util_CatchBZ2Error.
 * Side effect: when the stream ends with input left over,
 * self->unused_data is replaced.
 *
 * NOTE(review): the loop construct, the lock handling and the return
 * statements are not among the visible statements -- confirm them
 * against the complete function. */
1774 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
/* Initial size of the output string; it is grown on demand below. */
1778 int bufsize
= SMALLCHUNK
;
1779 PY_LONG_LONG totalout
;
1780 PyObject
*ret
= NULL
;
/* The stream state lives inside the object and persists across calls. */
1781 bz_stream
*bzs
= &self
->bzs
;
1784 if (!PyArg_ParseTuple(args
, "s#:decompress", &data
, &datasize
))
/* Refuse further input once the object is no longer running. */
1788 if (!self
->running
) {
1789 PyErr_SetString(PyExc_EOFError
, "end of stream was "
1794 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Point the stream at the caller input and at the fresh output buffer. */
1798 bzs
->next_in
= data
;
1799 bzs
->avail_in
= datasize
;
1800 bzs
->next_out
= BUF(ret
);
1801 bzs
->avail_out
= bufsize
;
/* Snapshot of the stream output counter before this call. The stream is
 * reused between calls, so the bytes produced by this call are the
 * counter minus this snapshot. */
1803 totalout
= BZS_TOTAL_OUT(bzs
);
/* The library call runs between the ALLOW_THREADS markers, i.e. with
 * the interpreter lock released. */
1806 Py_BEGIN_ALLOW_THREADS
1807 bzerror
= BZ2_bzDecompress(bzs
);
1808 Py_END_ALLOW_THREADS
1809 if (bzerror
== BZ_STREAM_END
) {
/* Input left after the end of the stream is kept for the caller.
 * NOTE(review): the old unused_data is released before the new string
 * is built; the assignment target and the length argument are not
 * visible here -- confirm what happens if the allocation fails. */
1810 if (bzs
->avail_in
!= 0) {
1811 Py_DECREF(self
->unused_data
);
1813 PyString_FromStringAndSize(bzs
->next_in
,
1819 if (bzerror
!= BZ_OK
) {
1820 Util_CatchBZ2Error(bzerror
);
/* Output buffer is full: ask for a larger size and resize the string. */
1823 if (bzs
->avail_out
== 0) {
1824 bufsize
= Util_NewBufferSize(bufsize
);
1825 if (_PyString_Resize(&ret
, bufsize
) < 0) {
/* NOTE(review): the stream is ended here on resize failure, and
 * BZ2Decomp_dealloc ends it again later -- confirm this is intended. */
1826 BZ2_bzDecompressEnd(bzs
);
/* The resize is given &ret, so the buffer address may have changed;
 * next_out is recomputed from the new base.
 * NOTE(review): the first assignment below is overwritten by the very
 * next statement and looks redundant. */
1829 bzs
->next_out
= BUF(ret
);
1830 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
/* Space left = new size minus what is already filled. */
1832 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Output space remains and all input was consumed. */
1833 } else if (bzs
->avail_in
== 0) {
/* Trim the result to the number of bytes produced by this call. */
1838 if (bzs
->avail_out
!= 0)
1839 _PyString_Resize(&ret
, (int)(BZS_TOTAL_OUT(bzs
) - totalout
));
/* Method table for the decompressor type (referenced as tp_methods of
 * BZ2Decomp_Type). It publishes one method, decompress, implemented by
 * BZ2Decomp_decompress and called with an argument tuple
 * (METH_VARARGS). The table is terminated by a NULL entry. */
1850 static PyMethodDef BZ2Decomp_methods
[] = {
1851 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1852 {NULL
, NULL
} /* sentinel */
1856 /* ===================================================================== */
1857 /* Slot definitions for BZ2Decomp_Type. */
/* Initialiser for decompressor objects (referenced as tp_init of
 * BZ2Decomp_Type).
 *
 * Arguments: none are accepted; the format string contains only the
 * function name used in error messages.
 * Errors: a result other than BZ_OK from BZ2_bzDecompressInit is handed
 * to Util_CatchBZ2Error.
 *
 * NOTE(review): kwargs does not appear in any visible statement, so
 * keyword arguments seem to be ignored rather than rejected -- confirm.
 * The return statements and the error label are not visible either. */
1860 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1864 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
/* Per-object lock; the ACQUIRE_LOCK / RELEASE_LOCK macros at the top of
 * the file operate on obj->lock. */
1868 self
->lock
= PyThread_allocate_lock();
/* unused_data starts out as an empty string. */
1873 self
->unused_data
= PyString_FromString("");
1874 if (!self
->unused_data
)
/* Start from an all-zero bz_stream, then let the library initialise it.
 * The two trailing zeros are passed through unchanged (see the
 * libbzip2 manual for their meaning). */
1877 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1878 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
1879 if (bzerror
!= BZ_OK
) {
1880 Util_CatchBZ2Error(bzerror
);
/* NOTE(review): these two releases look like the failure path undoing
 * the allocations above; XDECREF tolerates unused_data being NULL. */
1891 PyThread_free_lock(self
->lock
);
1893 Py_XDECREF(self
->unused_data
);
/* Destructor for decompressor objects (referenced as tp_dealloc of
 * BZ2Decomp_Type). It frees the per-object lock, drops the reference to
 * unused_data, ends the decompression stream embedded in the object and
 * finally hands the memory back through the tp_free slot of the type. */
1898 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
1902 PyThread_free_lock(self
->lock
);
/* XDECREF: safe even when unused_data is NULL. */
1904 Py_XDECREF(self
->unused_data
);
1905 BZ2_bzDecompressEnd(&self
->bzs
);
/* Must be last: self is not usable after tp_free. */
1906 self
->ob_type
->tp_free((PyObject
*)self
);
1910 /* ===================================================================== */
1911 /* BZ2Decomp_Type definition. */
/* Docstring for the decompressor type; BZ2Decomp_Type refers to it in
 * its tp_doc slot. The text below is runtime data and is left
 * unchanged. */
1913 PyDoc_STRVAR(BZ2Decomp__doc__
,
1914 "BZ2Decompressor() -> decompressor object\n\
1916 Create a new decompressor object. This object may be used to decompress\n\
1917 data sequentially. If you want to decompress data in one shot, use the\n\
1918 decompress() function instead.\n\
/* Type object for bz2.BZ2Decompressor.
 *
 * Slots that are filled in: name, instance size, destructor
 * (BZ2Decomp_dealloc), generic attribute get/set, flags, docstring,
 * method table (BZ2Decomp_methods), member table (BZ2Decomp_members),
 * initialiser (BZ2Decomp_init) and the generic alloc / new / free
 * routines. The sequence, mapping, richcompare, weaklistoffset and
 * dictoffset slots shown here are 0.
 *
 * The header is initialised with NULL; the ob_type field is assigned
 * &PyType_Type later, in the module initialisation code. */
1921 static PyTypeObject BZ2Decomp_Type
= {
1922 PyObject_HEAD_INIT(NULL
)
1924 "bz2.BZ2Decompressor", /*tp_name*/
1925 sizeof(BZ2DecompObject
), /*tp_basicsize*/
1927 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
1934 0, /*tp_as_sequence*/
1935 0, /*tp_as_mapping*/
1939 PyObject_GenericGetAttr
,/*tp_getattro*/
1940 PyObject_GenericSetAttr
,/*tp_setattro*/
/* BASETYPE is set in addition to the default flags. */
1942 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1943 BZ2Decomp__doc__
, /*tp_doc*/
1946 0, /*tp_richcompare*/
1947 0, /*tp_weaklistoffset*/
1950 BZ2Decomp_methods
, /*tp_methods*/
/* Exposes the unused_data attribute. */
1951 BZ2Decomp_members
, /*tp_members*/
1957 0, /*tp_dictoffset*/
1958 (initproc
)BZ2Decomp_init
, /*tp_init*/
1959 PyType_GenericAlloc
, /*tp_alloc*/
1960 PyType_GenericNew
, /*tp_new*/
1961 _PyObject_Del
, /*tp_free*/
1966 /* ===================================================================== */
1967 /* Module functions. */
/* Docstring for the module-level compress function; the module method
 * table bz2_methods refers to it. The text below is runtime data and is
 * left unchanged. */
1969 PyDoc_STRVAR(bz2_compress__doc__
,
1970 "compress(data [, compresslevel=9]) -> string\n\
1972 Compress data in one shot. If you want to compress data sequentially,\n\
1973 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1974 given, must be a number between 1 and 9.\n\
/* One-shot compression: compress a whole byte string with a private,
 * short-lived bz_stream and return the result as a new string.
 *
 * Arguments: data (byte string, required) and compresslevel (integer,
 * optional, default 9); both may be given by keyword.
 * Errors: a compresslevel outside 1..9 sets ValueError; library results
 * other than the expected ones go to Util_CatchBZ2Error.
 *
 * NOTE(review): the loop construct and the return statements are not
 * among the visible statements -- confirm them against the complete
 * function. */
1978 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1980 int compresslevel
=9;
1984 PyObject
*ret
= NULL
;
/* bzs points at a stream local to this call (_bzs), so no state is
 * shared with other calls. */
1986 bz_stream
*bzs
= &_bzs
;
1988 static char *kwlist
[] = {"data", "compresslevel", 0};
/* NOTE(review): unlike the other parsers in this file, the format has
 * no :name suffix, so error messages will not name the function. */
1990 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s#|i",
1991 kwlist
, &data
, &datasize
,
1995 if (compresslevel
< 1 || compresslevel
> 9) {
1996 PyErr_SetString(PyExc_ValueError
,
1997 "compresslevel must be between 1 and 9");
2001 /* Conforming to bz2 manual, this is large enough to fit compressed
2002 * data in one shot. We will check it later anyway. */
/* NOTE(review): no overflow guard is visible for this sum; confirm the
 * type of datasize and the behaviour for very large inputs. */
2003 bufsize
= datasize
+ (datasize
/100+1) + 600;
2005 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Zero the stream and fill in the buffers before initialising it. */
2009 memset(bzs
, 0, sizeof(bz_stream
));
2011 bzs
->next_in
= data
;
2012 bzs
->avail_in
= datasize
;
2013 bzs
->next_out
= BUF(ret
);
2014 bzs
->avail_out
= bufsize
;
2016 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2017 if (bzerror
!= BZ_OK
) {
2018 Util_CatchBZ2Error(bzerror
);
/* The library call runs between the ALLOW_THREADS markers, i.e. with
 * the interpreter lock released. BZ_FINISH is requested directly
 * because all input is already available. */
2024 Py_BEGIN_ALLOW_THREADS
2025 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2026 Py_END_ALLOW_THREADS
2027 if (bzerror
== BZ_STREAM_END
) {
/* Anything other than BZ_FINISH_OK is an error: release the stream
 * first, then report. */
2029 } else if (bzerror
!= BZ_FINISH_OK
) {
2030 BZ2_bzCompressEnd(bzs
);
2031 Util_CatchBZ2Error(bzerror
);
/* Output buffer is full: ask for a larger size and resize the string. */
2035 if (bzs
->avail_out
== 0) {
2036 bufsize
= Util_NewBufferSize(bufsize
);
2037 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2038 BZ2_bzCompressEnd(bzs
);
/* The stream was zeroed at the start of this call, so its output
 * counter equals the number of bytes already in ret; the resize is
 * given &ret, so next_out is recomputed from the new base. */
2042 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2043 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Trim the result to the bytes actually written, then free the stream. */
2047 if (bzs
->avail_out
!= 0)
2048 _PyString_Resize(&ret
, (int)BZS_TOTAL_OUT(bzs
));
2049 BZ2_bzCompressEnd(bzs
);
/* Docstring for the module-level decompress function; the module method
 * table bz2_methods refers to it. The text below is runtime data and is
 * left unchanged. */
2054 PyDoc_STRVAR(bz2_decompress__doc__
,
2055 "decompress(data) -> decompressed data\n\
2057 Decompress data in one shot. If you want to decompress data sequentially,\n\
2058 use an instance of BZ2Decompressor instead.\n\
/* One-shot decompression: decompress a whole byte string with a
 * private, short-lived bz_stream and return the result as a new string.
 *
 * Arguments: args holds a single byte string (pointer and length are
 * parsed into data and datasize).
 * Errors: ValueError is set when the input is exhausted before the end
 * of the stream is reached; other library failures go to
 * Util_CatchBZ2Error.
 *
 * NOTE(review): the loop construct and the final return are not among
 * the visible statements -- confirm them against the complete
 * function. */
2062 bz2_decompress(PyObject
*self
, PyObject
*args
)
/* Initial size of the output string; it is grown on demand below. */
2066 int bufsize
= SMALLCHUNK
;
/* bzs points at a stream local to this call (_bzs). */
2069 bz_stream
*bzs
= &_bzs
;
2072 if (!PyArg_ParseTuple(args
, "s#:decompress", &data
, &datasize
))
/* NOTE(review): the condition guarding this early return is not
 * visible; presumably it handles empty input -- confirm. */
2076 return PyString_FromString("");
2078 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Zero the stream and fill in the buffers before initialising it. */
2082 memset(bzs
, 0, sizeof(bz_stream
));
2084 bzs
->next_in
= data
;
2085 bzs
->avail_in
= datasize
;
2086 bzs
->next_out
= BUF(ret
);
2087 bzs
->avail_out
= bufsize
;
2089 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2090 if (bzerror
!= BZ_OK
) {
2091 Util_CatchBZ2Error(bzerror
);
/* The library call runs between the ALLOW_THREADS markers, i.e. with
 * the interpreter lock released. */
2097 Py_BEGIN_ALLOW_THREADS
2098 bzerror
= BZ2_bzDecompress(bzs
);
2099 Py_END_ALLOW_THREADS
2100 if (bzerror
== BZ_STREAM_END
) {
/* Any result other than BZ_OK is an error: release the stream first,
 * then report. */
2102 } else if (bzerror
!= BZ_OK
) {
2103 BZ2_bzDecompressEnd(bzs
);
2104 Util_CatchBZ2Error(bzerror
);
/* Output buffer is full: ask for a larger size and resize the string. */
2108 if (bzs
->avail_out
== 0) {
2109 bufsize
= Util_NewBufferSize(bufsize
);
2110 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2111 BZ2_bzDecompressEnd(bzs
);
/* The stream was zeroed at the start of this call, so its output
 * counter equals the number of bytes already in ret; the resize is
 * given &ret, so next_out is recomputed from the new base. */
2115 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2116 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Output space remains but the input ran out without a stream end:
 * the data is truncated, which is reported as ValueError. */
2117 } else if (bzs
->avail_in
== 0) {
2118 BZ2_bzDecompressEnd(bzs
);
2119 PyErr_SetString(PyExc_ValueError
,
2120 "couldn't find end of stream");
/* Trim the result to the bytes actually written, then free the stream. */
2126 if (bzs
->avail_out
!= 0)
2127 _PyString_Resize(&ret
, (int)BZS_TOTAL_OUT(bzs
));
2128 BZ2_bzDecompressEnd(bzs
);
/* Module-level function table handed to Py_InitModule3:
 *   compress   -> bz2_compress, accepts positional and keyword
 *                 arguments (METH_VARARGS | METH_KEYWORDS)
 *   decompress -> bz2_decompress, positional arguments only
 *                 (METH_VARARGS)
 * Each entry also carries the matching docstring symbol. The table is
 * terminated by a NULL entry. */
2133 static PyMethodDef bz2_methods
[] = {
2134 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2135 bz2_compress__doc__
},
2136 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2137 bz2_decompress__doc__
},
2138 {NULL
, NULL
} /* sentinel */
2141 /* ===================================================================== */
2142 /* Initialization function. */
/* Module docstring; it is passed to Py_InitModule3 when the module is
 * created. The text below is runtime data and is left unchanged. */
2144 PyDoc_STRVAR(bz2__doc__
,
2145 "The python bz2 module provides a comprehensive interface for\n\
2146 the bz2 compression library. It implements a complete file\n\
2147 interface, one shot (de)compression functions, and types for\n\
2148 sequential (de)compression.\n\
/* Module initialisation statements.
 * NOTE(review): the enclosing function header is not among the visible
 * statements -- confirm its name and signature in the complete file. */
/* The three static type objects were declared with a NULL header, so
 * their ob_type is filled in here before they are used. */
2156 BZ2File_Type
.ob_type
= &PyType_Type
;
2157 BZ2Comp_Type
.ob_type
= &PyType_Type
;
2158 BZ2Decomp_Type
.ob_type
= &PyType_Type
;
/* Create the module with its function table and docstring.
 * NOTE(review): no check of m against NULL is visible before it is
 * used below. */
2160 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
/* NOTE(review): the result of PyString_FromString is passed on without
 * a visible NULL check. */
2162 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
/* Each type gets an extra reference before it is added to the module,
 * because PyModule_AddObject takes over one reference to the value. */
2164 Py_INCREF(&BZ2File_Type
);
2165 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2167 Py_INCREF(&BZ2Comp_Type
);
2168 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2170 Py_INCREF(&BZ2Decomp_Type
);
2171 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);