AddressList.__str__(): Get rid of useless, and broken method. Closes
[python/dscho.git] / Modules / bz2module.c
blob83582bd937452daaff8d2afff0121f62d14d0feb
1 /*
3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
8 */
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* Address of the character buffer of a Python string object. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

#define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)

#ifdef BZ_CONFIG_ERROR

/* BZS_TOTAL_OUT(bzs): total output byte count of the bz_stream that
 * `bzs` points to.  The hi32/lo32 halves are combined when a 64-bit
 * integer type is available; otherwise only the low half is used.
 * Every variant is a parenthesized expression without a trailing
 * semicolon so that it can be used inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* bzlib.h did not define BZ_CONFIG_ERROR: map the BZ2_-prefixed names
 * used throughout this file onto the unprefixed ones, and read the
 * single total_out field of the stream. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */


/* Per-object locking; both macros expand to nothing without threads. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

#ifdef WITH_UNIVERSAL_NEWLINES
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0   /* No newline seen, yet */
#define NEWLINE_CR      1   /* \r newline seen */
#define NEWLINE_LF      2   /* \n newline seen */
#define NEWLINE_CRLF    4   /* \r\n newline seen */
#endif
84 /* ===================================================================== */
85 /* Structure definitions. */
87 typedef struct {
88 PyObject_HEAD
89 PyObject *file;
91 char* f_buf; /* Allocated readahead buffer */
92 char* f_bufend; /* Points after last occupied position */
93 char* f_bufptr; /* Current buffer position */
95 int f_softspace; /* Flag used by 'print' command */
97 #ifdef WITH_UNIVERSAL_NEWLINES
98 int f_univ_newline; /* Handle any newline convention */
99 int f_newlinetypes; /* Types of newlines seen */
100 int f_skipnextlf; /* Skip next \n */
101 #endif
103 BZFILE *fp;
104 int mode;
105 long pos;
106 long size;
107 #ifdef WITH_THREAD
108 PyThread_type_lock lock;
109 #endif
110 } BZ2FileObject;
112 typedef struct {
113 PyObject_HEAD
114 bz_stream bzs;
115 int running;
116 #ifdef WITH_THREAD
117 PyThread_type_lock lock;
118 #endif
119 } BZ2CompObject;
121 typedef struct {
122 PyObject_HEAD
123 bz_stream bzs;
124 int running;
125 PyObject *unused_data;
126 #ifdef WITH_THREAD
127 PyThread_type_lock lock;
128 #endif
129 } BZ2DecompObject;
131 /* ===================================================================== */
132 /* Utility functions. */
134 static int
135 Util_CatchBZ2Error(int bzerror)
137 int ret = 0;
138 switch(bzerror) {
139 case BZ_OK:
140 case BZ_STREAM_END:
141 break;
143 #ifdef BZ_CONFIG_ERROR
144 case BZ_CONFIG_ERROR:
145 PyErr_SetString(PyExc_SystemError,
146 "the bz2 library was not compiled "
147 "correctly");
148 ret = 1;
149 break;
150 #endif
152 case BZ_PARAM_ERROR:
153 PyErr_SetString(PyExc_ValueError,
154 "the bz2 library has received wrong "
155 "parameters");
156 ret = 1;
157 break;
159 case BZ_MEM_ERROR:
160 PyErr_NoMemory();
161 ret = 1;
162 break;
164 case BZ_DATA_ERROR:
165 case BZ_DATA_ERROR_MAGIC:
166 PyErr_SetString(PyExc_IOError, "invalid data stream");
167 ret = 1;
168 break;
170 case BZ_IO_ERROR:
171 PyErr_SetString(PyExc_IOError, "unknown IO error");
172 ret = 1;
173 break;
175 case BZ_UNEXPECTED_EOF:
176 PyErr_SetString(PyExc_EOFError,
177 "compressed file ended before the "
178 "logical end-of-stream was detected");
179 ret = 1;
180 break;
182 case BZ_SEQUENCE_ERROR:
183 PyErr_SetString(PyExc_RuntimeError,
184 "wrong sequence of bz2 library "
185 "commands used");
186 ret = 1;
187 break;
189 return ret;
/* Step used while a buffer is still small: BUFSIZ, but at least 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Largest linear growth step; smaller when int is narrower than 4 bytes. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to try after `currentsize`:
 *   - up to SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above both: grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize > SMALLCHUNK) {
        growth = (currentsize <= BIGCHUNK) ? currentsize : BIGCHUNK;
    } else {
        growth = SMALLCHUNK;
    }
    return currentsize + growth;
}
219 /* This is a hacked version of Python's fileobject.c:get_line(). */
220 static PyObject *
221 Util_GetLine(BZ2FileObject *f, int n)
223 char c;
224 char *buf, *end;
225 size_t total_v_size; /* total # of slots in buffer */
226 size_t used_v_size; /* # used slots in buffer */
227 size_t increment; /* amount to increment the buffer */
228 PyObject *v;
229 int bzerror;
230 #ifdef WITH_UNIVERSAL_NEWLINES
231 int newlinetypes = f->f_newlinetypes;
232 int skipnextlf = f->f_skipnextlf;
233 int univ_newline = f->f_univ_newline;
234 #endif
236 total_v_size = n > 0 ? n : 100;
237 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
238 if (v == NULL)
239 return NULL;
241 buf = BUF(v);
242 end = buf + total_v_size;
244 for (;;) {
245 Py_BEGIN_ALLOW_THREADS
246 #ifdef WITH_UNIVERSAL_NEWLINES
247 if (univ_newline) {
248 while (1) {
249 BZ2_bzRead(&bzerror, f->fp, &c, 1);
250 f->pos++;
251 if (bzerror != BZ_OK || buf == end)
252 break;
253 if (skipnextlf) {
254 skipnextlf = 0;
255 if (c == '\n') {
256 /* Seeing a \n here with
257 * skipnextlf true means we
258 * saw a \r before.
260 newlinetypes |= NEWLINE_CRLF;
261 BZ2_bzRead(&bzerror, f->fp,
262 &c, 1);
263 if (bzerror != BZ_OK)
264 break;
265 } else {
266 newlinetypes |= NEWLINE_CR;
269 if (c == '\r') {
270 skipnextlf = 1;
271 c = '\n';
272 } else if ( c == '\n')
273 newlinetypes |= NEWLINE_LF;
274 *buf++ = c;
275 if (c == '\n') break;
277 if (bzerror == BZ_STREAM_END && skipnextlf)
278 newlinetypes |= NEWLINE_CR;
279 } else /* If not universal newlines use the normal loop */
280 #endif
281 do {
282 BZ2_bzRead(&bzerror, f->fp, &c, 1);
283 f->pos++;
284 *buf++ = c;
285 } while (bzerror == BZ_OK && c != '\n' && buf != end);
286 Py_END_ALLOW_THREADS
287 #ifdef WITH_UNIVERSAL_NEWLINES
288 f->f_newlinetypes = newlinetypes;
289 f->f_skipnextlf = skipnextlf;
290 #endif
291 if (bzerror == BZ_STREAM_END) {
292 f->size = f->pos;
293 f->mode = MODE_READ_EOF;
294 break;
295 } else if (bzerror != BZ_OK) {
296 Util_CatchBZ2Error(bzerror);
297 Py_DECREF(v);
298 return NULL;
300 if (c == '\n')
301 break;
302 /* Must be because buf == end */
303 if (n > 0)
304 break;
305 used_v_size = total_v_size;
306 increment = total_v_size >> 2; /* mild exponential growth */
307 total_v_size += increment;
308 if (total_v_size > INT_MAX) {
309 PyErr_SetString(PyExc_OverflowError,
310 "line is longer than a Python string can hold");
311 Py_DECREF(v);
312 return NULL;
314 if (_PyString_Resize(&v, total_v_size) < 0)
315 return NULL;
316 buf = BUF(v) + used_v_size;
317 end = BUF(v) + total_v_size;
320 used_v_size = buf - BUF(v);
321 if (used_v_size != total_v_size)
322 _PyString_Resize(&v, used_v_size);
323 return v;
#ifndef WITH_UNIVERSAL_NEWLINES
/* Without universal newline support this is a plain BZ2_bzRead(). */
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* This is a hacked version of Python's
 * fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n bytes from `stream` into `buf`.  When f->f_univ_newline
 * is set, "\r" and "\r\n" are stored as a single "\n" and the newline
 * kinds seen are recorded in f->f_newlinetypes.  The libbz2 status is
 * left in *bzerror.  Returns the number of bytes stored in buf. */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
                     char* buf, size_t n, BZ2FileObject *f)
{
    char *out = buf;
    int seen, pending_lf;

    assert(buf != NULL);
    assert(stream != NULL);

    if (!f->f_univ_newline)
        return BZ2_bzRead(bzerror, stream, buf, n);

    seen = f->f_newlinetypes;
    pending_lf = f->f_skipnextlf;

    /* Invariant:  n is the number of bytes remaining to be filled
     * in the buffer.
     */
    while (n) {
        size_t got;
        int shortread;
        char *in = out;

        got = BZ2_bzRead(bzerror, stream, out, n);
        assert(got <= n);
        n -= got;               /* assuming 1 byte out for each in; will adjust */
        shortread = (n != 0);   /* true iff EOF or error */

        for (; got > 0; got--) {
            char ch = *in++;

            if (ch == '\r') {
                /* Save as LF and set flag to skip next LF. */
                *out++ = '\n';
                pending_lf = 1;
                continue;
            }
            if (pending_lf && ch == '\n') {
                /* Skip LF, and remember we saw CR LF. */
                pending_lf = 0;
                seen |= NEWLINE_CRLF;
                ++n;            /* the skipped byte frees one slot */
                continue;
            }
            /* Normal char to be stored in buffer.  Also update
             * the newline flags if either this is an LF or the
             * previous char was a CR.
             */
            if (ch == '\n')
                seen |= NEWLINE_LF;
            else if (pending_lf)
                seen |= NEWLINE_CR;
            *out++ = ch;
            pending_lf = 0;
        }

        if (shortread) {
            /* If this is EOF, update type flags. */
            if (pending_lf && *bzerror == BZ_STREAM_END)
                seen |= NEWLINE_CR;
            break;
        }
    }

    f->f_newlinetypes = seen;
    f->f_skipnextlf = pending_lf;
    return out - buf;
}
#endif
398 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
399 static void
400 Util_DropReadAhead(BZ2FileObject *f)
402 if (f->f_buf != NULL) {
403 PyMem_Free(f->f_buf);
404 f->f_buf = NULL;
408 /* This is a hacked version of Python's fileobject.c:readahead(). */
409 static int
410 Util_ReadAhead(BZ2FileObject *f, int bufsize)
412 int chunksize;
413 int bzerror;
415 if (f->f_buf != NULL) {
416 if((f->f_bufend - f->f_bufptr) >= 1)
417 return 0;
418 else
419 Util_DropReadAhead(f);
421 if (f->mode == MODE_READ_EOF) {
422 return -1;
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
427 Py_BEGIN_ALLOW_THREADS
428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
430 Py_END_ALLOW_THREADS
431 f->pos += chunksize;
432 if (bzerror == BZ_STREAM_END) {
433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
437 Util_DropReadAhead(f);
438 return -1;
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
445 /* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447 static PyStringObject *
448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
455 if (f->f_buf == NULL)
456 if (Util_ReadAhead(f, bufsize) < 0)
457 return NULL;
459 len = f->f_bufend - f->f_bufptr;
460 if (len == 0)
461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
469 if (s == NULL)
470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
474 Util_DropReadAhead(f);
475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
488 return s;
491 /* ===================================================================== */
492 /* Methods of BZ2File. */
494 PyDoc_STRVAR(BZ2File_read__doc__,
495 "read([size]) -> string\n\
497 Read at most size uncompressed bytes, returned as a string. If the size\n\
498 argument is negative or omitted, read until EOF is reached.\n\
501 /* This is a hacked version of Python's fileobject.c:file_read(). */
502 static PyObject *
503 BZ2File_read(BZ2FileObject *self, PyObject *args)
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
536 "requested number of bytes is "
537 "more than a Python string can hold");
538 goto cleanup;
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
575 cleanup:
576 RELEASE_LOCK(self);
577 return ret;
580 PyDoc_STRVAR(BZ2File_readline__doc__,
581 "readline([size]) -> string\n\
583 Return the next line from the file, as a string, retaining newline.\n\
584 A non-negative size argument will limit the maximum number of bytes to\n\
585 return (an incomplete line may be returned then). Return an empty\n\
586 string at EOF.\n\
589 static PyObject *
590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
592 PyObject *ret = NULL;
593 int sizehint = -1;
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
620 cleanup:
621 RELEASE_LOCK(self);
622 return ret;
625 PyDoc_STRVAR(BZ2File_readlines__doc__,
626 "readlines([size]) -> list\n\
628 Call readline() repeatedly and return a list of lines read.\n\
629 The optional size argument, if given, is an approximate bound on the\n\
630 total number of bytes in the lines returned.\n\
633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
634 static PyObject *
635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
699 if (p == NULL) {
700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
705 "line is longer than a Python string can hold");
706 goto error;
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
722 continue;
724 end = buffer+nfilled+nread;
725 q = buffer;
726 do {
727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
738 } while (p != NULL);
739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
778 return list;
781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782 "xreadlines() -> self\n\
784 For backward compatibility. BZ2File objects now include the performance\n\
785 optimizations previously implemented in the xreadlines module.\n\
788 PyDoc_STRVAR(BZ2File_write__doc__,
789 "write(data) -> None\n\
791 Write the 'data' string to file. Note that due to buffering, close() may\n\
792 be needed before the file on disk reflects the data written.\n\
795 /* This is a hacked version of Python's fileobject.c:file_write(). */
796 static PyObject *
797 BZ2File_write(BZ2FileObject *self, PyObject *args)
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
804 if (!PyArg_ParseTuple(args, "s#", &buf, &len))
805 return NULL;
807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
823 self->f_softspace = 0;
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
835 Py_INCREF(Py_None);
836 ret = Py_None;
838 cleanup:
839 RELEASE_LOCK(self);
840 return ret;
843 PyDoc_STRVAR(BZ2File_writelines__doc__,
844 "writelines(sequence_of_strings) -> None\n\
846 Write the sequence of strings to the file. Note that newlines are not\n\
847 added. The sequence can be any iterable object producing strings. This is\n\
848 equivalent to calling write() for each string.\n\
851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
852 static PyObject *
853 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
855 #define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
896 PyList_SetItem(list, j, line);
899 if (j == 0)
900 break;
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
911 int len;
912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
929 self->f_softspace = 0;
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
945 Py_END_ALLOW_THREADS
947 if (j < CHUNKSIZE)
948 break;
951 Py_INCREF(Py_None);
952 ret = Py_None;
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959 #undef CHUNKSIZE
962 PyDoc_STRVAR(BZ2File_seek__doc__,
963 "seek(offset [, whence]) -> None\n\
965 Move to new file position. Argument offset is a byte count. Optional\n\
966 argument whence defaults to 0 (offset from start of file, offset\n\
967 should be >= 0); other values are 1 (move relative to current position,\n\
968 positive or negative), and 2 (move relative to end of file, usually\n\
969 negative, although many platforms allow seeking beyond the end of a file).\n\
971 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972 the operation may be extremely slow.\n\
975 static PyObject *
976 BZ2File_seek(BZ2FileObject *self, PyObject *args)
978 int where = 0;
979 long offset;
980 char small_buffer[SMALLCHUNK];
981 char *buffer = small_buffer;
982 size_t buffersize = SMALLCHUNK;
983 int bytesread = 0;
984 int readsize;
985 int chunksize;
986 int bzerror;
987 int rewind = 0;
988 PyObject *ret = NULL;
990 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
991 return NULL;
993 ACQUIRE_LOCK(self);
994 Util_DropReadAhead(self);
995 switch (self->mode) {
996 case MODE_READ:
997 case MODE_READ_EOF:
998 break;
1000 case MODE_CLOSED:
1001 PyErr_SetString(PyExc_ValueError,
1002 "I/O operation on closed file");
1003 goto cleanup;;
1005 default:
1006 PyErr_SetString(PyExc_IOError,
1007 "seek works only while reading");
1008 goto cleanup;;
1011 if (offset < 0) {
1012 if (where == 1) {
1013 offset = self->pos + offset;
1014 rewind = 1;
1015 } else if (where == 2) {
1016 if (self->size == -1) {
1017 assert(self->mode != MODE_READ_EOF);
1018 for (;;) {
1019 Py_BEGIN_ALLOW_THREADS
1020 chunksize = Util_UnivNewlineRead(
1021 &bzerror, self->fp,
1022 buffer, buffersize,
1023 self);
1024 self->pos += chunksize;
1025 Py_END_ALLOW_THREADS
1027 bytesread += chunksize;
1028 if (bzerror == BZ_STREAM_END) {
1029 break;
1030 } else if (bzerror != BZ_OK) {
1031 Util_CatchBZ2Error(bzerror);
1032 goto cleanup;
1035 self->mode = MODE_READ_EOF;
1036 self->size = self->pos;
1037 bytesread = 0;
1039 offset = self->size + offset;
1040 if (offset >= self->pos)
1041 offset -= self->pos;
1042 else
1043 rewind = 1;
1045 if (offset < 0)
1046 offset = 0;
1047 } else if (where == 0) {
1048 if (offset >= self->pos)
1049 offset -= self->pos;
1050 else
1051 rewind = 1;
1054 if (rewind) {
1055 BZ2_bzReadClose(&bzerror, self->fp);
1056 if (bzerror != BZ_OK) {
1057 Util_CatchBZ2Error(bzerror);
1058 goto cleanup;
1060 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1061 if (!ret)
1062 goto cleanup;
1063 Py_DECREF(ret);
1064 ret = NULL;
1065 self->pos = 0;
1066 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1067 0, 0, NULL, 0);
1068 if (bzerror != BZ_OK) {
1069 Util_CatchBZ2Error(bzerror);
1070 goto cleanup;
1072 self->mode = MODE_READ;
1073 } else if (self->mode == MODE_READ_EOF) {
1074 goto exit;
1077 if (offset == 0)
1078 goto exit;
1080 /* Before getting here, offset must be set to the number of bytes
1081 * to walk forward. */
1082 for (;;) {
1083 if ((size_t)offset-bytesread > buffersize)
1084 readsize = buffersize;
1085 else
1086 readsize = offset-bytesread;
1087 Py_BEGIN_ALLOW_THREADS
1088 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1089 buffer, readsize, self);
1090 self->pos += chunksize;
1091 Py_END_ALLOW_THREADS
1092 bytesread += chunksize;
1093 if (bzerror == BZ_STREAM_END) {
1094 self->size = self->pos;
1095 self->mode = MODE_READ_EOF;
1096 break;
1097 } else if (bzerror != BZ_OK) {
1098 Util_CatchBZ2Error(bzerror);
1099 goto cleanup;
1101 if (bytesread == offset)
1102 break;
1105 exit:
1106 Py_INCREF(Py_None);
1107 ret = Py_None;
1109 cleanup:
1110 RELEASE_LOCK(self);
1111 return ret;
1114 PyDoc_STRVAR(BZ2File_tell__doc__,
1115 "tell() -> int\n\
1117 Return the current file position, an integer (may be a long integer).\n\
1120 static PyObject *
1121 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1123 PyObject *ret = NULL;
1125 if (self->mode == MODE_CLOSED) {
1126 PyErr_SetString(PyExc_ValueError,
1127 "I/O operation on closed file");
1128 goto cleanup;
1131 ret = PyInt_FromLong(self->pos);
1133 cleanup:
1134 return ret;
1137 PyDoc_STRVAR(BZ2File_close__doc__,
1138 "close() -> None or (perhaps) an integer\n\
1140 Close the file. Sets data attribute .closed to true. A closed file\n\
1141 cannot be used for further I/O operations. close() may be called more\n\
1142 than once without error.\n\
1145 static PyObject *
1146 BZ2File_close(BZ2FileObject *self)
1148 PyObject *ret = NULL;
1149 int bzerror = BZ_OK;
1151 ACQUIRE_LOCK(self);
1152 switch (self->mode) {
1153 case MODE_READ:
1154 case MODE_READ_EOF:
1155 BZ2_bzReadClose(&bzerror, self->fp);
1156 break;
1157 case MODE_WRITE:
1158 BZ2_bzWriteClose(&bzerror, self->fp,
1159 0, NULL, NULL);
1160 break;
1162 self->mode = MODE_CLOSED;
1163 ret = PyObject_CallMethod(self->file, "close", NULL);
1164 if (bzerror != BZ_OK) {
1165 Util_CatchBZ2Error(bzerror);
1166 Py_XDECREF(ret);
1167 ret = NULL;
1170 RELEASE_LOCK(self);
1171 return ret;
1174 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1176 static PyMethodDef BZ2File_methods[] = {
1177 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1178 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1179 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1180 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1181 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1182 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1183 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1184 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1185 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1186 {NULL, NULL} /* sentinel */
1190 /* ===================================================================== */
1191 /* Getters and setters of BZ2File. */
#ifdef WITH_UNIVERSAL_NEWLINES
/* This is a hacked version of Python's fileobject.c:get_newlines().
 *
 * Getter for "newlines": None when no newline has been seen, a string
 * when one kind has been seen, or a tuple of strings when several
 * have.  An unexpected flag value raises SystemError. */
static PyObject *
BZ2File_get_newlines(BZ2FileObject *self, void *closure)
{
    const int seen = self->f_newlinetypes;

    switch (seen) {
    case NEWLINE_UNKNOWN:
        Py_INCREF(Py_None);
        return Py_None;

    /* exactly one kind seen */
    case NEWLINE_CR:
        return PyString_FromString("\r");
    case NEWLINE_LF:
        return PyString_FromString("\n");
    case NEWLINE_CRLF:
        return PyString_FromString("\r\n");

    /* two kinds seen */
    case NEWLINE_CR|NEWLINE_LF:
        return Py_BuildValue("(ss)", "\r", "\n");
    case NEWLINE_CR|NEWLINE_CRLF:
        return Py_BuildValue("(ss)", "\r", "\r\n");
    case NEWLINE_LF|NEWLINE_CRLF:
        return Py_BuildValue("(ss)", "\n", "\r\n");

    /* all three seen */
    case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");

    default:
        PyErr_Format(PyExc_SystemError,
                     "Unknown newlines value 0x%x\n",
                     seen);
        return NULL;
    }
}
#endif
1225 static PyObject *
1226 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1228 return PyInt_FromLong(self->mode == MODE_CLOSED);
1231 static PyObject *
1232 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1234 return PyObject_GetAttrString(self->file, "mode");
1237 static PyObject *
1238 BZ2File_get_name(BZ2FileObject *self, void *closure)
1240 return PyObject_GetAttrString(self->file, "name");
1243 static PyGetSetDef BZ2File_getset[] = {
1244 {"closed", (getter)BZ2File_get_closed, NULL,
1245 "True if the file is closed"},
1246 #ifdef WITH_UNIVERSAL_NEWLINES
1247 {"newlines", (getter)BZ2File_get_newlines, NULL,
1248 "end-of-line convention used in this file"},
1249 #endif
1250 {"mode", (getter)BZ2File_get_mode, NULL,
1251 "file mode ('r', 'w', or 'U')"},
1252 {"name", (getter)BZ2File_get_name, NULL,
1253 "file name"},
1254 {NULL} /* Sentinel */
1258 /* ===================================================================== */
1259 /* Members of BZ2File_Type. */
1261 #undef OFF
1262 #define OFF(x) offsetof(BZ2FileObject, x)
1264 static PyMemberDef BZ2File_members[] = {
1265 {"softspace", T_INT, OFF(f_softspace), 0,
1266 "flag indicating that a space needs to be printed; used by print"},
1267 {NULL} /* Sentinel */
1270 /* ===================================================================== */
1271 /* Slot definitions for BZ2File_Type. */
1273 static int
1274 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1276 static char *kwlist[] = {"filename", "mode", "buffering",
1277 "compresslevel", 0};
1278 PyObject *name;
1279 char *mode = "r";
1280 int buffering = -1;
1281 int compresslevel = 9;
1282 int bzerror;
1283 int mode_char = 0;
1285 self->size = -1;
1287 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1288 kwlist, &name, &mode, &buffering,
1289 &compresslevel))
1290 return -1;
1292 if (compresslevel < 1 || compresslevel > 9) {
1293 PyErr_SetString(PyExc_ValueError,
1294 "compresslevel must be between 1 and 9");
1295 return -1;
1298 for (;;) {
1299 int error = 0;
1300 switch (*mode) {
1301 case 'r':
1302 case 'w':
1303 if (mode_char)
1304 error = 1;
1305 mode_char = *mode;
1306 break;
1308 case 'b':
1309 break;
1311 case 'U':
1312 #ifdef WITH_UNIVERSAL_NEWLINES
1313 self->f_univ_newline = 1;
1314 #endif
1315 break;
1317 default:
1318 error = 1;
1319 break;
1321 if (error) {
1322 PyErr_Format(PyExc_ValueError,
1323 "invalid mode char %c", *mode);
1324 return -1;
1326 mode++;
1327 if (*mode == '\0')
1328 break;
1331 mode = (mode_char == 'r') ? "rb" : "wb";
1333 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1334 name, mode, buffering);
1335 if (self->file == NULL)
1336 return -1;
1338 /* From now on, we have stuff to dealloc, so jump to error label
1339 * instead of returning */
1341 #ifdef WITH_THREAD
1342 self->lock = PyThread_allocate_lock();
1343 if (!self->lock)
1344 goto error;
1345 #endif
1347 if (mode_char == 'r')
1348 self->fp = BZ2_bzReadOpen(&bzerror,
1349 PyFile_AsFile(self->file),
1350 0, 0, NULL, 0);
1351 else
1352 self->fp = BZ2_bzWriteOpen(&bzerror,
1353 PyFile_AsFile(self->file),
1354 compresslevel, 0, 0);
1356 if (bzerror != BZ_OK) {
1357 Util_CatchBZ2Error(bzerror);
1358 goto error;
1361 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1363 return 0;
1365 error:
1366 Py_DECREF(self->file);
1367 #ifdef WITH_THREAD
1368 if (self->lock)
1369 PyThread_free_lock(self->lock);
1370 #endif
1371 return -1;
1374 static void
1375 BZ2File_dealloc(BZ2FileObject *self)
1377 int bzerror;
1378 #ifdef WITH_THREAD
1379 if (self->lock)
1380 PyThread_free_lock(self->lock);
1381 #endif
1382 switch (self->mode) {
1383 case MODE_READ:
1384 case MODE_READ_EOF:
1385 BZ2_bzReadClose(&bzerror, self->fp);
1386 break;
1387 case MODE_WRITE:
1388 BZ2_bzWriteClose(&bzerror, self->fp,
1389 0, NULL, NULL);
1390 break;
1392 Util_DropReadAhead(self);
1393 Py_XDECREF(self->file);
1394 self->ob_type->tp_free((PyObject *)self);
1397 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1398 static PyObject *
1399 BZ2File_getiter(BZ2FileObject *self)
1401 if (self->mode == MODE_CLOSED) {
1402 PyErr_SetString(PyExc_ValueError,
1403 "I/O operation on closed file");
1404 return NULL;
1406 Py_INCREF((PyObject*)self);
1407 return (PyObject *)self;
1410 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1411 #define READAHEAD_BUFSIZE 8192
1412 static PyObject *
1413 BZ2File_iternext(BZ2FileObject *self)
1415 PyStringObject* ret;
1416 ACQUIRE_LOCK(self);
1417 if (self->mode == MODE_CLOSED) {
1418 PyErr_SetString(PyExc_ValueError,
1419 "I/O operation on closed file");
1420 return NULL;
1422 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1423 RELEASE_LOCK(self);
1424 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1425 Py_XDECREF(ret);
1426 return NULL;
1428 return (PyObject *)ret;
1431 /* ===================================================================== */
1432 /* BZ2File_Type definition. */
1434 PyDoc_VAR(BZ2File__doc__) =
1435 PyDoc_STR(
1436 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1438 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1439 writing. When opened for writing, the file will be created if it doesn't\n\
1440 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1441 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1442 is given, must be a number between 1 and 9.\n\
1444 #ifdef WITH_UNIVERSAL_NEWLINES
1445 PyDoc_STR(
1446 "\n\
1447 Add a 'U' to mode to open the file for input with universal newline\n\
1448 support. Any line ending in the input file will be seen as a '\\n' in\n\
1449 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1450 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1451 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1452 newlines are available only when reading.\n\
1454 #endif
1457 static PyTypeObject BZ2File_Type = {
1458 PyObject_HEAD_INIT(NULL)
1459 0, /*ob_size*/
1460 "bz2.BZ2File", /*tp_name*/
1461 sizeof(BZ2FileObject), /*tp_basicsize*/
1462 0, /*tp_itemsize*/
1463 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1464 0, /*tp_print*/
1465 0, /*tp_getattr*/
1466 0, /*tp_setattr*/
1467 0, /*tp_compare*/
1468 0, /*tp_repr*/
1469 0, /*tp_as_number*/
1470 0, /*tp_as_sequence*/
1471 0, /*tp_as_mapping*/
1472 0, /*tp_hash*/
1473 0, /*tp_call*/
1474 0, /*tp_str*/
1475 PyObject_GenericGetAttr,/*tp_getattro*/
1476 PyObject_GenericSetAttr,/*tp_setattro*/
1477 0, /*tp_as_buffer*/
1478 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1479 BZ2File__doc__, /*tp_doc*/
1480 0, /*tp_traverse*/
1481 0, /*tp_clear*/
1482 0, /*tp_richcompare*/
1483 0, /*tp_weaklistoffset*/
1484 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1485 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1486 BZ2File_methods, /*tp_methods*/
1487 BZ2File_members, /*tp_members*/
1488 BZ2File_getset, /*tp_getset*/
1489 0, /*tp_base*/
1490 0, /*tp_dict*/
1491 0, /*tp_descr_get*/
1492 0, /*tp_descr_set*/
1493 0, /*tp_dictoffset*/
1494 (initproc)BZ2File_init, /*tp_init*/
1495 PyType_GenericAlloc, /*tp_alloc*/
1496 PyType_GenericNew, /*tp_new*/
1497 _PyObject_Del, /*tp_free*/
1498 0, /*tp_is_gc*/
1502 /* ===================================================================== */
1503 /* Methods of BZ2Comp. */
1505 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1506 "compress(data) -> string\n\
1508 Provide more data to the compressor object. It will return chunks of\n\
1509 compressed data whenever possible. When you've finished providing data\n\
1510 to compress, call the flush() method to finish the compression process,\n\
1511 and return what is left in the internal buffers.\n\
1514 static PyObject *
1515 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1517 char *data;
1518 int datasize;
1519 int bufsize = SMALLCHUNK;
1520 PY_LONG_LONG totalout;
1521 PyObject *ret = NULL;
1522 bz_stream *bzs = &self->bzs;
1523 int bzerror;
1525 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1526 return NULL;
1528 ACQUIRE_LOCK(self);
1529 if (!self->running) {
1530 PyErr_SetString(PyExc_ValueError,
1531 "this object was already flushed");
1532 goto error;
1535 ret = PyString_FromStringAndSize(NULL, bufsize);
1536 if (!ret)
1537 goto error;
1539 bzs->next_in = data;
1540 bzs->avail_in = datasize;
1541 bzs->next_out = BUF(ret);
1542 bzs->avail_out = bufsize;
1544 totalout = BZS_TOTAL_OUT(bzs);
1546 for (;;) {
1547 Py_BEGIN_ALLOW_THREADS
1548 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1549 Py_END_ALLOW_THREADS
1550 if (bzerror != BZ_RUN_OK) {
1551 Util_CatchBZ2Error(bzerror);
1552 goto error;
1554 if (bzs->avail_out == 0) {
1555 bufsize = Util_NewBufferSize(bufsize);
1556 if (_PyString_Resize(&ret, bufsize) < 0) {
1557 BZ2_bzCompressEnd(bzs);
1558 goto error;
1560 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1561 - totalout);
1562 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1563 } else if (bzs->avail_in == 0) {
1564 break;
1568 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1570 RELEASE_LOCK(self);
1571 return ret;
1573 error:
1574 RELEASE_LOCK(self);
1575 Py_XDECREF(ret);
1576 return NULL;
1579 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1580 "flush() -> string\n\
1582 Finish the compression process and return what is left in internal buffers.\n\
1583 You must not use the compressor object after calling this method.\n\
1586 static PyObject *
1587 BZ2Comp_flush(BZ2CompObject *self)
1589 int bufsize = SMALLCHUNK;
1590 PyObject *ret = NULL;
1591 bz_stream *bzs = &self->bzs;
1592 PY_LONG_LONG totalout;
1593 int bzerror;
1595 ACQUIRE_LOCK(self);
1596 if (!self->running) {
1597 PyErr_SetString(PyExc_ValueError, "object was already "
1598 "flushed");
1599 goto error;
1601 self->running = 0;
1603 ret = PyString_FromStringAndSize(NULL, bufsize);
1604 if (!ret)
1605 goto error;
1607 bzs->next_out = BUF(ret);
1608 bzs->avail_out = bufsize;
1610 totalout = BZS_TOTAL_OUT(bzs);
1612 for (;;) {
1613 Py_BEGIN_ALLOW_THREADS
1614 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1615 Py_END_ALLOW_THREADS
1616 if (bzerror == BZ_STREAM_END) {
1617 break;
1618 } else if (bzerror != BZ_FINISH_OK) {
1619 Util_CatchBZ2Error(bzerror);
1620 goto error;
1622 if (bzs->avail_out == 0) {
1623 bufsize = Util_NewBufferSize(bufsize);
1624 if (_PyString_Resize(&ret, bufsize) < 0)
1625 goto error;
1626 bzs->next_out = BUF(ret);
1627 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1628 - totalout);
1629 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1633 if (bzs->avail_out != 0)
1634 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1636 RELEASE_LOCK(self);
1637 return ret;
1639 error:
1640 RELEASE_LOCK(self);
1641 Py_XDECREF(ret);
1642 return NULL;
1645 static PyMethodDef BZ2Comp_methods[] = {
1646 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1647 BZ2Comp_compress__doc__},
1648 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1649 BZ2Comp_flush__doc__},
1650 {NULL, NULL} /* sentinel */
1654 /* ===================================================================== */
1655 /* Slot definitions for BZ2Comp_Type. */
1657 static int
1658 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1660 int compresslevel = 9;
1661 int bzerror;
1662 static char *kwlist[] = {"compresslevel", 0};
1664 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1665 kwlist, &compresslevel))
1666 return -1;
1668 if (compresslevel < 1 || compresslevel > 9) {
1669 PyErr_SetString(PyExc_ValueError,
1670 "compresslevel must be between 1 and 9");
1671 goto error;
1674 #ifdef WITH_THREAD
1675 self->lock = PyThread_allocate_lock();
1676 if (!self->lock)
1677 goto error;
1678 #endif
1680 memset(&self->bzs, 0, sizeof(bz_stream));
1681 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1682 if (bzerror != BZ_OK) {
1683 Util_CatchBZ2Error(bzerror);
1684 goto error;
1687 self->running = 1;
1689 return 0;
1690 error:
1691 #ifdef WITH_THREAD
1692 if (self->lock)
1693 PyThread_free_lock(self->lock);
1694 #endif
1695 return -1;
1698 static void
1699 BZ2Comp_dealloc(BZ2CompObject *self)
1701 #ifdef WITH_THREAD
1702 if (self->lock)
1703 PyThread_free_lock(self->lock);
1704 #endif
1705 BZ2_bzCompressEnd(&self->bzs);
1706 self->ob_type->tp_free((PyObject *)self);
1710 /* ===================================================================== */
1711 /* BZ2Comp_Type definition. */
1713 PyDoc_STRVAR(BZ2Comp__doc__,
1714 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1716 Create a new compressor object. This object may be used to compress\n\
1717 data sequentially. If you want to compress data in one shot, use the\n\
1718 compress() function instead. The compresslevel parameter, if given,\n\
1719 must be a number between 1 and 9.\n\
1722 static PyTypeObject BZ2Comp_Type = {
1723 PyObject_HEAD_INIT(NULL)
1724 0, /*ob_size*/
1725 "bz2.BZ2Compressor", /*tp_name*/
1726 sizeof(BZ2CompObject), /*tp_basicsize*/
1727 0, /*tp_itemsize*/
1728 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1729 0, /*tp_print*/
1730 0, /*tp_getattr*/
1731 0, /*tp_setattr*/
1732 0, /*tp_compare*/
1733 0, /*tp_repr*/
1734 0, /*tp_as_number*/
1735 0, /*tp_as_sequence*/
1736 0, /*tp_as_mapping*/
1737 0, /*tp_hash*/
1738 0, /*tp_call*/
1739 0, /*tp_str*/
1740 PyObject_GenericGetAttr,/*tp_getattro*/
1741 PyObject_GenericSetAttr,/*tp_setattro*/
1742 0, /*tp_as_buffer*/
1743 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1744 BZ2Comp__doc__, /*tp_doc*/
1745 0, /*tp_traverse*/
1746 0, /*tp_clear*/
1747 0, /*tp_richcompare*/
1748 0, /*tp_weaklistoffset*/
1749 0, /*tp_iter*/
1750 0, /*tp_iternext*/
1751 BZ2Comp_methods, /*tp_methods*/
1752 0, /*tp_members*/
1753 0, /*tp_getset*/
1754 0, /*tp_base*/
1755 0, /*tp_dict*/
1756 0, /*tp_descr_get*/
1757 0, /*tp_descr_set*/
1758 0, /*tp_dictoffset*/
1759 (initproc)BZ2Comp_init, /*tp_init*/
1760 PyType_GenericAlloc, /*tp_alloc*/
1761 PyType_GenericNew, /*tp_new*/
1762 _PyObject_Del, /*tp_free*/
1763 0, /*tp_is_gc*/
1767 /* ===================================================================== */
1768 /* Members of BZ2Decomp. */
1770 #undef OFF
1771 #define OFF(x) offsetof(BZ2DecompObject, x)
1773 static PyMemberDef BZ2Decomp_members[] = {
1774 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1775 {NULL} /* Sentinel */
1779 /* ===================================================================== */
1780 /* Methods of BZ2Decomp. */
1782 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1783 "decompress(data) -> string\n\
1785 Provide more data to the decompressor object. It will return chunks\n\
1786 of decompressed data whenever possible. If you try to decompress data\n\
1787 after the end of stream is found, EOFError will be raised. If any data\n\
1788 was found after the end of stream, it'll be ignored and saved in\n\
1789 unused_data attribute.\n\
1792 static PyObject *
1793 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1795 char *data;
1796 int datasize;
1797 int bufsize = SMALLCHUNK;
1798 PY_LONG_LONG totalout;
1799 PyObject *ret = NULL;
1800 bz_stream *bzs = &self->bzs;
1801 int bzerror;
1803 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1804 return NULL;
1806 ACQUIRE_LOCK(self);
1807 if (!self->running) {
1808 PyErr_SetString(PyExc_EOFError, "end of stream was "
1809 "already found");
1810 goto error;
1813 ret = PyString_FromStringAndSize(NULL, bufsize);
1814 if (!ret)
1815 goto error;
1817 bzs->next_in = data;
1818 bzs->avail_in = datasize;
1819 bzs->next_out = BUF(ret);
1820 bzs->avail_out = bufsize;
1822 totalout = BZS_TOTAL_OUT(bzs);
1824 for (;;) {
1825 Py_BEGIN_ALLOW_THREADS
1826 bzerror = BZ2_bzDecompress(bzs);
1827 Py_END_ALLOW_THREADS
1828 if (bzerror == BZ_STREAM_END) {
1829 if (bzs->avail_in != 0) {
1830 Py_DECREF(self->unused_data);
1831 self->unused_data =
1832 PyString_FromStringAndSize(bzs->next_in,
1833 bzs->avail_in);
1835 self->running = 0;
1836 break;
1838 if (bzerror != BZ_OK) {
1839 Util_CatchBZ2Error(bzerror);
1840 goto error;
1842 if (bzs->avail_out == 0) {
1843 bufsize = Util_NewBufferSize(bufsize);
1844 if (_PyString_Resize(&ret, bufsize) < 0) {
1845 BZ2_bzDecompressEnd(bzs);
1846 goto error;
1848 bzs->next_out = BUF(ret);
1849 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1850 - totalout);
1851 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1852 } else if (bzs->avail_in == 0) {
1853 break;
1857 if (bzs->avail_out != 0)
1858 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1860 RELEASE_LOCK(self);
1861 return ret;
1863 error:
1864 RELEASE_LOCK(self);
1865 Py_XDECREF(ret);
1866 return NULL;
1869 static PyMethodDef BZ2Decomp_methods[] = {
1870 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1871 {NULL, NULL} /* sentinel */
1875 /* ===================================================================== */
1876 /* Slot definitions for BZ2Decomp_Type. */
1878 static int
1879 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1881 int bzerror;
1883 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1884 return -1;
1886 #ifdef WITH_THREAD
1887 self->lock = PyThread_allocate_lock();
1888 if (!self->lock)
1889 goto error;
1890 #endif
1892 self->unused_data = PyString_FromString("");
1893 if (!self->unused_data)
1894 goto error;
1896 memset(&self->bzs, 0, sizeof(bz_stream));
1897 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1898 if (bzerror != BZ_OK) {
1899 Util_CatchBZ2Error(bzerror);
1900 goto error;
1903 self->running = 1;
1905 return 0;
1907 error:
1908 #ifdef WITH_THREAD
1909 if (self->lock)
1910 PyThread_free_lock(self->lock);
1911 #endif
1912 Py_XDECREF(self->unused_data);
1913 return -1;
1916 static void
1917 BZ2Decomp_dealloc(BZ2DecompObject *self)
1919 #ifdef WITH_THREAD
1920 if (self->lock)
1921 PyThread_free_lock(self->lock);
1922 #endif
1923 Py_XDECREF(self->unused_data);
1924 BZ2_bzDecompressEnd(&self->bzs);
1925 self->ob_type->tp_free((PyObject *)self);
1929 /* ===================================================================== */
1930 /* BZ2Decomp_Type definition. */
1932 PyDoc_STRVAR(BZ2Decomp__doc__,
1933 "BZ2Decompressor() -> decompressor object\n\
1935 Create a new decompressor object. This object may be used to decompress\n\
1936 data sequentially. If you want to decompress data in one shot, use the\n\
1937 decompress() function instead.\n\
1940 static PyTypeObject BZ2Decomp_Type = {
1941 PyObject_HEAD_INIT(NULL)
1942 0, /*ob_size*/
1943 "bz2.BZ2Decompressor", /*tp_name*/
1944 sizeof(BZ2DecompObject), /*tp_basicsize*/
1945 0, /*tp_itemsize*/
1946 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1947 0, /*tp_print*/
1948 0, /*tp_getattr*/
1949 0, /*tp_setattr*/
1950 0, /*tp_compare*/
1951 0, /*tp_repr*/
1952 0, /*tp_as_number*/
1953 0, /*tp_as_sequence*/
1954 0, /*tp_as_mapping*/
1955 0, /*tp_hash*/
1956 0, /*tp_call*/
1957 0, /*tp_str*/
1958 PyObject_GenericGetAttr,/*tp_getattro*/
1959 PyObject_GenericSetAttr,/*tp_setattro*/
1960 0, /*tp_as_buffer*/
1961 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1962 BZ2Decomp__doc__, /*tp_doc*/
1963 0, /*tp_traverse*/
1964 0, /*tp_clear*/
1965 0, /*tp_richcompare*/
1966 0, /*tp_weaklistoffset*/
1967 0, /*tp_iter*/
1968 0, /*tp_iternext*/
1969 BZ2Decomp_methods, /*tp_methods*/
1970 BZ2Decomp_members, /*tp_members*/
1971 0, /*tp_getset*/
1972 0, /*tp_base*/
1973 0, /*tp_dict*/
1974 0, /*tp_descr_get*/
1975 0, /*tp_descr_set*/
1976 0, /*tp_dictoffset*/
1977 (initproc)BZ2Decomp_init, /*tp_init*/
1978 PyType_GenericAlloc, /*tp_alloc*/
1979 PyType_GenericNew, /*tp_new*/
1980 _PyObject_Del, /*tp_free*/
1981 0, /*tp_is_gc*/
1985 /* ===================================================================== */
1986 /* Module functions. */
1988 PyDoc_STRVAR(bz2_compress__doc__,
1989 "compress(data [, compresslevel=9]) -> string\n\
1991 Compress data in one shot. If you want to compress data sequentially,\n\
1992 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1993 given, must be a number between 1 and 9.\n\
1996 static PyObject *
1997 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1999 int compresslevel=9;
2000 char *data;
2001 int datasize;
2002 int bufsize;
2003 PyObject *ret = NULL;
2004 bz_stream _bzs;
2005 bz_stream *bzs = &_bzs;
2006 int bzerror;
2007 static char *kwlist[] = {"data", "compresslevel", 0};
2009 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2010 kwlist, &data, &datasize,
2011 &compresslevel))
2012 return NULL;
2014 if (compresslevel < 1 || compresslevel > 9) {
2015 PyErr_SetString(PyExc_ValueError,
2016 "compresslevel must be between 1 and 9");
2017 return NULL;
2020 /* Conforming to bz2 manual, this is large enough to fit compressed
2021 * data in one shot. We will check it later anyway. */
2022 bufsize = datasize + (datasize/100+1) + 600;
2024 ret = PyString_FromStringAndSize(NULL, bufsize);
2025 if (!ret)
2026 return NULL;
2028 memset(bzs, 0, sizeof(bz_stream));
2030 bzs->next_in = data;
2031 bzs->avail_in = datasize;
2032 bzs->next_out = BUF(ret);
2033 bzs->avail_out = bufsize;
2035 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2036 if (bzerror != BZ_OK) {
2037 Util_CatchBZ2Error(bzerror);
2038 Py_DECREF(ret);
2039 return NULL;
2042 for (;;) {
2043 Py_BEGIN_ALLOW_THREADS
2044 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2045 Py_END_ALLOW_THREADS
2046 if (bzerror == BZ_STREAM_END) {
2047 break;
2048 } else if (bzerror != BZ_FINISH_OK) {
2049 BZ2_bzCompressEnd(bzs);
2050 Util_CatchBZ2Error(bzerror);
2051 Py_DECREF(ret);
2052 return NULL;
2054 if (bzs->avail_out == 0) {
2055 bufsize = Util_NewBufferSize(bufsize);
2056 if (_PyString_Resize(&ret, bufsize) < 0) {
2057 BZ2_bzCompressEnd(bzs);
2058 Py_DECREF(ret);
2059 return NULL;
2061 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2062 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2066 if (bzs->avail_out != 0)
2067 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2068 BZ2_bzCompressEnd(bzs);
2070 return ret;
2073 PyDoc_STRVAR(bz2_decompress__doc__,
2074 "decompress(data) -> decompressed data\n\
2076 Decompress data in one shot. If you want to decompress data sequentially,\n\
2077 use an instance of BZ2Decompressor instead.\n\
2080 static PyObject *
2081 bz2_decompress(PyObject *self, PyObject *args)
2083 char *data;
2084 int datasize;
2085 int bufsize = SMALLCHUNK;
2086 PyObject *ret;
2087 bz_stream _bzs;
2088 bz_stream *bzs = &_bzs;
2089 int bzerror;
2091 if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2092 return NULL;
2094 if (datasize == 0)
2095 return PyString_FromString("");
2097 ret = PyString_FromStringAndSize(NULL, bufsize);
2098 if (!ret)
2099 return NULL;
2101 memset(bzs, 0, sizeof(bz_stream));
2103 bzs->next_in = data;
2104 bzs->avail_in = datasize;
2105 bzs->next_out = BUF(ret);
2106 bzs->avail_out = bufsize;
2108 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2109 if (bzerror != BZ_OK) {
2110 Util_CatchBZ2Error(bzerror);
2111 Py_DECREF(ret);
2112 return NULL;
2115 for (;;) {
2116 Py_BEGIN_ALLOW_THREADS
2117 bzerror = BZ2_bzDecompress(bzs);
2118 Py_END_ALLOW_THREADS
2119 if (bzerror == BZ_STREAM_END) {
2120 break;
2121 } else if (bzerror != BZ_OK) {
2122 BZ2_bzDecompressEnd(bzs);
2123 Util_CatchBZ2Error(bzerror);
2124 Py_DECREF(ret);
2125 return NULL;
2127 if (bzs->avail_out == 0) {
2128 bufsize = Util_NewBufferSize(bufsize);
2129 if (_PyString_Resize(&ret, bufsize) < 0) {
2130 BZ2_bzDecompressEnd(bzs);
2131 Py_DECREF(ret);
2132 return NULL;
2134 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2135 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2136 } else if (bzs->avail_in == 0) {
2137 BZ2_bzDecompressEnd(bzs);
2138 PyErr_SetString(PyExc_ValueError,
2139 "couldn't find end of stream");
2140 Py_DECREF(ret);
2141 return NULL;
2145 if (bzs->avail_out != 0)
2146 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2147 BZ2_bzDecompressEnd(bzs);
2149 return ret;
2152 static PyMethodDef bz2_methods[] = {
2153 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2154 bz2_compress__doc__},
2155 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2156 bz2_decompress__doc__},
2157 {NULL, NULL} /* sentinel */
2160 /* ===================================================================== */
2161 /* Initialization function. */
2163 PyDoc_STRVAR(bz2__doc__,
2164 "The python bz2 module provides a comprehensive interface for\n\
2165 the bz2 compression library. It implements a complete file\n\
2166 interface, one shot (de)compression functions, and types for\n\
2167 sequential (de)compression.\n\
2170 DL_EXPORT(void)
2171 initbz2(void)
2173 PyObject *m;
2175 BZ2File_Type.ob_type = &PyType_Type;
2176 BZ2Comp_Type.ob_type = &PyType_Type;
2177 BZ2Decomp_Type.ob_type = &PyType_Type;
2179 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2181 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2183 Py_INCREF(&BZ2File_Type);
2184 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2186 Py_INCREF(&BZ2Comp_Type);
2187 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2189 Py_INCREF(&BZ2Decomp_Type);
2190 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);