Fix argument order in pure python version of nsmallest() and nlargest().
[python/dscho.git] / Modules / bz2module.c
blob8a93cd9547bc047abde0e920da068faf7587eb50
1 /*
3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
8 */
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* Raw character buffer of a Python string object.  The argument is
 * parenthesised so that expressions can be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* States stored in BZ2FileObject.mode.  MODE_READ_EOF is entered once
 * libbz2 has reported BZ_STREAM_END for a file opened for reading. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type check (subclasses are not matched). */
#define BZ2FileObject_Check(v)	((v)->ob_type == &BZ2File_Type)
/* Compatibility layer between libbz2 API flavours.
 *
 * When the library header defines BZ_CONFIG_ERROR, the BZ2_-prefixed
 * function names are used directly and the output counter of a bz_stream
 * is split into total_out_hi32/total_out_lo32.  Otherwise the unprefixed
 * names are aliased and a single total_out field is read.
 * NOTE(review): presumably BZ_CONFIG_ERROR only exists in libbz2 >= 1.0 --
 * confirm against the supported library versions.
 *
 * BZS_TOTAL_OUT(bzs) takes a pointer to a bz_stream and evaluates to the
 * number of bytes produced so far.  Every variant is a single fully
 * parenthesised expression (no trailing semicolon) so it can be used
 * inside larger expressions.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
	((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
/* Per-object locking helpers.  With thread support they take/release the
 * object's `lock` member (acquire is called with waitflag 1); without
 * thread support they expand to nothing.  `obj` is parenthesised so any
 * pointer expression may be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
/* Bit flags accumulated in BZ2FileObject.f_newlinetypes while reading in
 * universal-newline mode; several may be set at once. */
#define NEWLINE_UNKNOWN	0		/* nothing seen so far */
#define NEWLINE_CR	(1 << 0)	/* a lone \r was seen */
#define NEWLINE_LF	(1 << 1)	/* a lone \n was seen */
#define NEWLINE_CRLF	(1 << 2)	/* a \r\n pair was seen */
82 /* ===================================================================== */
83 /* Structure definitions. */
85 typedef struct {
86 PyObject_HEAD
87 PyObject *file;
89 char* f_buf; /* Allocated readahead buffer */
90 char* f_bufend; /* Points after last occupied position */
91 char* f_bufptr; /* Current buffer position */
93 int f_softspace; /* Flag used by 'print' command */
95 int f_univ_newline; /* Handle any newline convention */
96 int f_newlinetypes; /* Types of newlines seen */
97 int f_skipnextlf; /* Skip next \n */
99 BZFILE *fp;
100 int mode;
101 long pos;
102 long size;
103 #ifdef WITH_THREAD
104 PyThread_type_lock lock;
105 #endif
106 } BZ2FileObject;
108 typedef struct {
109 PyObject_HEAD
110 bz_stream bzs;
111 int running;
112 #ifdef WITH_THREAD
113 PyThread_type_lock lock;
114 #endif
115 } BZ2CompObject;
117 typedef struct {
118 PyObject_HEAD
119 bz_stream bzs;
120 int running;
121 PyObject *unused_data;
122 #ifdef WITH_THREAD
123 PyThread_type_lock lock;
124 #endif
125 } BZ2DecompObject;
127 /* ===================================================================== */
128 /* Utility functions. */
130 static int
131 Util_CatchBZ2Error(int bzerror)
133 int ret = 0;
134 switch(bzerror) {
135 case BZ_OK:
136 case BZ_STREAM_END:
137 break;
139 #ifdef BZ_CONFIG_ERROR
140 case BZ_CONFIG_ERROR:
141 PyErr_SetString(PyExc_SystemError,
142 "the bz2 library was not compiled "
143 "correctly");
144 ret = 1;
145 break;
146 #endif
148 case BZ_PARAM_ERROR:
149 PyErr_SetString(PyExc_ValueError,
150 "the bz2 library has received wrong "
151 "parameters");
152 ret = 1;
153 break;
155 case BZ_MEM_ERROR:
156 PyErr_NoMemory();
157 ret = 1;
158 break;
160 case BZ_DATA_ERROR:
161 case BZ_DATA_ERROR_MAGIC:
162 PyErr_SetString(PyExc_IOError, "invalid data stream");
163 ret = 1;
164 break;
166 case BZ_IO_ERROR:
167 PyErr_SetString(PyExc_IOError, "unknown IO error");
168 ret = 1;
169 break;
171 case BZ_UNEXPECTED_EOF:
172 PyErr_SetString(PyExc_EOFError,
173 "compressed file ended before the "
174 "logical end-of-stream was detected");
175 ret = 1;
176 break;
178 case BZ_SEQUENCE_ERROR:
179 PyErr_SetString(PyExc_RuntimeError,
180 "wrong sequence of bz2 library "
181 "commands used");
182 ret = 1;
183 break;
185 return ret;
/* Buffer growth parameters: SMALLCHUNK is at least 8192 bytes (or BUFSIZ
 * when that is larger); BIGCHUNK is smaller on platforms with a narrow int. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the next size to grow to:
 *   - up to SMALLCHUNK:                  grow by SMALLCHUNK
 *   - above SMALLCHUNK, up to BIGCHUNK:  double
 *   - above both:                        grow by BIGCHUNK
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	if (currentsize <= SMALLCHUNK)
		return currentsize + SMALLCHUNK;
	if (currentsize > BIGCHUNK)
		return currentsize + BIGCHUNK;
	return currentsize + currentsize;
}
215 /* This is a hacked version of Python's fileobject.c:get_line(). */
216 static PyObject *
217 Util_GetLine(BZ2FileObject *f, int n)
219 char c;
220 char *buf, *end;
221 size_t total_v_size; /* total # of slots in buffer */
222 size_t used_v_size; /* # used slots in buffer */
223 size_t increment; /* amount to increment the buffer */
224 PyObject *v;
225 int bzerror;
226 int newlinetypes = f->f_newlinetypes;
227 int skipnextlf = f->f_skipnextlf;
228 int univ_newline = f->f_univ_newline;
230 total_v_size = n > 0 ? n : 100;
231 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
232 if (v == NULL)
233 return NULL;
235 buf = BUF(v);
236 end = buf + total_v_size;
238 for (;;) {
239 Py_BEGIN_ALLOW_THREADS
240 if (univ_newline) {
241 while (1) {
242 BZ2_bzRead(&bzerror, f->fp, &c, 1);
243 f->pos++;
244 if (bzerror != BZ_OK || buf == end)
245 break;
246 if (skipnextlf) {
247 skipnextlf = 0;
248 if (c == '\n') {
249 /* Seeing a \n here with
250 * skipnextlf true means we
251 * saw a \r before.
253 newlinetypes |= NEWLINE_CRLF;
254 BZ2_bzRead(&bzerror, f->fp,
255 &c, 1);
256 if (bzerror != BZ_OK)
257 break;
258 } else {
259 newlinetypes |= NEWLINE_CR;
262 if (c == '\r') {
263 skipnextlf = 1;
264 c = '\n';
265 } else if ( c == '\n')
266 newlinetypes |= NEWLINE_LF;
267 *buf++ = c;
268 if (c == '\n') break;
270 if (bzerror == BZ_STREAM_END && skipnextlf)
271 newlinetypes |= NEWLINE_CR;
272 } else /* If not universal newlines use the normal loop */
273 do {
274 BZ2_bzRead(&bzerror, f->fp, &c, 1);
275 f->pos++;
276 *buf++ = c;
277 } while (bzerror == BZ_OK && c != '\n' && buf != end);
278 Py_END_ALLOW_THREADS
279 f->f_newlinetypes = newlinetypes;
280 f->f_skipnextlf = skipnextlf;
281 if (bzerror == BZ_STREAM_END) {
282 f->size = f->pos;
283 f->mode = MODE_READ_EOF;
284 break;
285 } else if (bzerror != BZ_OK) {
286 Util_CatchBZ2Error(bzerror);
287 Py_DECREF(v);
288 return NULL;
290 if (c == '\n')
291 break;
292 /* Must be because buf == end */
293 if (n > 0)
294 break;
295 used_v_size = total_v_size;
296 increment = total_v_size >> 2; /* mild exponential growth */
297 total_v_size += increment;
298 if (total_v_size > INT_MAX) {
299 PyErr_SetString(PyExc_OverflowError,
300 "line is longer than a Python string can hold");
301 Py_DECREF(v);
302 return NULL;
304 if (_PyString_Resize(&v, total_v_size) < 0)
305 return NULL;
306 buf = BUF(v) + used_v_size;
307 end = BUF(v) + total_v_size;
310 used_v_size = buf - BUF(v);
311 if (used_v_size != total_v_size)
312 _PyString_Resize(&v, used_v_size);
313 return v;
316 /* This is a hacked version of Python's
317 * fileobject.c:Py_UniversalNewlineFread(). */
318 size_t
319 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
320 char* buf, size_t n, BZ2FileObject *f)
322 char *dst = buf;
323 int newlinetypes, skipnextlf;
325 assert(buf != NULL);
326 assert(stream != NULL);
328 if (!f->f_univ_newline)
329 return BZ2_bzRead(bzerror, stream, buf, n);
331 newlinetypes = f->f_newlinetypes;
332 skipnextlf = f->f_skipnextlf;
334 /* Invariant: n is the number of bytes remaining to be filled
335 * in the buffer.
337 while (n) {
338 size_t nread;
339 int shortread;
340 char *src = dst;
342 nread = BZ2_bzRead(bzerror, stream, dst, n);
343 assert(nread <= n);
344 n -= nread; /* assuming 1 byte out for each in; will adjust */
345 shortread = n != 0; /* true iff EOF or error */
346 while (nread--) {
347 char c = *src++;
348 if (c == '\r') {
349 /* Save as LF and set flag to skip next LF. */
350 *dst++ = '\n';
351 skipnextlf = 1;
353 else if (skipnextlf && c == '\n') {
354 /* Skip LF, and remember we saw CR LF. */
355 skipnextlf = 0;
356 newlinetypes |= NEWLINE_CRLF;
357 ++n;
359 else {
360 /* Normal char to be stored in buffer. Also
361 * update the newlinetypes flag if either this
362 * is an LF or the previous char was a CR.
364 if (c == '\n')
365 newlinetypes |= NEWLINE_LF;
366 else if (skipnextlf)
367 newlinetypes |= NEWLINE_CR;
368 *dst++ = c;
369 skipnextlf = 0;
372 if (shortread) {
373 /* If this is EOF, update type flags. */
374 if (skipnextlf && *bzerror == BZ_STREAM_END)
375 newlinetypes |= NEWLINE_CR;
376 break;
379 f->f_newlinetypes = newlinetypes;
380 f->f_skipnextlf = skipnextlf;
381 return dst - buf;
384 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
385 static void
386 Util_DropReadAhead(BZ2FileObject *f)
388 if (f->f_buf != NULL) {
389 PyMem_Free(f->f_buf);
390 f->f_buf = NULL;
394 /* This is a hacked version of Python's fileobject.c:readahead(). */
395 static int
396 Util_ReadAhead(BZ2FileObject *f, int bufsize)
398 int chunksize;
399 int bzerror;
401 if (f->f_buf != NULL) {
402 if((f->f_bufend - f->f_bufptr) >= 1)
403 return 0;
404 else
405 Util_DropReadAhead(f);
407 if (f->mode == MODE_READ_EOF) {
408 return -1;
410 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
411 return -1;
413 Py_BEGIN_ALLOW_THREADS
414 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
415 bufsize, f);
416 Py_END_ALLOW_THREADS
417 f->pos += chunksize;
418 if (bzerror == BZ_STREAM_END) {
419 f->size = f->pos;
420 f->mode = MODE_READ_EOF;
421 } else if (bzerror != BZ_OK) {
422 Util_CatchBZ2Error(bzerror);
423 Util_DropReadAhead(f);
424 return -1;
426 f->f_bufptr = f->f_buf;
427 f->f_bufend = f->f_buf + chunksize;
428 return 0;
431 /* This is a hacked version of Python's
432 * fileobject.c:readahead_get_line_skip(). */
433 static PyStringObject *
434 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
436 PyStringObject* s;
437 char *bufptr;
438 char *buf;
439 int len;
441 if (f->f_buf == NULL)
442 if (Util_ReadAhead(f, bufsize) < 0)
443 return NULL;
445 len = f->f_bufend - f->f_bufptr;
446 if (len == 0)
447 return (PyStringObject *)
448 PyString_FromStringAndSize(NULL, skip);
449 bufptr = memchr(f->f_bufptr, '\n', len);
450 if (bufptr != NULL) {
451 bufptr++; /* Count the '\n' */
452 len = bufptr - f->f_bufptr;
453 s = (PyStringObject *)
454 PyString_FromStringAndSize(NULL, skip+len);
455 if (s == NULL)
456 return NULL;
457 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
458 f->f_bufptr = bufptr;
459 if (bufptr == f->f_bufend)
460 Util_DropReadAhead(f);
461 } else {
462 bufptr = f->f_bufptr;
463 buf = f->f_buf;
464 f->f_buf = NULL; /* Force new readahead buffer */
465 s = Util_ReadAheadGetLineSkip(f, skip+len,
466 bufsize + (bufsize>>2));
467 if (s == NULL) {
468 PyMem_Free(buf);
469 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
472 PyMem_Free(buf);
474 return s;
477 /* ===================================================================== */
478 /* Methods of BZ2File. */
480 PyDoc_STRVAR(BZ2File_read__doc__,
481 "read([size]) -> string\n\
483 Read at most size uncompressed bytes, returned as a string. If the size\n\
484 argument is negative or omitted, read until EOF is reached.\n\
487 /* This is a hacked version of Python's fileobject.c:file_read(). */
488 static PyObject *
489 BZ2File_read(BZ2FileObject *self, PyObject *args)
491 long bytesrequested = -1;
492 size_t bytesread, buffersize, chunksize;
493 int bzerror;
494 PyObject *ret = NULL;
496 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
497 return NULL;
499 ACQUIRE_LOCK(self);
500 switch (self->mode) {
501 case MODE_READ:
502 break;
503 case MODE_READ_EOF:
504 ret = PyString_FromString("");
505 goto cleanup;
506 case MODE_CLOSED:
507 PyErr_SetString(PyExc_ValueError,
508 "I/O operation on closed file");
509 goto cleanup;
510 default:
511 PyErr_SetString(PyExc_IOError,
512 "file is not ready for reading");
513 goto cleanup;
516 if (bytesrequested < 0)
517 buffersize = Util_NewBufferSize((size_t)0);
518 else
519 buffersize = bytesrequested;
520 if (buffersize > INT_MAX) {
521 PyErr_SetString(PyExc_OverflowError,
522 "requested number of bytes is "
523 "more than a Python string can hold");
524 goto cleanup;
526 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
527 if (ret == NULL)
528 goto cleanup;
529 bytesread = 0;
531 for (;;) {
532 Py_BEGIN_ALLOW_THREADS
533 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
534 BUF(ret)+bytesread,
535 buffersize-bytesread,
536 self);
537 self->pos += chunksize;
538 Py_END_ALLOW_THREADS
539 bytesread += chunksize;
540 if (bzerror == BZ_STREAM_END) {
541 self->size = self->pos;
542 self->mode = MODE_READ_EOF;
543 break;
544 } else if (bzerror != BZ_OK) {
545 Util_CatchBZ2Error(bzerror);
546 Py_DECREF(ret);
547 ret = NULL;
548 goto cleanup;
550 if (bytesrequested < 0) {
551 buffersize = Util_NewBufferSize(buffersize);
552 if (_PyString_Resize(&ret, buffersize) < 0)
553 goto cleanup;
554 } else {
555 break;
558 if (bytesread != buffersize)
559 _PyString_Resize(&ret, bytesread);
561 cleanup:
562 RELEASE_LOCK(self);
563 return ret;
566 PyDoc_STRVAR(BZ2File_readline__doc__,
567 "readline([size]) -> string\n\
569 Return the next line from the file, as a string, retaining newline.\n\
570 A non-negative size argument will limit the maximum number of bytes to\n\
571 return (an incomplete line may be returned then). Return an empty\n\
572 string at EOF.\n\
575 static PyObject *
576 BZ2File_readline(BZ2FileObject *self, PyObject *args)
578 PyObject *ret = NULL;
579 int sizehint = -1;
581 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
582 return NULL;
584 ACQUIRE_LOCK(self);
585 switch (self->mode) {
586 case MODE_READ:
587 break;
588 case MODE_READ_EOF:
589 ret = PyString_FromString("");
590 goto cleanup;
591 case MODE_CLOSED:
592 PyErr_SetString(PyExc_ValueError,
593 "I/O operation on closed file");
594 goto cleanup;
595 default:
596 PyErr_SetString(PyExc_IOError,
597 "file is not ready for reading");
598 goto cleanup;
601 if (sizehint == 0)
602 ret = PyString_FromString("");
603 else
604 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
606 cleanup:
607 RELEASE_LOCK(self);
608 return ret;
611 PyDoc_STRVAR(BZ2File_readlines__doc__,
612 "readlines([size]) -> list\n\
614 Call readline() repeatedly and return a list of lines read.\n\
615 The optional size argument, if given, is an approximate bound on the\n\
616 total number of bytes in the lines returned.\n\
619 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
620 static PyObject *
621 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
623 long sizehint = 0;
624 PyObject *list = NULL;
625 PyObject *line;
626 char small_buffer[SMALLCHUNK];
627 char *buffer = small_buffer;
628 size_t buffersize = SMALLCHUNK;
629 PyObject *big_buffer = NULL;
630 size_t nfilled = 0;
631 size_t nread;
632 size_t totalread = 0;
633 char *p, *q, *end;
634 int err;
635 int shortread = 0;
636 int bzerror;
638 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
639 return NULL;
641 ACQUIRE_LOCK(self);
642 switch (self->mode) {
643 case MODE_READ:
644 break;
645 case MODE_READ_EOF:
646 list = PyList_New(0);
647 goto cleanup;
648 case MODE_CLOSED:
649 PyErr_SetString(PyExc_ValueError,
650 "I/O operation on closed file");
651 goto cleanup;
652 default:
653 PyErr_SetString(PyExc_IOError,
654 "file is not ready for reading");
655 goto cleanup;
658 if ((list = PyList_New(0)) == NULL)
659 goto cleanup;
661 for (;;) {
662 Py_BEGIN_ALLOW_THREADS
663 nread = Util_UnivNewlineRead(&bzerror, self->fp,
664 buffer+nfilled,
665 buffersize-nfilled, self);
666 self->pos += nread;
667 Py_END_ALLOW_THREADS
668 if (bzerror == BZ_STREAM_END) {
669 self->size = self->pos;
670 self->mode = MODE_READ_EOF;
671 if (nread == 0) {
672 sizehint = 0;
673 break;
675 shortread = 1;
676 } else if (bzerror != BZ_OK) {
677 Util_CatchBZ2Error(bzerror);
678 error:
679 Py_DECREF(list);
680 list = NULL;
681 goto cleanup;
683 totalread += nread;
684 p = memchr(buffer+nfilled, '\n', nread);
685 if (p == NULL) {
686 /* Need a larger buffer to fit this line */
687 nfilled += nread;
688 buffersize *= 2;
689 if (buffersize > INT_MAX) {
690 PyErr_SetString(PyExc_OverflowError,
691 "line is longer than a Python string can hold");
692 goto error;
694 if (big_buffer == NULL) {
695 /* Create the big buffer */
696 big_buffer = PyString_FromStringAndSize(
697 NULL, buffersize);
698 if (big_buffer == NULL)
699 goto error;
700 buffer = PyString_AS_STRING(big_buffer);
701 memcpy(buffer, small_buffer, nfilled);
703 else {
704 /* Grow the big buffer */
705 _PyString_Resize(&big_buffer, buffersize);
706 buffer = PyString_AS_STRING(big_buffer);
708 continue;
710 end = buffer+nfilled+nread;
711 q = buffer;
712 do {
713 /* Process complete lines */
714 p++;
715 line = PyString_FromStringAndSize(q, p-q);
716 if (line == NULL)
717 goto error;
718 err = PyList_Append(list, line);
719 Py_DECREF(line);
720 if (err != 0)
721 goto error;
722 q = p;
723 p = memchr(q, '\n', end-q);
724 } while (p != NULL);
725 /* Move the remaining incomplete line to the start */
726 nfilled = end-q;
727 memmove(buffer, q, nfilled);
728 if (sizehint > 0)
729 if (totalread >= (size_t)sizehint)
730 break;
731 if (shortread) {
732 sizehint = 0;
733 break;
736 if (nfilled != 0) {
737 /* Partial last line */
738 line = PyString_FromStringAndSize(buffer, nfilled);
739 if (line == NULL)
740 goto error;
741 if (sizehint > 0) {
742 /* Need to complete the last line */
743 PyObject *rest = Util_GetLine(self, 0);
744 if (rest == NULL) {
745 Py_DECREF(line);
746 goto error;
748 PyString_Concat(&line, rest);
749 Py_DECREF(rest);
750 if (line == NULL)
751 goto error;
753 err = PyList_Append(list, line);
754 Py_DECREF(line);
755 if (err != 0)
756 goto error;
759 cleanup:
760 RELEASE_LOCK(self);
761 if (big_buffer) {
762 Py_DECREF(big_buffer);
764 return list;
767 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
768 "xreadlines() -> self\n\
770 For backward compatibility. BZ2File objects now include the performance\n\
771 optimizations previously implemented in the xreadlines module.\n\
774 PyDoc_STRVAR(BZ2File_write__doc__,
775 "write(data) -> None\n\
777 Write the 'data' string to file. Note that due to buffering, close() may\n\
778 be needed before the file on disk reflects the data written.\n\
781 /* This is a hacked version of Python's fileobject.c:file_write(). */
782 static PyObject *
783 BZ2File_write(BZ2FileObject *self, PyObject *args)
785 PyObject *ret = NULL;
786 char *buf;
787 int len;
788 int bzerror;
790 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
791 return NULL;
793 ACQUIRE_LOCK(self);
794 switch (self->mode) {
795 case MODE_WRITE:
796 break;
798 case MODE_CLOSED:
799 PyErr_SetString(PyExc_ValueError,
800 "I/O operation on closed file");
801 goto cleanup;;
803 default:
804 PyErr_SetString(PyExc_IOError,
805 "file is not ready for writing");
806 goto cleanup;;
809 self->f_softspace = 0;
811 Py_BEGIN_ALLOW_THREADS
812 BZ2_bzWrite (&bzerror, self->fp, buf, len);
813 self->pos += len;
814 Py_END_ALLOW_THREADS
816 if (bzerror != BZ_OK) {
817 Util_CatchBZ2Error(bzerror);
818 goto cleanup;
821 Py_INCREF(Py_None);
822 ret = Py_None;
824 cleanup:
825 RELEASE_LOCK(self);
826 return ret;
829 PyDoc_STRVAR(BZ2File_writelines__doc__,
830 "writelines(sequence_of_strings) -> None\n\
832 Write the sequence of strings to the file. Note that newlines are not\n\
833 added. The sequence can be any iterable object producing strings. This is\n\
834 equivalent to calling write() for each string.\n\
837 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
838 static PyObject *
839 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
841 #define CHUNKSIZE 1000
842 PyObject *list = NULL;
843 PyObject *iter = NULL;
844 PyObject *ret = NULL;
845 PyObject *line;
846 int i, j, index, len, islist;
847 int bzerror;
849 ACQUIRE_LOCK(self);
850 islist = PyList_Check(seq);
851 if (!islist) {
852 iter = PyObject_GetIter(seq);
853 if (iter == NULL) {
854 PyErr_SetString(PyExc_TypeError,
855 "writelines() requires an iterable argument");
856 goto error;
858 list = PyList_New(CHUNKSIZE);
859 if (list == NULL)
860 goto error;
863 /* Strategy: slurp CHUNKSIZE lines into a private list,
864 checking that they are all strings, then write that list
865 without holding the interpreter lock, then come back for more. */
866 for (index = 0; ; index += CHUNKSIZE) {
867 if (islist) {
868 Py_XDECREF(list);
869 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
870 if (list == NULL)
871 goto error;
872 j = PyList_GET_SIZE(list);
874 else {
875 for (j = 0; j < CHUNKSIZE; j++) {
876 line = PyIter_Next(iter);
877 if (line == NULL) {
878 if (PyErr_Occurred())
879 goto error;
880 break;
882 PyList_SetItem(list, j, line);
885 if (j == 0)
886 break;
888 /* Check that all entries are indeed strings. If not,
889 apply the same rules as for file.write() and
890 convert the rets to strings. This is slow, but
891 seems to be the only way since all conversion APIs
892 could potentially execute Python code. */
893 for (i = 0; i < j; i++) {
894 PyObject *v = PyList_GET_ITEM(list, i);
895 if (!PyString_Check(v)) {
896 const char *buffer;
897 int len;
898 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
899 PyErr_SetString(PyExc_TypeError,
900 "writelines() "
901 "argument must be "
902 "a sequence of "
903 "strings");
904 goto error;
906 line = PyString_FromStringAndSize(buffer,
907 len);
908 if (line == NULL)
909 goto error;
910 Py_DECREF(v);
911 PyList_SET_ITEM(list, i, line);
915 self->f_softspace = 0;
917 /* Since we are releasing the global lock, the
918 following code may *not* execute Python code. */
919 Py_BEGIN_ALLOW_THREADS
920 for (i = 0; i < j; i++) {
921 line = PyList_GET_ITEM(list, i);
922 len = PyString_GET_SIZE(line);
923 BZ2_bzWrite (&bzerror, self->fp,
924 PyString_AS_STRING(line), len);
925 if (bzerror != BZ_OK) {
926 Py_BLOCK_THREADS
927 Util_CatchBZ2Error(bzerror);
928 goto error;
931 Py_END_ALLOW_THREADS
933 if (j < CHUNKSIZE)
934 break;
937 Py_INCREF(Py_None);
938 ret = Py_None;
940 error:
941 RELEASE_LOCK(self);
942 Py_XDECREF(list);
943 Py_XDECREF(iter);
944 return ret;
945 #undef CHUNKSIZE
948 PyDoc_STRVAR(BZ2File_seek__doc__,
949 "seek(offset [, whence]) -> None\n\
951 Move to new file position. Argument offset is a byte count. Optional\n\
952 argument whence defaults to 0 (offset from start of file, offset\n\
953 should be >= 0); other values are 1 (move relative to current position,\n\
954 positive or negative), and 2 (move relative to end of file, usually\n\
955 negative, although many platforms allow seeking beyond the end of a file).\n\
957 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
958 the operation may be extremely slow.\n\
961 static PyObject *
962 BZ2File_seek(BZ2FileObject *self, PyObject *args)
964 int where = 0;
965 long offset;
966 char small_buffer[SMALLCHUNK];
967 char *buffer = small_buffer;
968 size_t buffersize = SMALLCHUNK;
969 int bytesread = 0;
970 int readsize;
971 int chunksize;
972 int bzerror;
973 int rewind = 0;
974 PyObject *ret = NULL;
976 if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
977 return NULL;
979 ACQUIRE_LOCK(self);
980 Util_DropReadAhead(self);
981 switch (self->mode) {
982 case MODE_READ:
983 case MODE_READ_EOF:
984 break;
986 case MODE_CLOSED:
987 PyErr_SetString(PyExc_ValueError,
988 "I/O operation on closed file");
989 goto cleanup;;
991 default:
992 PyErr_SetString(PyExc_IOError,
993 "seek works only while reading");
994 goto cleanup;;
997 if (offset < 0) {
998 if (where == 1) {
999 offset = self->pos + offset;
1000 rewind = 1;
1001 } else if (where == 2) {
1002 if (self->size == -1) {
1003 assert(self->mode != MODE_READ_EOF);
1004 for (;;) {
1005 Py_BEGIN_ALLOW_THREADS
1006 chunksize = Util_UnivNewlineRead(
1007 &bzerror, self->fp,
1008 buffer, buffersize,
1009 self);
1010 self->pos += chunksize;
1011 Py_END_ALLOW_THREADS
1013 bytesread += chunksize;
1014 if (bzerror == BZ_STREAM_END) {
1015 break;
1016 } else if (bzerror != BZ_OK) {
1017 Util_CatchBZ2Error(bzerror);
1018 goto cleanup;
1021 self->mode = MODE_READ_EOF;
1022 self->size = self->pos;
1023 bytesread = 0;
1025 offset = self->size + offset;
1026 if (offset >= self->pos)
1027 offset -= self->pos;
1028 else
1029 rewind = 1;
1031 if (offset < 0)
1032 offset = 0;
1033 } else if (where == 0) {
1034 if (offset >= self->pos)
1035 offset -= self->pos;
1036 else
1037 rewind = 1;
1040 if (rewind) {
1041 BZ2_bzReadClose(&bzerror, self->fp);
1042 if (bzerror != BZ_OK) {
1043 Util_CatchBZ2Error(bzerror);
1044 goto cleanup;
1046 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1047 if (!ret)
1048 goto cleanup;
1049 Py_DECREF(ret);
1050 ret = NULL;
1051 self->pos = 0;
1052 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1053 0, 0, NULL, 0);
1054 if (bzerror != BZ_OK) {
1055 Util_CatchBZ2Error(bzerror);
1056 goto cleanup;
1058 self->mode = MODE_READ;
1059 } else if (self->mode == MODE_READ_EOF) {
1060 goto exit;
1063 if (offset == 0)
1064 goto exit;
1066 /* Before getting here, offset must be set to the number of bytes
1067 * to walk forward. */
1068 for (;;) {
1069 if ((size_t)offset-bytesread > buffersize)
1070 readsize = buffersize;
1071 else
1072 readsize = offset-bytesread;
1073 Py_BEGIN_ALLOW_THREADS
1074 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1075 buffer, readsize, self);
1076 self->pos += chunksize;
1077 Py_END_ALLOW_THREADS
1078 bytesread += chunksize;
1079 if (bzerror == BZ_STREAM_END) {
1080 self->size = self->pos;
1081 self->mode = MODE_READ_EOF;
1082 break;
1083 } else if (bzerror != BZ_OK) {
1084 Util_CatchBZ2Error(bzerror);
1085 goto cleanup;
1087 if (bytesread == offset)
1088 break;
1091 exit:
1092 Py_INCREF(Py_None);
1093 ret = Py_None;
1095 cleanup:
1096 RELEASE_LOCK(self);
1097 return ret;
1100 PyDoc_STRVAR(BZ2File_tell__doc__,
1101 "tell() -> int\n\
1103 Return the current file position, an integer (may be a long integer).\n\
1106 static PyObject *
1107 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1109 PyObject *ret = NULL;
1111 if (self->mode == MODE_CLOSED) {
1112 PyErr_SetString(PyExc_ValueError,
1113 "I/O operation on closed file");
1114 goto cleanup;
1117 ret = PyInt_FromLong(self->pos);
1119 cleanup:
1120 return ret;
1123 PyDoc_STRVAR(BZ2File_close__doc__,
1124 "close() -> None or (perhaps) an integer\n\
1126 Close the file. Sets data attribute .closed to true. A closed file\n\
1127 cannot be used for further I/O operations. close() may be called more\n\
1128 than once without error.\n\
1131 static PyObject *
1132 BZ2File_close(BZ2FileObject *self)
1134 PyObject *ret = NULL;
1135 int bzerror = BZ_OK;
1137 ACQUIRE_LOCK(self);
1138 switch (self->mode) {
1139 case MODE_READ:
1140 case MODE_READ_EOF:
1141 BZ2_bzReadClose(&bzerror, self->fp);
1142 break;
1143 case MODE_WRITE:
1144 BZ2_bzWriteClose(&bzerror, self->fp,
1145 0, NULL, NULL);
1146 break;
1148 self->mode = MODE_CLOSED;
1149 ret = PyObject_CallMethod(self->file, "close", NULL);
1150 if (bzerror != BZ_OK) {
1151 Util_CatchBZ2Error(bzerror);
1152 Py_XDECREF(ret);
1153 ret = NULL;
1156 RELEASE_LOCK(self);
1157 return ret;
1160 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1162 static PyMethodDef BZ2File_methods[] = {
1163 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1164 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1165 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1166 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1167 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1168 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1169 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1170 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1171 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1172 {NULL, NULL} /* sentinel */
1176 /* ===================================================================== */
1177 /* Getters and setters of BZ2File. */
1179 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1180 static PyObject *
1181 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1183 switch (self->f_newlinetypes) {
1184 case NEWLINE_UNKNOWN:
1185 Py_INCREF(Py_None);
1186 return Py_None;
1187 case NEWLINE_CR:
1188 return PyString_FromString("\r");
1189 case NEWLINE_LF:
1190 return PyString_FromString("\n");
1191 case NEWLINE_CR|NEWLINE_LF:
1192 return Py_BuildValue("(ss)", "\r", "\n");
1193 case NEWLINE_CRLF:
1194 return PyString_FromString("\r\n");
1195 case NEWLINE_CR|NEWLINE_CRLF:
1196 return Py_BuildValue("(ss)", "\r", "\r\n");
1197 case NEWLINE_LF|NEWLINE_CRLF:
1198 return Py_BuildValue("(ss)", "\n", "\r\n");
1199 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1200 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1201 default:
1202 PyErr_Format(PyExc_SystemError,
1203 "Unknown newlines value 0x%x\n",
1204 self->f_newlinetypes);
1205 return NULL;
1209 static PyObject *
1210 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1212 return PyInt_FromLong(self->mode == MODE_CLOSED);
1215 static PyObject *
1216 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1218 return PyObject_GetAttrString(self->file, "mode");
1221 static PyObject *
1222 BZ2File_get_name(BZ2FileObject *self, void *closure)
1224 return PyObject_GetAttrString(self->file, "name");
1227 static PyGetSetDef BZ2File_getset[] = {
1228 {"closed", (getter)BZ2File_get_closed, NULL,
1229 "True if the file is closed"},
1230 {"newlines", (getter)BZ2File_get_newlines, NULL,
1231 "end-of-line convention used in this file"},
1232 {"mode", (getter)BZ2File_get_mode, NULL,
1233 "file mode ('r', 'w', or 'U')"},
1234 {"name", (getter)BZ2File_get_name, NULL,
1235 "file name"},
1236 {NULL} /* Sentinel */
1240 /* ===================================================================== */
1241 /* Members of BZ2File_Type. */
1243 #undef OFF
1244 #define OFF(x) offsetof(BZ2FileObject, x)
1246 static PyMemberDef BZ2File_members[] = {
1247 {"softspace", T_INT, OFF(f_softspace), 0,
1248 "flag indicating that a space needs to be printed; used by print"},
1249 {NULL} /* Sentinel */
1252 /* ===================================================================== */
1253 /* Slot definitions for BZ2File_Type. */
1255 static int
1256 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1258 static char *kwlist[] = {"filename", "mode", "buffering",
1259 "compresslevel", 0};
1260 PyObject *name;
1261 char *mode = "r";
1262 int buffering = -1;
1263 int compresslevel = 9;
1264 int bzerror;
1265 int mode_char = 0;
1267 self->size = -1;
1269 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1270 kwlist, &name, &mode, &buffering,
1271 &compresslevel))
1272 return -1;
1274 if (compresslevel < 1 || compresslevel > 9) {
1275 PyErr_SetString(PyExc_ValueError,
1276 "compresslevel must be between 1 and 9");
1277 return -1;
1280 for (;;) {
1281 int error = 0;
1282 switch (*mode) {
1283 case 'r':
1284 case 'w':
1285 if (mode_char)
1286 error = 1;
1287 mode_char = *mode;
1288 break;
1290 case 'b':
1291 break;
1293 case 'U':
1294 self->f_univ_newline = 1;
1295 break;
1297 default:
1298 error = 1;
1299 break;
1301 if (error) {
1302 PyErr_Format(PyExc_ValueError,
1303 "invalid mode char %c", *mode);
1304 return -1;
1306 mode++;
1307 if (*mode == '\0')
1308 break;
1311 mode = (mode_char == 'r') ? "rb" : "wb";
1313 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1314 name, mode, buffering);
1315 if (self->file == NULL)
1316 return -1;
1318 /* From now on, we have stuff to dealloc, so jump to error label
1319 * instead of returning */
1321 #ifdef WITH_THREAD
1322 self->lock = PyThread_allocate_lock();
1323 if (!self->lock)
1324 goto error;
1325 #endif
1327 if (mode_char == 'r')
1328 self->fp = BZ2_bzReadOpen(&bzerror,
1329 PyFile_AsFile(self->file),
1330 0, 0, NULL, 0);
1331 else
1332 self->fp = BZ2_bzWriteOpen(&bzerror,
1333 PyFile_AsFile(self->file),
1334 compresslevel, 0, 0);
1336 if (bzerror != BZ_OK) {
1337 Util_CatchBZ2Error(bzerror);
1338 goto error;
1341 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1343 return 0;
1345 error:
1346 Py_DECREF(self->file);
1347 #ifdef WITH_THREAD
1348 if (self->lock)
1349 PyThread_free_lock(self->lock);
1350 #endif
1351 return -1;
1354 static void
1355 BZ2File_dealloc(BZ2FileObject *self)
1357 int bzerror;
1358 #ifdef WITH_THREAD
1359 if (self->lock)
1360 PyThread_free_lock(self->lock);
1361 #endif
1362 switch (self->mode) {
1363 case MODE_READ:
1364 case MODE_READ_EOF:
1365 BZ2_bzReadClose(&bzerror, self->fp);
1366 break;
1367 case MODE_WRITE:
1368 BZ2_bzWriteClose(&bzerror, self->fp,
1369 0, NULL, NULL);
1370 break;
1372 Util_DropReadAhead(self);
1373 Py_XDECREF(self->file);
1374 self->ob_type->tp_free((PyObject *)self);
1377 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1378 static PyObject *
1379 BZ2File_getiter(BZ2FileObject *self)
1381 if (self->mode == MODE_CLOSED) {
1382 PyErr_SetString(PyExc_ValueError,
1383 "I/O operation on closed file");
1384 return NULL;
1386 Py_INCREF((PyObject*)self);
1387 return (PyObject *)self;
1390 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1391 #define READAHEAD_BUFSIZE 8192
1392 static PyObject *
1393 BZ2File_iternext(BZ2FileObject *self)
1395 PyStringObject* ret;
1396 ACQUIRE_LOCK(self);
1397 if (self->mode == MODE_CLOSED) {
1398 PyErr_SetString(PyExc_ValueError,
1399 "I/O operation on closed file");
1400 return NULL;
1402 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1403 RELEASE_LOCK(self);
1404 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1405 Py_XDECREF(ret);
1406 return NULL;
1408 return (PyObject *)ret;
1411 /* ===================================================================== */
1412 /* BZ2File_Type definition. */
1414 PyDoc_VAR(BZ2File__doc__) =
1415 PyDoc_STR(
1416 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1418 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1419 writing. When opened for writing, the file will be created if it doesn't\n\
1420 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1421 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1422 is given, must be a number between 1 and 9.\n\
1424 PyDoc_STR(
1425 "\n\
1426 Add a 'U' to mode to open the file for input with universal newline\n\
1427 support. Any line ending in the input file will be seen as a '\\n' in\n\
1428 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1429 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1430 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1431 newlines are available only when reading.\n\
1435 static PyTypeObject BZ2File_Type = {
1436 PyObject_HEAD_INIT(NULL)
1437 0, /*ob_size*/
1438 "bz2.BZ2File", /*tp_name*/
1439 sizeof(BZ2FileObject), /*tp_basicsize*/
1440 0, /*tp_itemsize*/
1441 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1442 0, /*tp_print*/
1443 0, /*tp_getattr*/
1444 0, /*tp_setattr*/
1445 0, /*tp_compare*/
1446 0, /*tp_repr*/
1447 0, /*tp_as_number*/
1448 0, /*tp_as_sequence*/
1449 0, /*tp_as_mapping*/
1450 0, /*tp_hash*/
1451 0, /*tp_call*/
1452 0, /*tp_str*/
1453 PyObject_GenericGetAttr,/*tp_getattro*/
1454 PyObject_GenericSetAttr,/*tp_setattro*/
1455 0, /*tp_as_buffer*/
1456 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1457 BZ2File__doc__, /*tp_doc*/
1458 0, /*tp_traverse*/
1459 0, /*tp_clear*/
1460 0, /*tp_richcompare*/
1461 0, /*tp_weaklistoffset*/
1462 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1463 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1464 BZ2File_methods, /*tp_methods*/
1465 BZ2File_members, /*tp_members*/
1466 BZ2File_getset, /*tp_getset*/
1467 0, /*tp_base*/
1468 0, /*tp_dict*/
1469 0, /*tp_descr_get*/
1470 0, /*tp_descr_set*/
1471 0, /*tp_dictoffset*/
1472 (initproc)BZ2File_init, /*tp_init*/
1473 PyType_GenericAlloc, /*tp_alloc*/
1474 PyType_GenericNew, /*tp_new*/
1475 _PyObject_Del, /*tp_free*/
1476 0, /*tp_is_gc*/
1480 /* ===================================================================== */
1481 /* Methods of BZ2Comp. */
1483 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1484 "compress(data) -> string\n\
1486 Provide more data to the compressor object. It will return chunks of\n\
1487 compressed data whenever possible. When you've finished providing data\n\
1488 to compress, call the flush() method to finish the compression process,\n\
1489 and return what is left in the internal buffers.\n\
1492 static PyObject *
1493 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1495 char *data;
1496 int datasize;
1497 int bufsize = SMALLCHUNK;
1498 PY_LONG_LONG totalout;
1499 PyObject *ret = NULL;
1500 bz_stream *bzs = &self->bzs;
1501 int bzerror;
1503 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1504 return NULL;
1506 if (datasize == 0)
1507 return PyString_FromString("");
1509 ACQUIRE_LOCK(self);
1510 if (!self->running) {
1511 PyErr_SetString(PyExc_ValueError,
1512 "this object was already flushed");
1513 goto error;
1516 ret = PyString_FromStringAndSize(NULL, bufsize);
1517 if (!ret)
1518 goto error;
1520 bzs->next_in = data;
1521 bzs->avail_in = datasize;
1522 bzs->next_out = BUF(ret);
1523 bzs->avail_out = bufsize;
1525 totalout = BZS_TOTAL_OUT(bzs);
1527 for (;;) {
1528 Py_BEGIN_ALLOW_THREADS
1529 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1530 Py_END_ALLOW_THREADS
1531 if (bzerror != BZ_RUN_OK) {
1532 Util_CatchBZ2Error(bzerror);
1533 goto error;
1535 if (bzs->avail_out == 0) {
1536 bufsize = Util_NewBufferSize(bufsize);
1537 if (_PyString_Resize(&ret, bufsize) < 0) {
1538 BZ2_bzCompressEnd(bzs);
1539 goto error;
1541 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1542 - totalout);
1543 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1544 } else if (bzs->avail_in == 0) {
1545 break;
1549 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1551 RELEASE_LOCK(self);
1552 return ret;
1554 error:
1555 RELEASE_LOCK(self);
1556 Py_XDECREF(ret);
1557 return NULL;
1560 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1561 "flush() -> string\n\
1563 Finish the compression process and return what is left in internal buffers.\n\
1564 You must not use the compressor object after calling this method.\n\
1567 static PyObject *
1568 BZ2Comp_flush(BZ2CompObject *self)
1570 int bufsize = SMALLCHUNK;
1571 PyObject *ret = NULL;
1572 bz_stream *bzs = &self->bzs;
1573 PY_LONG_LONG totalout;
1574 int bzerror;
1576 ACQUIRE_LOCK(self);
1577 if (!self->running) {
1578 PyErr_SetString(PyExc_ValueError, "object was already "
1579 "flushed");
1580 goto error;
1582 self->running = 0;
1584 ret = PyString_FromStringAndSize(NULL, bufsize);
1585 if (!ret)
1586 goto error;
1588 bzs->next_out = BUF(ret);
1589 bzs->avail_out = bufsize;
1591 totalout = BZS_TOTAL_OUT(bzs);
1593 for (;;) {
1594 Py_BEGIN_ALLOW_THREADS
1595 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1596 Py_END_ALLOW_THREADS
1597 if (bzerror == BZ_STREAM_END) {
1598 break;
1599 } else if (bzerror != BZ_FINISH_OK) {
1600 Util_CatchBZ2Error(bzerror);
1601 goto error;
1603 if (bzs->avail_out == 0) {
1604 bufsize = Util_NewBufferSize(bufsize);
1605 if (_PyString_Resize(&ret, bufsize) < 0)
1606 goto error;
1607 bzs->next_out = BUF(ret);
1608 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1609 - totalout);
1610 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1614 if (bzs->avail_out != 0)
1615 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1617 RELEASE_LOCK(self);
1618 return ret;
1620 error:
1621 RELEASE_LOCK(self);
1622 Py_XDECREF(ret);
1623 return NULL;
1626 static PyMethodDef BZ2Comp_methods[] = {
1627 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1628 BZ2Comp_compress__doc__},
1629 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1630 BZ2Comp_flush__doc__},
1631 {NULL, NULL} /* sentinel */
1635 /* ===================================================================== */
1636 /* Slot definitions for BZ2Comp_Type. */
1638 static int
1639 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1641 int compresslevel = 9;
1642 int bzerror;
1643 static char *kwlist[] = {"compresslevel", 0};
1645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1646 kwlist, &compresslevel))
1647 return -1;
1649 if (compresslevel < 1 || compresslevel > 9) {
1650 PyErr_SetString(PyExc_ValueError,
1651 "compresslevel must be between 1 and 9");
1652 goto error;
1655 #ifdef WITH_THREAD
1656 self->lock = PyThread_allocate_lock();
1657 if (!self->lock)
1658 goto error;
1659 #endif
1661 memset(&self->bzs, 0, sizeof(bz_stream));
1662 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1663 if (bzerror != BZ_OK) {
1664 Util_CatchBZ2Error(bzerror);
1665 goto error;
1668 self->running = 1;
1670 return 0;
1671 error:
1672 #ifdef WITH_THREAD
1673 if (self->lock)
1674 PyThread_free_lock(self->lock);
1675 #endif
1676 return -1;
1679 static void
1680 BZ2Comp_dealloc(BZ2CompObject *self)
1682 #ifdef WITH_THREAD
1683 if (self->lock)
1684 PyThread_free_lock(self->lock);
1685 #endif
1686 BZ2_bzCompressEnd(&self->bzs);
1687 self->ob_type->tp_free((PyObject *)self);
1691 /* ===================================================================== */
1692 /* BZ2Comp_Type definition. */
1694 PyDoc_STRVAR(BZ2Comp__doc__,
1695 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1697 Create a new compressor object. This object may be used to compress\n\
1698 data sequentially. If you want to compress data in one shot, use the\n\
1699 compress() function instead. The compresslevel parameter, if given,\n\
1700 must be a number between 1 and 9.\n\
1703 static PyTypeObject BZ2Comp_Type = {
1704 PyObject_HEAD_INIT(NULL)
1705 0, /*ob_size*/
1706 "bz2.BZ2Compressor", /*tp_name*/
1707 sizeof(BZ2CompObject), /*tp_basicsize*/
1708 0, /*tp_itemsize*/
1709 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1710 0, /*tp_print*/
1711 0, /*tp_getattr*/
1712 0, /*tp_setattr*/
1713 0, /*tp_compare*/
1714 0, /*tp_repr*/
1715 0, /*tp_as_number*/
1716 0, /*tp_as_sequence*/
1717 0, /*tp_as_mapping*/
1718 0, /*tp_hash*/
1719 0, /*tp_call*/
1720 0, /*tp_str*/
1721 PyObject_GenericGetAttr,/*tp_getattro*/
1722 PyObject_GenericSetAttr,/*tp_setattro*/
1723 0, /*tp_as_buffer*/
1724 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1725 BZ2Comp__doc__, /*tp_doc*/
1726 0, /*tp_traverse*/
1727 0, /*tp_clear*/
1728 0, /*tp_richcompare*/
1729 0, /*tp_weaklistoffset*/
1730 0, /*tp_iter*/
1731 0, /*tp_iternext*/
1732 BZ2Comp_methods, /*tp_methods*/
1733 0, /*tp_members*/
1734 0, /*tp_getset*/
1735 0, /*tp_base*/
1736 0, /*tp_dict*/
1737 0, /*tp_descr_get*/
1738 0, /*tp_descr_set*/
1739 0, /*tp_dictoffset*/
1740 (initproc)BZ2Comp_init, /*tp_init*/
1741 PyType_GenericAlloc, /*tp_alloc*/
1742 PyType_GenericNew, /*tp_new*/
1743 _PyObject_Del, /*tp_free*/
1744 0, /*tp_is_gc*/
1748 /* ===================================================================== */
1749 /* Members of BZ2Decomp. */
1751 #undef OFF
1752 #define OFF(x) offsetof(BZ2DecompObject, x)
1754 static PyMemberDef BZ2Decomp_members[] = {
1755 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1756 {NULL} /* Sentinel */
1760 /* ===================================================================== */
1761 /* Methods of BZ2Decomp. */
1763 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1764 "decompress(data) -> string\n\
1766 Provide more data to the decompressor object. It will return chunks\n\
1767 of decompressed data whenever possible. If you try to decompress data\n\
1768 after the end of stream is found, EOFError will be raised. If any data\n\
1769 was found after the end of stream, it'll be ignored and saved in\n\
1770 unused_data attribute.\n\
1773 static PyObject *
1774 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1776 char *data;
1777 int datasize;
1778 int bufsize = SMALLCHUNK;
1779 PY_LONG_LONG totalout;
1780 PyObject *ret = NULL;
1781 bz_stream *bzs = &self->bzs;
1782 int bzerror;
1784 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1785 return NULL;
1787 ACQUIRE_LOCK(self);
1788 if (!self->running) {
1789 PyErr_SetString(PyExc_EOFError, "end of stream was "
1790 "already found");
1791 goto error;
1794 ret = PyString_FromStringAndSize(NULL, bufsize);
1795 if (!ret)
1796 goto error;
1798 bzs->next_in = data;
1799 bzs->avail_in = datasize;
1800 bzs->next_out = BUF(ret);
1801 bzs->avail_out = bufsize;
1803 totalout = BZS_TOTAL_OUT(bzs);
1805 for (;;) {
1806 Py_BEGIN_ALLOW_THREADS
1807 bzerror = BZ2_bzDecompress(bzs);
1808 Py_END_ALLOW_THREADS
1809 if (bzerror == BZ_STREAM_END) {
1810 if (bzs->avail_in != 0) {
1811 Py_DECREF(self->unused_data);
1812 self->unused_data =
1813 PyString_FromStringAndSize(bzs->next_in,
1814 bzs->avail_in);
1816 self->running = 0;
1817 break;
1819 if (bzerror != BZ_OK) {
1820 Util_CatchBZ2Error(bzerror);
1821 goto error;
1823 if (bzs->avail_out == 0) {
1824 bufsize = Util_NewBufferSize(bufsize);
1825 if (_PyString_Resize(&ret, bufsize) < 0) {
1826 BZ2_bzDecompressEnd(bzs);
1827 goto error;
1829 bzs->next_out = BUF(ret);
1830 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1831 - totalout);
1832 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1833 } else if (bzs->avail_in == 0) {
1834 break;
1838 if (bzs->avail_out != 0)
1839 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1841 RELEASE_LOCK(self);
1842 return ret;
1844 error:
1845 RELEASE_LOCK(self);
1846 Py_XDECREF(ret);
1847 return NULL;
1850 static PyMethodDef BZ2Decomp_methods[] = {
1851 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1852 {NULL, NULL} /* sentinel */
1856 /* ===================================================================== */
1857 /* Slot definitions for BZ2Decomp_Type. */
1859 static int
1860 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1862 int bzerror;
1864 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1865 return -1;
1867 #ifdef WITH_THREAD
1868 self->lock = PyThread_allocate_lock();
1869 if (!self->lock)
1870 goto error;
1871 #endif
1873 self->unused_data = PyString_FromString("");
1874 if (!self->unused_data)
1875 goto error;
1877 memset(&self->bzs, 0, sizeof(bz_stream));
1878 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1879 if (bzerror != BZ_OK) {
1880 Util_CatchBZ2Error(bzerror);
1881 goto error;
1884 self->running = 1;
1886 return 0;
1888 error:
1889 #ifdef WITH_THREAD
1890 if (self->lock)
1891 PyThread_free_lock(self->lock);
1892 #endif
1893 Py_XDECREF(self->unused_data);
1894 return -1;
1897 static void
1898 BZ2Decomp_dealloc(BZ2DecompObject *self)
1900 #ifdef WITH_THREAD
1901 if (self->lock)
1902 PyThread_free_lock(self->lock);
1903 #endif
1904 Py_XDECREF(self->unused_data);
1905 BZ2_bzDecompressEnd(&self->bzs);
1906 self->ob_type->tp_free((PyObject *)self);
1910 /* ===================================================================== */
1911 /* BZ2Decomp_Type definition. */
1913 PyDoc_STRVAR(BZ2Decomp__doc__,
1914 "BZ2Decompressor() -> decompressor object\n\
1916 Create a new decompressor object. This object may be used to decompress\n\
1917 data sequentially. If you want to decompress data in one shot, use the\n\
1918 decompress() function instead.\n\
1921 static PyTypeObject BZ2Decomp_Type = {
1922 PyObject_HEAD_INIT(NULL)
1923 0, /*ob_size*/
1924 "bz2.BZ2Decompressor", /*tp_name*/
1925 sizeof(BZ2DecompObject), /*tp_basicsize*/
1926 0, /*tp_itemsize*/
1927 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1928 0, /*tp_print*/
1929 0, /*tp_getattr*/
1930 0, /*tp_setattr*/
1931 0, /*tp_compare*/
1932 0, /*tp_repr*/
1933 0, /*tp_as_number*/
1934 0, /*tp_as_sequence*/
1935 0, /*tp_as_mapping*/
1936 0, /*tp_hash*/
1937 0, /*tp_call*/
1938 0, /*tp_str*/
1939 PyObject_GenericGetAttr,/*tp_getattro*/
1940 PyObject_GenericSetAttr,/*tp_setattro*/
1941 0, /*tp_as_buffer*/
1942 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1943 BZ2Decomp__doc__, /*tp_doc*/
1944 0, /*tp_traverse*/
1945 0, /*tp_clear*/
1946 0, /*tp_richcompare*/
1947 0, /*tp_weaklistoffset*/
1948 0, /*tp_iter*/
1949 0, /*tp_iternext*/
1950 BZ2Decomp_methods, /*tp_methods*/
1951 BZ2Decomp_members, /*tp_members*/
1952 0, /*tp_getset*/
1953 0, /*tp_base*/
1954 0, /*tp_dict*/
1955 0, /*tp_descr_get*/
1956 0, /*tp_descr_set*/
1957 0, /*tp_dictoffset*/
1958 (initproc)BZ2Decomp_init, /*tp_init*/
1959 PyType_GenericAlloc, /*tp_alloc*/
1960 PyType_GenericNew, /*tp_new*/
1961 _PyObject_Del, /*tp_free*/
1962 0, /*tp_is_gc*/
1966 /* ===================================================================== */
1967 /* Module functions. */
1969 PyDoc_STRVAR(bz2_compress__doc__,
1970 "compress(data [, compresslevel=9]) -> string\n\
1972 Compress data in one shot. If you want to compress data sequentially,\n\
1973 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1974 given, must be a number between 1 and 9.\n\
1977 static PyObject *
1978 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1980 int compresslevel=9;
1981 char *data;
1982 int datasize;
1983 int bufsize;
1984 PyObject *ret = NULL;
1985 bz_stream _bzs;
1986 bz_stream *bzs = &_bzs;
1987 int bzerror;
1988 static char *kwlist[] = {"data", "compresslevel", 0};
1990 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1991 kwlist, &data, &datasize,
1992 &compresslevel))
1993 return NULL;
1995 if (compresslevel < 1 || compresslevel > 9) {
1996 PyErr_SetString(PyExc_ValueError,
1997 "compresslevel must be between 1 and 9");
1998 return NULL;
2001 /* Conforming to bz2 manual, this is large enough to fit compressed
2002 * data in one shot. We will check it later anyway. */
2003 bufsize = datasize + (datasize/100+1) + 600;
2005 ret = PyString_FromStringAndSize(NULL, bufsize);
2006 if (!ret)
2007 return NULL;
2009 memset(bzs, 0, sizeof(bz_stream));
2011 bzs->next_in = data;
2012 bzs->avail_in = datasize;
2013 bzs->next_out = BUF(ret);
2014 bzs->avail_out = bufsize;
2016 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2017 if (bzerror != BZ_OK) {
2018 Util_CatchBZ2Error(bzerror);
2019 Py_DECREF(ret);
2020 return NULL;
2023 for (;;) {
2024 Py_BEGIN_ALLOW_THREADS
2025 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2026 Py_END_ALLOW_THREADS
2027 if (bzerror == BZ_STREAM_END) {
2028 break;
2029 } else if (bzerror != BZ_FINISH_OK) {
2030 BZ2_bzCompressEnd(bzs);
2031 Util_CatchBZ2Error(bzerror);
2032 Py_DECREF(ret);
2033 return NULL;
2035 if (bzs->avail_out == 0) {
2036 bufsize = Util_NewBufferSize(bufsize);
2037 if (_PyString_Resize(&ret, bufsize) < 0) {
2038 BZ2_bzCompressEnd(bzs);
2039 Py_DECREF(ret);
2040 return NULL;
2042 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2043 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2047 if (bzs->avail_out != 0)
2048 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2049 BZ2_bzCompressEnd(bzs);
2051 return ret;
2054 PyDoc_STRVAR(bz2_decompress__doc__,
2055 "decompress(data) -> decompressed data\n\
2057 Decompress data in one shot. If you want to decompress data sequentially,\n\
2058 use an instance of BZ2Decompressor instead.\n\
2061 static PyObject *
2062 bz2_decompress(PyObject *self, PyObject *args)
2064 char *data;
2065 int datasize;
2066 int bufsize = SMALLCHUNK;
2067 PyObject *ret;
2068 bz_stream _bzs;
2069 bz_stream *bzs = &_bzs;
2070 int bzerror;
2072 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2073 return NULL;
2075 if (datasize == 0)
2076 return PyString_FromString("");
2078 ret = PyString_FromStringAndSize(NULL, bufsize);
2079 if (!ret)
2080 return NULL;
2082 memset(bzs, 0, sizeof(bz_stream));
2084 bzs->next_in = data;
2085 bzs->avail_in = datasize;
2086 bzs->next_out = BUF(ret);
2087 bzs->avail_out = bufsize;
2089 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2090 if (bzerror != BZ_OK) {
2091 Util_CatchBZ2Error(bzerror);
2092 Py_DECREF(ret);
2093 return NULL;
2096 for (;;) {
2097 Py_BEGIN_ALLOW_THREADS
2098 bzerror = BZ2_bzDecompress(bzs);
2099 Py_END_ALLOW_THREADS
2100 if (bzerror == BZ_STREAM_END) {
2101 break;
2102 } else if (bzerror != BZ_OK) {
2103 BZ2_bzDecompressEnd(bzs);
2104 Util_CatchBZ2Error(bzerror);
2105 Py_DECREF(ret);
2106 return NULL;
2108 if (bzs->avail_out == 0) {
2109 bufsize = Util_NewBufferSize(bufsize);
2110 if (_PyString_Resize(&ret, bufsize) < 0) {
2111 BZ2_bzDecompressEnd(bzs);
2112 Py_DECREF(ret);
2113 return NULL;
2115 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2116 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2117 } else if (bzs->avail_in == 0) {
2118 BZ2_bzDecompressEnd(bzs);
2119 PyErr_SetString(PyExc_ValueError,
2120 "couldn't find end of stream");
2121 Py_DECREF(ret);
2122 return NULL;
2126 if (bzs->avail_out != 0)
2127 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2128 BZ2_bzDecompressEnd(bzs);
2130 return ret;
2133 static PyMethodDef bz2_methods[] = {
2134 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2135 bz2_compress__doc__},
2136 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2137 bz2_decompress__doc__},
2138 {NULL, NULL} /* sentinel */
2141 /* ===================================================================== */
2142 /* Initialization function. */
2144 PyDoc_STRVAR(bz2__doc__,
2145 "The python bz2 module provides a comprehensive interface for\n\
2146 the bz2 compression library. It implements a complete file\n\
2147 interface, one shot (de)compression functions, and types for\n\
2148 sequential (de)compression.\n\
2151 PyMODINIT_FUNC
2152 initbz2(void)
2154 PyObject *m;
2156 BZ2File_Type.ob_type = &PyType_Type;
2157 BZ2Comp_Type.ob_type = &PyType_Type;
2158 BZ2Decomp_Type.ob_type = &PyType_Type;
2160 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2162 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2164 Py_INCREF(&BZ2File_Type);
2165 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2167 Py_INCREF(&BZ2Comp_Type);
2168 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2170 Py_INCREF(&BZ2Decomp_Type);
2171 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);