Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
/* Author credit string for the module. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
    Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";

/* Character buffer of a Python string object (no type checking). */
#define BUF(v) PyString_AS_STRING((PyStringObject *)v)

/* Values stored in BZ2FileObject.mode.  MODE_READ_EOF is entered once
 * the bz2 library reports BZ_STREAM_END while reading. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type check: true only when v's type is BZ2File_Type itself. */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
  33
  34 #if SIZEOF_LONG >= 8
  35 #define BZS_TOTAL_OUT(bzs) \
  36         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  37 #elif SIZEOF_LONG_LONG >= 8
  38 #define BZS_TOTAL_OUT(bzs) \
  39         (((LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  40 #else
  41 #define BZS_TOTAL_OUT(bzs) \
  42         bzs->total_out_lo32;
  43 #endif
  44
/* Per-object locking.  With thread support these take/release the
 * `lock` member of obj (the acquire call passes 1 as its second
 * argument); without thread support they expand to nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

#ifdef WITH_UNIVERSAL_NEWLINES
/* Bits in f_newlinetypes */
/* The non-zero values are distinct bits and are OR-ed together as
 * different newline conventions are encountered while reading. */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
#endif
  60
  61 /* ===================================================================== */
  62 /* Structure definitions. */
  63
/* State of a BZ2File object: the bz2 library handle, the current mode,
 * a byte position counter and the optional readahead buffer used for
 * line-at-a-time reading. */
typedef struct {
        PyObject_HEAD
        PyObject *file;         /* NOTE(review): presumably the underlying
                                   Python file object -- confirm against
                                   the constructor */

        char* f_buf;            /* Allocated readahead buffer */
        char* f_bufend;         /* Points after last occupied position */
        char* f_bufptr;         /* Current buffer position */

        int f_softspace;        /* Flag used by 'print' command */

#ifdef WITH_UNIVERSAL_NEWLINES
        int f_univ_newline;     /* Handle any newline convention */
        int f_newlinetypes;     /* Types of newlines seen */
        int f_skipnextlf;       /* Skip next \n */
#endif

        BZFILE *fp;             /* Handle passed to BZ2_bzRead/BZ2_bzWrite */
        int mode;               /* One of the MODE_* constants */
        long pos;               /* Advanced by the read and write helpers
                                   as uncompressed bytes are transferred */
        long size;              /* Set to pos when BZ_STREAM_END is seen */
#ifdef WITH_THREAD
        PyThread_type_lock lock;        /* Used by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2FileObject;
  88
/* Object state wrapping a bz_stream.
 * NOTE(review): judging by the name this is the incremental compressor
 * object; its methods are not in this part of the file -- confirm. */
typedef struct {
        PyObject_HEAD
        bz_stream bzs;          /* bz2 library stream state */
        int running;            /* NOTE(review): presumably non-zero while
                                   the stream is initialized and usable
                                   -- confirm */
#ifdef WITH_THREAD
        PyThread_type_lock lock;        /* Used by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2CompObject;
  97
/* Object state wrapping a bz_stream plus an `unused_data` object.
 * NOTE(review): judging by the name this is the incremental decompressor
 * object; its methods are not in this part of the file -- confirm. */
typedef struct {
        PyObject_HEAD
        bz_stream bzs;          /* bz2 library stream state */
        int running;            /* NOTE(review): presumably non-zero while
                                   the stream is initialized and usable
                                   -- confirm */
        PyObject *unused_data;  /* NOTE(review): presumably input found
                                   after the end of the compressed stream
                                   -- confirm */
#ifdef WITH_THREAD
        PyThread_type_lock lock;        /* Used by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2DecompObject;
 107
 108 /* ===================================================================== */
 109 /* Utility functions. */
 110
 111 static int
 112 Util_CatchBZ2Error(int bzerror)
 113 {
 114         int ret = 0;
 115         switch(bzerror) {
 116                 case BZ_OK:
 117                 case BZ_STREAM_END:
 118                         break;
 119
 120                 case BZ_CONFIG_ERROR:
 121                         PyErr_SetString(PyExc_SystemError,
 122                                         "the bz2 library was not compiled "
 123                                         "correctly");
 124                         ret = 1;
 125                         break;
 126
 127                 case BZ_PARAM_ERROR:
 128                         PyErr_SetString(PyExc_ValueError,
 129                                         "the bz2 library has received wrong "
 130                                         "parameters");
 131                         ret = 1;
 132                         break;
 133
 134                 case BZ_MEM_ERROR:
 135                         PyErr_NoMemory();
 136                         ret = 1;
 137                         break;
 138
 139                 case BZ_DATA_ERROR:
 140                 case BZ_DATA_ERROR_MAGIC:
 141                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 142                         ret = 1;
 143                         break;
 144
 145                 case BZ_IO_ERROR:
 146                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 147                         ret = 1;
 148                         break;
 149
 150                 case BZ_UNEXPECTED_EOF:
 151                         PyErr_SetString(PyExc_EOFError,
 152                                         "compressed file ended before the "
 153                                         "logical end-of-stream was detected");
 154                         ret = 1;
 155                         break;
 156
 157                 case BZ_SEQUENCE_ERROR:
 158                         PyErr_SetString(PyExc_RuntimeError,
 159                                         "wrong sequence of bz2 library "
 160                                         "commands used");
 161                         ret = 1;
 162                         break;
 163         }
 164         return ret;
 165 }
 166
 167 #if BUFSIZ < 8192
 168 #define SMALLCHUNK 8192
 169 #else
 170 #define SMALLCHUNK BUFSIZ
 171 #endif
 172
 173 #if SIZEOF_INT < 4
 174 #define BIGCHUNK  (512 * 32)
 175 #else
 176 #define BIGCHUNK  (512 * 1024)
 177 #endif
 178
 179 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 180 static size_t
 181 Util_NewBufferSize(size_t currentsize)
 182 {
 183         if (currentsize > SMALLCHUNK) {
 184                 /* Keep doubling until we reach BIGCHUNK;
 185                    then keep adding BIGCHUNK. */
 186                 if (currentsize <= BIGCHUNK)
 187                         return currentsize + currentsize;
 188                 else
 189                         return currentsize + BIGCHUNK;
 190         }
 191         return currentsize + SMALLCHUNK;
 192 }
 193
 194 /* This is a hacked version of Python's fileobject.c:get_line(). */
 195 static PyObject *
 196 Util_GetLine(BZ2FileObject *f, int n)
 197 {
 198         char c;
 199         char *buf, *end;
 200         size_t total_v_size;    /* total # of slots in buffer */
 201         size_t used_v_size;     /* # used slots in buffer */
 202         size_t increment;       /* amount to increment the buffer */
 203         PyObject *v;
 204         int bzerror;
 205 #ifdef WITH_UNIVERSAL_NEWLINES
 206         int newlinetypes = f->f_newlinetypes;
 207         int skipnextlf = f->f_skipnextlf;
 208         int univ_newline = f->f_univ_newline;
 209 #endif
 210
 211         total_v_size = n > 0 ? n : 100;
 212         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 213         if (v == NULL)
 214                 return NULL;
 215
 216         buf = BUF(v);
 217         end = buf + total_v_size;
 218
 219         for (;;) {
 220                 Py_BEGIN_ALLOW_THREADS
 221 #ifdef WITH_UNIVERSAL_NEWLINES
 222                 if (univ_newline) {
 223                         while (1) {
 224                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 225                                 f->pos++;
 226                                 if (bzerror != BZ_OK || buf == end)
 227                                         break;
 228                                 if (skipnextlf) {
 229                                         skipnextlf = 0;
 230                                         if (c == '\n') {
 231                                                 /* Seeing a \n here with
 232                                                  * skipnextlf true means we
 233                                                  * saw a \r before.
 234                                                  */
 235                                                 newlinetypes |= NEWLINE_CRLF;
 236                                                 BZ2_bzRead(&bzerror, f->fp,
 237                                                            &c, 1);
 238                                                 if (bzerror != BZ_OK)
 239                                                         break;
 240                                         } else {
 241                                                 newlinetypes |= NEWLINE_CR;
 242                                         }
 243                                 }
 244                                 if (c == '\r') {
 245                                         skipnextlf = 1;
 246                                         c = '\n';
 247                                 } else if ( c == '\n')
 248                                         newlinetypes |= NEWLINE_LF;
 249                                 *buf++ = c;
 250                                 if (c == '\n') break;
 251                         }
 252                         if (bzerror == BZ_STREAM_END && skipnextlf)
 253                                 newlinetypes |= NEWLINE_CR;
 254                 } else /* If not universal newlines use the normal loop */
 255 #endif
 256                         do {
 257                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 258                                 f->pos++;
 259                                 *buf++ = c;
 260                         } while (bzerror == BZ_OK && c != '\n' && buf != end);
 261                 Py_END_ALLOW_THREADS
 262 #ifdef WITH_UNIVERSAL_NEWLINES
 263                 f->f_newlinetypes = newlinetypes;
 264                 f->f_skipnextlf = skipnextlf;
 265 #endif
 266                 if (bzerror == BZ_STREAM_END) {
 267                         f->size = f->pos;
 268                         f->mode = MODE_READ_EOF;
 269                         break;
 270                 } else if (bzerror != BZ_OK) {
 271                         Util_CatchBZ2Error(bzerror);
 272                         Py_DECREF(v);
 273                         return NULL;
 274                 }
 275                 if (c == '\n')
 276                         break;
 277                 /* Must be because buf == end */
 278                 if (n > 0)
 279                         break;
 280                 used_v_size = total_v_size;
 281                 increment = total_v_size >> 2; /* mild exponential growth */
 282                 total_v_size += increment;
 283                 if (total_v_size > INT_MAX) {
 284                         PyErr_SetString(PyExc_OverflowError,
 285                             "line is longer than a Python string can hold");
 286                         Py_DECREF(v);
 287                         return NULL;
 288                 }
 289                 if (_PyString_Resize(&v, total_v_size) < 0)
 290                         return NULL;
 291                 buf = BUF(v) + used_v_size;
 292                 end = BUF(v) + total_v_size;
 293         }
 294
 295         used_v_size = buf - BUF(v);
 296         if (used_v_size != total_v_size)
 297                 _PyString_Resize(&v, used_v_size);
 298         return v;
 299 }
 300
 301 #ifndef WITH_UNIVERSAL_NEWLINES
 302 #define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
 303 #else
 304 /* This is a hacked version of Python's
 305  * fileobject.c:Py_UniversalNewlineFread(). */
 306 size_t
 307 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 308                      char* buf, size_t n, BZ2FileObject *f)
 309 {
 310         char *dst = buf;
 311         int newlinetypes, skipnextlf;
 312
 313         assert(buf != NULL);
 314         assert(stream != NULL);
 315
 316         if (!f->f_univ_newline)
 317                 return BZ2_bzRead(bzerror, stream, buf, n);
 318
 319         newlinetypes = f->f_newlinetypes;
 320         skipnextlf = f->f_skipnextlf;
 321
 322         /* Invariant:  n is the number of bytes remaining to be filled
 323          * in the buffer.
 324          */
 325         while (n) {
 326                 size_t nread;
 327                 int shortread;
 328                 char *src = dst;
 329
 330                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 331                 assert(nread <= n);
 332                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 333                 shortread = n != 0;     /* true iff EOF or error */
 334                 while (nread--) {
 335                         char c = *src++;
 336                         if (c == '\r') {
 337                                 /* Save as LF and set flag to skip next LF. */
 338                                 *dst++ = '\n';
 339                                 skipnextlf = 1;
 340                         }
 341                         else if (skipnextlf && c == '\n') {
 342                                 /* Skip LF, and remember we saw CR LF. */
 343                                 skipnextlf = 0;
 344                                 newlinetypes |= NEWLINE_CRLF;
 345                                 ++n;
 346                         }
 347                         else {
 348                                 /* Normal char to be stored in buffer.  Also
 349                                  * update the newlinetypes flag if either this
 350                                  * is an LF or the previous char was a CR.
 351                                  */
 352                                 if (c == '\n')
 353                                         newlinetypes |= NEWLINE_LF;
 354                                 else if (skipnextlf)
 355                                         newlinetypes |= NEWLINE_CR;
 356                                 *dst++ = c;
 357                                 skipnextlf = 0;
 358                         }
 359                 }
 360                 if (shortread) {
 361                         /* If this is EOF, update type flags. */
 362                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 363                                 newlinetypes |= NEWLINE_CR;
 364                         break;
 365                 }
 366         }
 367         f->f_newlinetypes = newlinetypes;
 368         f->f_skipnextlf = skipnextlf;
 369         return dst - buf;
 370 }
 371 #endif
 372
 373 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 374 static void
 375 Util_DropReadAhead(BZ2FileObject *f)
 376 {
 377         if (f->f_buf != NULL) {
 378                 PyMem_Free(f->f_buf);
 379                 f->f_buf = NULL;
 380         }
 381 }
 382
 383 /* This is a hacked version of Python's fileobject.c:readahead(). */
 384 static int
 385 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 386 {
 387         int chunksize;
 388         int bzerror;
 389
 390         if (f->f_buf != NULL) {
 391                 if((f->f_bufend - f->f_bufptr) >= 1)
 392                         return 0;
 393                 else
 394                         Util_DropReadAhead(f);
 395         }
 396         if (f->mode == MODE_READ_EOF) {
 397                 return -1;
 398         }
 399         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 400                 return -1;
 401         }
 402         Py_BEGIN_ALLOW_THREADS
 403         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 404                                          bufsize, f);
 405         Py_END_ALLOW_THREADS
 406         f->pos += chunksize;
 407         if (bzerror == BZ_STREAM_END) {
 408                 f->size = f->pos;
 409                 f->mode = MODE_READ_EOF;
 410         } else if (bzerror != BZ_OK) {
 411                 Util_CatchBZ2Error(bzerror);
 412                 Util_DropReadAhead(f);
 413                 return -1;
 414         }
 415         f->f_bufptr = f->f_buf;
 416         f->f_bufend = f->f_buf + chunksize;
 417         return 0;
 418 }
 419
 420 /* This is a hacked version of Python's
 421  * fileobject.c:readahead_get_line_skip(). */
 422 static PyStringObject *
 423 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 424 {
 425         PyStringObject* s;
 426         char *bufptr;
 427         char *buf;
 428         int len;
 429
 430         if (f->f_buf == NULL)
 431                 if (Util_ReadAhead(f, bufsize) < 0)
 432                         return NULL;
 433
 434         len = f->f_bufend - f->f_bufptr;
 435         if (len == 0)
 436                 return (PyStringObject *)
 437                         PyString_FromStringAndSize(NULL, skip);
 438         bufptr = memchr(f->f_bufptr, '\n', len);
 439         if (bufptr != NULL) {
 440                 bufptr++;                       /* Count the '\n' */
 441                 len = bufptr - f->f_bufptr;
 442                 s = (PyStringObject *)
 443                         PyString_FromStringAndSize(NULL, skip+len);
 444                 if (s == NULL)
 445                         return NULL;
 446                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 447                 f->f_bufptr = bufptr;
 448                 if (bufptr == f->f_bufend)
 449                         Util_DropReadAhead(f);
 450         } else {
 451                 bufptr = f->f_bufptr;
 452                 buf = f->f_buf;
 453                 f->f_buf = NULL;        /* Force new readahead buffer */
 454                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 455                                               bufsize + (bufsize>>2));
 456                 if (s == NULL) {
 457                         PyMem_Free(buf);
 458                         return NULL;
 459                 }
 460                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 461                 PyMem_Free(buf);
 462         }
 463         return s;
 464 }
 465
 466 /* ===================================================================== */
 467 /* Methods of BZ2File. */
 468
 469 PyDoc_STRVAR(BZ2File_read__doc__,
 470 "read([size]) -> string\n\
 471 \n\
 472 Read at most size uncompressed bytes, returned as a string. If the size\n\
 473 argument is negative or omitted, read until EOF is reached.\n\
 474 ");
 475
 476 /* This is a hacked version of Python's fileobject.c:file_read(). */
 477 static PyObject *
 478 BZ2File_read(BZ2FileObject *self, PyObject *args)
 479 {
 480         long bytesrequested = -1;
 481         size_t bytesread, buffersize, chunksize;
 482         int bzerror;
 483         PyObject *ret = NULL;
 484
 485         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 486                 return NULL;
 487
 488         ACQUIRE_LOCK(self);
 489         switch (self->mode) {
 490                 case MODE_READ:
 491                         break;
 492                 case MODE_READ_EOF:
 493                         ret = PyString_FromString("");
 494                         goto cleanup;
 495                 case MODE_CLOSED:
 496                         PyErr_SetString(PyExc_ValueError,
 497                                         "I/O operation on closed file");
 498                         goto cleanup;
 499                 default:
 500                         PyErr_SetString(PyExc_IOError,
 501                                         "file is not ready for reading");
 502                         goto cleanup;
 503         }
 504
 505         if (bytesrequested < 0)
 506                 buffersize = Util_NewBufferSize((size_t)0);
 507         else
 508                 buffersize = bytesrequested;
 509         if (buffersize > INT_MAX) {
 510                 PyErr_SetString(PyExc_OverflowError,
 511                                 "requested number of bytes is "
 512                                 "more than a Python string can hold");
 513                 goto cleanup;
 514         }
 515         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 516         if (ret == NULL)
 517                 goto cleanup;
 518         bytesread = 0;
 519
 520         for (;;) {
 521                 Py_BEGIN_ALLOW_THREADS
 522                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 523                                                  BUF(ret)+bytesread,
 524                                                  buffersize-bytesread,
 525                                                  self);
 526                 self->pos += chunksize;
 527                 Py_END_ALLOW_THREADS
 528                 bytesread += chunksize;
 529                 if (bzerror == BZ_STREAM_END) {
 530                         self->size = self->pos;
 531                         self->mode = MODE_READ_EOF;
 532                         break;
 533                 } else if (bzerror != BZ_OK) {
 534                         Util_CatchBZ2Error(bzerror);
 535                         Py_DECREF(ret);
 536                         ret = NULL;
 537                         goto cleanup;
 538                 }
 539                 if (bytesrequested < 0) {
 540                         buffersize = Util_NewBufferSize(buffersize);
 541                         if (_PyString_Resize(&ret, buffersize) < 0)
 542                                 goto cleanup;
 543                 } else {
 544                         break;
 545                 }
 546         }
 547         if (bytesread != buffersize)
 548                 _PyString_Resize(&ret, bytesread);
 549
 550 cleanup:
 551         RELEASE_LOCK(self);
 552         return ret;
 553 }
 554
 555 PyDoc_STRVAR(BZ2File_readline__doc__,
 556 "readline([size]) -> string\n\
 557 \n\
 558 Return the next line from the file, as a string, retaining newline.\n\
 559 A non-negative size argument will limit the maximum number of bytes to\n\
 560 return (an incomplete line may be returned then). Return an empty\n\
 561 string at EOF.\n\
 562 ");
 563
 564 static PyObject *
 565 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 566 {
 567         PyObject *ret = NULL;
 568         int sizehint = -1;
 569
 570         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 571                 return NULL;
 572
 573         ACQUIRE_LOCK(self);
 574         switch (self->mode) {
 575                 case MODE_READ:
 576                         break;
 577                 case MODE_READ_EOF:
 578                         ret = PyString_FromString("");
 579                         goto cleanup;
 580                 case MODE_CLOSED:
 581                         PyErr_SetString(PyExc_ValueError,
 582                                         "I/O operation on closed file");
 583                         goto cleanup;
 584                 default:
 585                         PyErr_SetString(PyExc_IOError,
 586                                         "file is not ready for reading");
 587                         goto cleanup;
 588         }
 589
 590         if (sizehint == 0)
 591                 ret = PyString_FromString("");
 592         else
 593                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 594
 595 cleanup:
 596         RELEASE_LOCK(self);
 597         return ret;
 598 }
 599
 600 PyDoc_STRVAR(BZ2File_readlines__doc__,
 601 "readlines([size]) -> list\n\
 602 \n\
 603 Call readline() repeatedly and return a list of lines read.\n\
 604 The optional size argument, if given, is an approximate bound on the\n\
 605 total number of bytes in the lines returned.\n\
 606 ");
 607
 608 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 609 static PyObject *
 610 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 611 {
 612         long sizehint = 0;
 613         PyObject *list = NULL;
 614         PyObject *line;
 615         char small_buffer[SMALLCHUNK];
 616         char *buffer = small_buffer;
 617         size_t buffersize = SMALLCHUNK;
 618         PyObject *big_buffer = NULL;
 619         size_t nfilled = 0;
 620         size_t nread;
 621         size_t totalread = 0;
 622         char *p, *q, *end;
 623         int err;
 624         int shortread = 0;
 625         int bzerror;
 626
 627         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 628                 return NULL;
 629
 630         ACQUIRE_LOCK(self);
 631         switch (self->mode) {
 632                 case MODE_READ:
 633                         break;
 634                 case MODE_READ_EOF:
 635                         list = PyList_New(0);
 636                         goto cleanup;
 637                 case MODE_CLOSED:
 638                         PyErr_SetString(PyExc_ValueError,
 639                                         "I/O operation on closed file");
 640                         goto cleanup;
 641                 default:
 642                         PyErr_SetString(PyExc_IOError,
 643                                         "file is not ready for reading");
 644                         goto cleanup;
 645         }
 646
 647         if ((list = PyList_New(0)) == NULL)
 648                 goto cleanup;
 649
 650         for (;;) {
 651                 Py_BEGIN_ALLOW_THREADS
 652                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 653                                              buffer+nfilled,
 654                                              buffersize-nfilled, self);
 655                 self->pos += nread;
 656                 Py_END_ALLOW_THREADS
 657                 if (bzerror == BZ_STREAM_END) {
 658                         self->size = self->pos;
 659                         self->mode = MODE_READ_EOF;
 660                         if (nread == 0) {
 661                                 sizehint = 0;
 662                                 break;
 663                         }
 664                         shortread = 1;
 665                 } else if (bzerror != BZ_OK) {
 666                         Util_CatchBZ2Error(bzerror);
 667                   error:
 668                         Py_DECREF(list);
 669                         list = NULL;
 670                         goto cleanup;
 671                 }
 672                 totalread += nread;
 673                 p = memchr(buffer+nfilled, '\n', nread);
 674                 if (p == NULL) {
 675                         /* Need a larger buffer to fit this line */
 676                         nfilled += nread;
 677                         buffersize *= 2;
 678                         if (buffersize > INT_MAX) {
 679                                 PyErr_SetString(PyExc_OverflowError,
 680                             "line is longer than a Python string can hold");
 681                                 goto error;
 682                         }
 683                         if (big_buffer == NULL) {
 684                                 /* Create the big buffer */
 685                                 big_buffer = PyString_FromStringAndSize(
 686                                         NULL, buffersize);
 687                                 if (big_buffer == NULL)
 688                                         goto error;
 689                                 buffer = PyString_AS_STRING(big_buffer);
 690                                 memcpy(buffer, small_buffer, nfilled);
 691                         }
 692                         else {
 693                                 /* Grow the big buffer */
 694                                 _PyString_Resize(&big_buffer, buffersize);
 695                                 buffer = PyString_AS_STRING(big_buffer);
 696                         }
 697                         continue;
 698                 }
 699                 end = buffer+nfilled+nread;
 700                 q = buffer;
 701                 do {
 702                         /* Process complete lines */
 703                         p++;
 704                         line = PyString_FromStringAndSize(q, p-q);
 705                         if (line == NULL)
 706                                 goto error;
 707                         err = PyList_Append(list, line);
 708                         Py_DECREF(line);
 709                         if (err != 0)
 710                                 goto error;
 711                         q = p;
 712                         p = memchr(q, '\n', end-q);
 713                 } while (p != NULL);
 714                 /* Move the remaining incomplete line to the start */
 715                 nfilled = end-q;
 716                 memmove(buffer, q, nfilled);
 717                 if (sizehint > 0)
 718                         if (totalread >= (size_t)sizehint)
 719                                 break;
 720                 if (shortread) {
 721                         sizehint = 0;
 722                         break;
 723                 }
 724         }
 725         if (nfilled != 0) {
 726                 /* Partial last line */
 727                 line = PyString_FromStringAndSize(buffer, nfilled);
 728                 if (line == NULL)
 729                         goto error;
 730                 if (sizehint > 0) {
 731                         /* Need to complete the last line */
 732                         PyObject *rest = Util_GetLine(self, 0);
 733                         if (rest == NULL) {
 734                                 Py_DECREF(line);
 735                                 goto error;
 736                         }
 737                         PyString_Concat(&line, rest);
 738                         Py_DECREF(rest);
 739                         if (line == NULL)
 740                                 goto error;
 741                 }
 742                 err = PyList_Append(list, line);
 743                 Py_DECREF(line);
 744                 if (err != 0)
 745                         goto error;
 746         }
 747
 748   cleanup:
 749         RELEASE_LOCK(self);
 750         if (big_buffer) {
 751                 Py_DECREF(big_buffer);
 752         }
 753         return list;
 754 }
 755
/* Docstring for the xreadlines() method; the method implementation is
 * defined elsewhere in this file. */
PyDoc_STRVAR(BZ2File_xreadlines__doc__,
"xreadlines() -> self\n\
\n\
For backward compatibility. BZ2File objects now include the performance\n\
optimizations previously implemented in the xreadlines module.\n\
");
 762
 763 PyDoc_STRVAR(BZ2File_write__doc__,
 764 "write(data) -> None\n\
 765 \n\
 766 Write the 'data' string to file. Note that due to buffering, close() may\n\
 767 be needed before the file on disk reflects the data written.\n\
 768 ");
 769
 770 /* This is a hacked version of Python's fileobject.c:file_write(). */
 771 static PyObject *
 772 BZ2File_write(BZ2FileObject *self, PyObject *args)
 773 {
 774         PyObject *ret = NULL;
 775         char *buf;
 776         int len;
 777         int bzerror;
 778
 779         if (!PyArg_ParseTuple(args, "s#", &buf, &len))
 780                 return NULL;
 781
 782         ACQUIRE_LOCK(self);
 783         switch (self->mode) {
 784                 case MODE_WRITE:
 785                         break;
 786
 787                 case MODE_CLOSED:
 788                         PyErr_SetString(PyExc_ValueError,
 789                                         "I/O operation on closed file");
 790                         goto cleanup;;
 791
 792                 default:
 793                         PyErr_SetString(PyExc_IOError,
 794                                         "file is not ready for writing");
 795                         goto cleanup;;
 796         }
 797
 798         self->f_softspace = 0;
 799
 800         Py_BEGIN_ALLOW_THREADS
 801         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 802         self->pos += len;
 803         Py_END_ALLOW_THREADS
 804
 805         if (bzerror != BZ_OK) {
 806                 Util_CatchBZ2Error(bzerror);
 807                 goto cleanup;
 808         }
 809
 810         Py_INCREF(Py_None);
 811         ret = Py_None;
 812
 813 cleanup:
 814         RELEASE_LOCK(self);
 815         return ret;
 816 }
 817
 818 PyDoc_STRVAR(BZ2File_writelines__doc__,
 819 "writelines(sequence_of_strings) -> None\n\
 820 \n\
 821 Write the sequence of strings to the file. Note that newlines are not\n\
 822 added. The sequence can be any iterable object producing strings. This is\n\
 823 equivalent to calling write() for each string.\n\
 824 ");
 825
 826 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 827 static PyObject *
 828 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 829 {
 830 #define CHUNKSIZE 1000
 831         PyObject *list = NULL;
 832         PyObject *iter = NULL;
 833         PyObject *ret = NULL;
 834         PyObject *line;
 835         int i, j, index, len, islist;
 836         int bzerror;
 837
 838         ACQUIRE_LOCK(self);
 839         islist = PyList_Check(seq);
 840         if  (!islist) {
 841                 iter = PyObject_GetIter(seq);
 842                 if (iter == NULL) {
 843                         PyErr_SetString(PyExc_TypeError,
 844                                 "writelines() requires an iterable argument");
 845                         goto error;
 846                 }
 847                 list = PyList_New(CHUNKSIZE);
 848                 if (list == NULL)
 849                         goto error;
 850         }
 851
 852         /* Strategy: slurp CHUNKSIZE lines into a private list,
 853            checking that they are all strings, then write that list
 854            without holding the interpreter lock, then come back for more. */
 855         for (index = 0; ; index += CHUNKSIZE) {
 856                 if (islist) {
 857                         Py_XDECREF(list);
 858                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 859                         if (list == NULL)
 860                                 goto error;
 861                         j = PyList_GET_SIZE(list);
 862                 }
 863                 else {
 864                         for (j = 0; j < CHUNKSIZE; j++) {
 865                                 line = PyIter_Next(iter);
 866                                 if (line == NULL) {
 867                                         if (PyErr_Occurred())
 868                                                 goto error;
 869                                         break;
 870                                 }
 871                                 PyList_SetItem(list, j, line);
 872                         }
 873                 }
 874                 if (j == 0)
 875                         break;
 876
 877                 /* Check that all entries are indeed strings. If not,
 878                    apply the same rules as for file.write() and
 879                    convert the rets to strings. This is slow, but
 880                    seems to be the only way since all conversion APIs
 881                    could potentially execute Python code. */
 882                 for (i = 0; i < j; i++) {
 883                         PyObject *v = PyList_GET_ITEM(list, i);
 884                         if (!PyString_Check(v)) {
 885                                 const char *buffer;
 886                                 int len;
 887                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 888                                         PyErr_SetString(PyExc_TypeError,
 889                                                         "writelines() "
 890                                                         "argument must be "
 891                                                         "a sequence of "
 892                                                         "strings");
 893                                         goto error;
 894                                 }
 895                                 line = PyString_FromStringAndSize(buffer,
 896                                                                   len);
 897                                 if (line == NULL)
 898                                         goto error;
 899                                 Py_DECREF(v);
 900                                 PyList_SET_ITEM(list, i, line);
 901                         }
 902                 }
 903
 904                 self->f_softspace = 0;
 905
 906                 /* Since we are releasing the global lock, the
 907                    following code may *not* execute Python code. */
 908                 Py_BEGIN_ALLOW_THREADS
 909                 for (i = 0; i < j; i++) {
 910                         line = PyList_GET_ITEM(list, i);
 911                         len = PyString_GET_SIZE(line);
 912                         BZ2_bzWrite (&bzerror, self->fp,
 913                                      PyString_AS_STRING(line), len);
 914                         if (bzerror != BZ_OK) {
 915                                 Py_BLOCK_THREADS
 916                                 Util_CatchBZ2Error(bzerror);
 917                                 goto error;
 918                         }
 919                 }
 920                 Py_END_ALLOW_THREADS
 921
 922                 if (j < CHUNKSIZE)
 923                         break;
 924         }
 925
 926         Py_INCREF(Py_None);
 927         ret = Py_None;
 928
 929   error:
 930         RELEASE_LOCK(self);
 931         Py_XDECREF(list);
 932         Py_XDECREF(iter);
 933         return ret;
 934 #undef CHUNKSIZE
 935 }
 936
 937 PyDoc_STRVAR(BZ2File_seek__doc__,
 938 "seek(offset [, whence]) -> None\n\
 939 \n\
 940 Move to new file position. Argument offset is a byte count. Optional\n\
 941 argument whence defaults to 0 (offset from start of file, offset\n\
 942 should be >= 0); other values are 1 (move relative to current position,\n\
 943 positive or negative), and 2 (move relative to end of file, usually\n\
 944 negative, although many platforms allow seeking beyond the end of a file).\n\
 945 \n\
 946 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 947 the operation may be extremely slow.\n\
 948 ");
 949
 950 static PyObject *
 951 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 952 {
 953         int where = 0;
 954         long offset;
 955         char small_buffer[SMALLCHUNK];
 956         char *buffer = small_buffer;
 957         size_t buffersize = SMALLCHUNK;
 958         int bytesread = 0;
 959         int readsize;
 960         int chunksize;
 961         int bzerror;
 962         int rewind = 0;
 963         PyObject *ret = NULL;
 964
 965         if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
 966                 return NULL;
 967
 968         ACQUIRE_LOCK(self);
 969         Util_DropReadAhead(self);
 970         switch (self->mode) {
 971                 case MODE_READ:
 972                 case MODE_READ_EOF:
 973                         break;
 974
 975                 case MODE_CLOSED:
 976                         PyErr_SetString(PyExc_ValueError,
 977                                         "I/O operation on closed file");
 978                         goto cleanup;;
 979
 980                 default:
 981                         PyErr_SetString(PyExc_IOError,
 982                                         "seek works only while reading");
 983                         goto cleanup;;
 984         }
 985
 986         if (offset < 0) {
 987                 if (where == 1) {
 988                         offset = self->pos + offset;
 989                         rewind = 1;
 990                 } else if (where == 2) {
 991                         if (self->size == -1) {
 992                                 assert(self->mode != MODE_READ_EOF);
 993                                 for (;;) {
 994                                         Py_BEGIN_ALLOW_THREADS
 995                                         chunksize = Util_UnivNewlineRead(
 996                                                         &bzerror, self->fp,
 997                                                         buffer, buffersize,
 998                                                         self);
 999                                         self->pos += chunksize;
1000                                         Py_END_ALLOW_THREADS
1001
1002                                         bytesread += chunksize;
1003                                         if (bzerror == BZ_STREAM_END) {
1004                                                 break;
1005                                         } else if (bzerror != BZ_OK) {
1006                                                 Util_CatchBZ2Error(bzerror);
1007                                                 goto cleanup;
1008                                         }
1009                                 }
1010                                 self->mode = MODE_READ_EOF;
1011                                 self->size = self->pos;
1012                                 bytesread = 0;
1013                         }
1014                         offset = self->size + offset;
1015                         if (offset >= self->pos)
1016                                 offset -= self->pos;
1017                         else
1018                                 rewind = 1;
1019                 }
1020                 if (offset < 0)
1021                         offset = 0;
1022         } else if (where == 0) {
1023                 if (offset >= self->pos)
1024                         offset -= self->pos;
1025                 else
1026                         rewind = 1;
1027         }
1028
1029         if (rewind) {
1030                 BZ2_bzReadClose(&bzerror, self->fp);
1031                 if (bzerror != BZ_OK) {
1032                         Util_CatchBZ2Error(bzerror);
1033                         goto cleanup;
1034                 }
1035                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1036                 if (!ret)
1037                         goto cleanup;
1038                 Py_DECREF(ret);
1039                 ret = NULL;
1040                 self->pos = 0;
1041                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1042                                           0, 0, NULL, 0);
1043                 if (bzerror != BZ_OK) {
1044                         Util_CatchBZ2Error(bzerror);
1045                         goto cleanup;
1046                 }
1047                 self->mode = MODE_READ;
1048         } else if (self->mode == MODE_READ_EOF) {
1049                 goto exit;
1050         }
1051
1052         if (offset == 0)
1053                 goto exit;
1054
1055         /* Before getting here, offset must be set to the number of bytes
1056          * to walk forward. */
1057         for (;;) {
1058                 if ((size_t)offset-bytesread > buffersize)
1059                         readsize = buffersize;
1060                 else
1061                         readsize = offset-bytesread;
1062                 Py_BEGIN_ALLOW_THREADS
1063                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1064                                                  buffer, readsize, self);
1065                 self->pos += chunksize;
1066                 Py_END_ALLOW_THREADS
1067                 bytesread += chunksize;
1068                 if (bzerror == BZ_STREAM_END) {
1069                         self->size = self->pos;
1070                         self->mode = MODE_READ_EOF;
1071                         break;
1072                 } else if (bzerror != BZ_OK) {
1073                         Util_CatchBZ2Error(bzerror);
1074                         goto cleanup;
1075                 }
1076                 if (bytesread == offset)
1077                         break;
1078         }
1079
1080 exit:
1081         Py_INCREF(Py_None);
1082         ret = Py_None;
1083
1084 cleanup:
1085         RELEASE_LOCK(self);
1086         return ret;
1087 }
1088
1089 PyDoc_STRVAR(BZ2File_tell__doc__,
1090 "tell() -> int\n\
1091 \n\
1092 Return the current file position, an integer (may be a long integer).\n\
1093 ");
1094
1095 static PyObject *
1096 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1097 {
1098         PyObject *ret = NULL;
1099
1100         if (self->mode == MODE_CLOSED) {
1101                 PyErr_SetString(PyExc_ValueError,
1102                                 "I/O operation on closed file");
1103                 goto cleanup;
1104         }
1105
1106         ret = PyInt_FromLong(self->pos);
1107
1108 cleanup:
1109         return ret;
1110 }
1111
1112 PyDoc_STRVAR(BZ2File_close__doc__,
1113 "close() -> None or (perhaps) an integer\n\
1114 \n\
1115 Close the file. Sets data attribute .closed to true. A closed file\n\
1116 cannot be used for further I/O operations. close() may be called more\n\
1117 than once without error.\n\
1118 ");
1119
1120 static PyObject *
1121 BZ2File_close(BZ2FileObject *self)
1122 {
1123         PyObject *ret = NULL;
1124         int bzerror = BZ_OK;
1125
1126         ACQUIRE_LOCK(self);
1127         switch (self->mode) {
1128                 case MODE_READ:
1129                 case MODE_READ_EOF:
1130                         BZ2_bzReadClose(&bzerror, self->fp);
1131                         break;
1132                 case MODE_WRITE:
1133                         BZ2_bzWriteClose(&bzerror, self->fp,
1134                                          0, NULL, NULL);
1135                         break;
1136         }
1137         self->mode = MODE_CLOSED;
1138         ret = PyObject_CallMethod(self->file, "close", NULL);
1139         if (bzerror != BZ_OK) {
1140                 Util_CatchBZ2Error(bzerror);
1141                 Py_XDECREF(ret);
1142                 ret = NULL;
1143         }
1144
1145         RELEASE_LOCK(self);
1146         return ret;
1147 }
1148
1149 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1150
1151 static PyMethodDef BZ2File_methods[] = {
1152         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1153         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1154         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1155         {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1156         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1157         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1158         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1159         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1160         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1161         {NULL,          NULL}           /* sentinel */
1162 };
1163
1164
1165 /* ===================================================================== */
1166 /* Getters and setters of BZ2File. */
1167
#ifdef WITH_UNIVERSAL_NEWLINES
/* This is a hacked version of Python's fileobject.c:get_newlines().
 *
 * Getter for the `newlines` attribute.  Translates the NEWLINE_* bits in
 * self->f_newlinetypes into None (no bit set), a single string (one bit
 * set) or a tuple of strings ordered "\r", "\n", "\r\n".  Any bit outside
 * the three known ones raises SystemError.
 */
static PyObject *
BZ2File_get_newlines(BZ2FileObject *self, void *closure)
{
        const int known_bits = NEWLINE_CR | NEWLINE_LF | NEWLINE_CRLF;
        const int bits = self->f_newlinetypes;
        const char *seen[3];
        int nseen = 0;

        if (bits & ~known_bits) {
                PyErr_Format(PyExc_SystemError,
                             "Unknown newlines value 0x%x\n",
                             self->f_newlinetypes);
                return NULL;
        }

        /* Collect the newline kinds seen, in the fixed reporting order. */
        if (bits & NEWLINE_CR)
                seen[nseen++] = "\r";
        if (bits & NEWLINE_LF)
                seen[nseen++] = "\n";
        if (bits & NEWLINE_CRLF)
                seen[nseen++] = "\r\n";

        if (nseen == 0) {
                Py_INCREF(Py_None);
                return Py_None;
        }
        if (nseen == 1)
                return PyString_FromString(seen[0]);
        if (nseen == 2)
                return Py_BuildValue("(ss)", seen[0], seen[1]);
        return Py_BuildValue("(sss)", seen[0], seen[1], seen[2]);
}
#endif
1199
1200 static PyObject *
1201 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1202 {
1203         return PyInt_FromLong(self->mode == MODE_CLOSED);
1204 }
1205
1206 static PyObject *
1207 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1208 {
1209         return PyObject_GetAttrString(self->file, "mode");
1210 }
1211
1212 static PyObject *
1213 BZ2File_get_name(BZ2FileObject *self, void *closure)
1214 {
1215         return PyObject_GetAttrString(self->file, "name");
1216 }
1217
1218 static PyGetSetDef BZ2File_getset[] = {
1219         {"closed", (getter)BZ2File_get_closed, NULL,
1220                         "True if the file is closed"},
1221 #ifdef WITH_UNIVERSAL_NEWLINES
1222         {"newlines", (getter)BZ2File_get_newlines, NULL,
1223                         "end-of-line convention used in this file"},
1224 #endif
1225         {"mode", (getter)BZ2File_get_mode, NULL,
1226                         "file mode ('r', 'w', or 'U')"},
1227         {"name", (getter)BZ2File_get_name, NULL,
1228                         "file name"},
1229         {NULL}  /* Sentinel */
1230 };
1231
1232
1233 /* ===================================================================== */
1234 /* Members of BZ2File_Type. */
1235
1236 #undef OFF
1237 #define OFF(x) offsetof(BZ2FileObject, x)
1238
1239 static PyMemberDef BZ2File_members[] = {
1240         {"softspace",   T_INT,          OFF(f_softspace), 0,
1241          "flag indicating that a space needs to be printed; used by print"},
1242         {NULL}  /* Sentinel */
1243 };
1244
1245 /* ===================================================================== */
1246 /* Slot definitions for BZ2File_Type. */
1247
1248 static int
1249 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1250 {
1251         static char *kwlist[] = {"filename", "mode", "buffering",
1252                                  "compresslevel", 0};
1253         PyObject *name;
1254         char *mode = "r";
1255         int buffering = -1;
1256         int compresslevel = 9;
1257         int bzerror;
1258         int mode_char = 0;
1259
1260         self->size = -1;
1261
1262         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1263                                          kwlist, &name, &mode, &buffering,
1264                                          &compresslevel))
1265                 return -1;
1266
1267         if (compresslevel < 1 || compresslevel > 9) {
1268                 PyErr_SetString(PyExc_ValueError,
1269                                 "compresslevel must be between 1 and 9");
1270                 return -1;
1271         }
1272
1273         for (;;) {
1274                 int error = 0;
1275                 switch (*mode) {
1276                         case 'r':
1277                         case 'w':
1278                                 if (mode_char)
1279                                         error = 1;
1280                                 mode_char = *mode;
1281                                 break;
1282
1283                         case 'b':
1284                                 break;
1285
1286                         case 'U':
1287                                 self->f_univ_newline = 1;
1288                                 break;
1289
1290                         default:
1291                                 error = 1;
1292                                 break;
1293                 }
1294                 if (error) {
1295                         PyErr_Format(PyExc_ValueError,
1296                                      "invalid mode char %c", *mode);
1297                         return -1;
1298                 }
1299                 mode++;
1300                 if (*mode == '\0')
1301                         break;
1302         }
1303
1304         mode = (mode_char == 'r') ? "rb" : "wb";
1305
1306         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1307                                            name, mode, buffering);
1308         if (self->file == NULL)
1309                 return -1;
1310
1311         /* From now on, we have stuff to dealloc, so jump to error label
1312          * instead of returning */
1313
1314 #ifdef WITH_THREAD
1315         self->lock = PyThread_allocate_lock();
1316         if (!self->lock)
1317                 goto error;
1318 #endif
1319
1320         if (mode_char == 'r')
1321                 self->fp = BZ2_bzReadOpen(&bzerror,
1322                                           PyFile_AsFile(self->file),
1323                                           0, 0, NULL, 0);
1324         else
1325                 self->fp = BZ2_bzWriteOpen(&bzerror,
1326                                            PyFile_AsFile(self->file),
1327                                            compresslevel, 0, 0);
1328
1329         if (bzerror != BZ_OK) {
1330                 Util_CatchBZ2Error(bzerror);
1331                 goto error;
1332         }
1333
1334         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1335
1336         return 0;
1337
1338 error:
1339         Py_DECREF(self->file);
1340 #ifdef WITH_THREAD
1341         if (self->lock)
1342                 PyThread_free_lock(self->lock);
1343 #endif
1344         return -1;
1345 }
1346
1347 static void
1348 BZ2File_dealloc(BZ2FileObject *self)
1349 {
1350         int bzerror;
1351 #ifdef WITH_THREAD
1352         if (self->lock)
1353                 PyThread_free_lock(self->lock);
1354 #endif
1355         switch (self->mode) {
1356                 case MODE_READ:
1357                 case MODE_READ_EOF:
1358                         BZ2_bzReadClose(&bzerror, self->fp);
1359                         break;
1360                 case MODE_WRITE:
1361                         BZ2_bzWriteClose(&bzerror, self->fp,
1362                                          0, NULL, NULL);
1363                         break;
1364         }
1365         Util_DropReadAhead(self);
1366         Py_DECREF(self->file);
1367         self->ob_type->tp_free((PyObject *)self);
1368 }
1369
1370 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1371 static PyObject *
1372 BZ2File_getiter(BZ2FileObject *self)
1373 {
1374         if (self->mode == MODE_CLOSED) {
1375                 PyErr_SetString(PyExc_ValueError,
1376                                 "I/O operation on closed file");
1377                 return NULL;
1378         }
1379         Py_INCREF((PyObject*)self);
1380         return (PyObject *)self;
1381 }
1382
1383 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1384 #define READAHEAD_BUFSIZE 8192
1385 static PyObject *
1386 BZ2File_iternext(BZ2FileObject *self)
1387 {
1388         PyStringObject* ret;
1389         ACQUIRE_LOCK(self);
1390         if (self->mode == MODE_CLOSED) {
1391                 PyErr_SetString(PyExc_ValueError,
1392                                 "I/O operation on closed file");
1393                 return NULL;
1394         }
1395         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1396         RELEASE_LOCK(self);
1397         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1398                 Py_XDECREF(ret);
1399                 return NULL;
1400         }
1401         return (PyObject *)ret;
1402 }
1403
1404 /* ===================================================================== */
1405 /* BZ2File_Type definition. */
1406
1407 PyDoc_VAR(BZ2File__doc__) =
1408 PyDoc_STR(
1409 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1410 \n\
1411 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1412 writing. When opened for writing, the file will be created if it doesn't\n\
1413 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1414 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1415 is given, must be a number between 1 and 9.\n\
1416 ")
1417 #ifdef WITH_UNIVERSAL_NEWLINES
1418 PyDoc_STR(
1419 "\n\
1420 Add a 'U' to mode to open the file for input with universal newline\n\
1421 support. Any line ending in the input file will be seen as a '\\n' in\n\
1422 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1423 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1424 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1425 newlines are available only when reading.\n\
1426 ")
1427 #endif
1428 ;
1429
1430 static PyTypeObject BZ2File_Type = {
1431         PyObject_HEAD_INIT(NULL)
1432         0,                      /*ob_size*/
1433         "bz2.BZ2File",          /*tp_name*/
1434         sizeof(BZ2FileObject),  /*tp_basicsize*/
1435         0,                      /*tp_itemsize*/
1436         (destructor)BZ2File_dealloc, /*tp_dealloc*/
1437         0,                      /*tp_print*/
1438         0,                      /*tp_getattr*/
1439         0,                      /*tp_setattr*/
1440         0,                      /*tp_compare*/
1441         0,                      /*tp_repr*/
1442         0,                      /*tp_as_number*/
1443         0,                      /*tp_as_sequence*/
1444         0,                      /*tp_as_mapping*/
1445         0,                      /*tp_hash*/
1446         0,                      /*tp_call*/
1447         0,                      /*tp_str*/
1448         PyObject_GenericGetAttr,/*tp_getattro*/
1449         PyObject_GenericSetAttr,/*tp_setattro*/
1450         0,                      /*tp_as_buffer*/
1451         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1452         BZ2File__doc__,         /*tp_doc*/
1453         0,                      /*tp_traverse*/
1454         0,                      /*tp_clear*/
1455         0,                      /*tp_richcompare*/
1456         0,                      /*tp_weaklistoffset*/
1457         (getiterfunc)BZ2File_getiter, /*tp_iter*/
1458         (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1459         BZ2File_methods,        /*tp_methods*/
1460         BZ2File_members,        /*tp_members*/
1461         BZ2File_getset,         /*tp_getset*/
1462         0,                      /*tp_base*/
1463         0,                      /*tp_dict*/
1464         0,                      /*tp_descr_get*/
1465         0,                      /*tp_descr_set*/
1466         0,                      /*tp_dictoffset*/
1467         (initproc)BZ2File_init, /*tp_init*/
1468         PyType_GenericAlloc,    /*tp_alloc*/
1469         PyType_GenericNew,      /*tp_new*/
1470         _PyObject_Del,          /*tp_free*/
1471         0,                      /*tp_is_gc*/
1472 };
1473
1474
1475 /* ===================================================================== */
1476 /* Methods of BZ2Comp. */
1477
1478 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1479 "compress(data) -> string\n\
1480 \n\
1481 Provide more data to the compressor object. It will return chunks of\n\
1482 compressed data whenever possible. When you've finished providing data\n\
1483 to compress, call the flush() method to finish the compression process,\n\
1484 and return what is left in the internal buffers.\n\
1485 ");
1486
1487 static PyObject *
1488 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1489 {
1490         char *data;
1491         int datasize;
1492         int bufsize = SMALLCHUNK;
1493         LONG_LONG totalout;
1494         PyObject *ret = NULL;
1495         bz_stream *bzs = &self->bzs;
1496         int bzerror;
1497
1498         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1499                 return NULL;
1500
1501         ACQUIRE_LOCK(self);
1502         if (!self->running) {
1503                 PyErr_SetString(PyExc_ValueError,
1504                                 "this object was already flushed");
1505                 goto error;
1506         }
1507
1508         ret = PyString_FromStringAndSize(NULL, bufsize);
1509         if (!ret)
1510                 goto error;
1511
1512         bzs->next_in = data;
1513         bzs->avail_in = datasize;
1514         bzs->next_out = BUF(ret);
1515         bzs->avail_out = bufsize;
1516
1517         totalout = BZS_TOTAL_OUT(bzs);
1518
1519         for (;;) {
1520                 Py_BEGIN_ALLOW_THREADS
1521                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1522                 Py_END_ALLOW_THREADS
1523                 if (bzerror != BZ_RUN_OK) {
1524                         Util_CatchBZ2Error(bzerror);
1525                         goto error;
1526                 }
1527                 if (bzs->avail_out == 0) {
1528                         bufsize = Util_NewBufferSize(bufsize);
1529                         if (_PyString_Resize(&ret, bufsize) < 0) {
1530                                 BZ2_bzCompressEnd(bzs);
1531                                 goto error;
1532                         }
1533                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1534                                                     - totalout);
1535                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1536                 } else if (bzs->avail_in == 0) {
1537                         break;
1538                 }
1539         }
1540
1541         _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1542
1543         RELEASE_LOCK(self);
1544         return ret;
1545
1546 error:
1547         RELEASE_LOCK(self);
1548         Py_XDECREF(ret);
1549         return NULL;
1550 }
1551
1552 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1553 "flush() -> string\n\
1554 \n\
1555 Finish the compression process and return what is left in internal buffers.\n\
1556 You must not use the compressor object after calling this method.\n\
1557 ");
1558
1559 static PyObject *
1560 BZ2Comp_flush(BZ2CompObject *self)
1561 {
1562         int bufsize = SMALLCHUNK;
1563         PyObject *ret = NULL;
1564         bz_stream *bzs = &self->bzs;
1565         LONG_LONG totalout;
1566         int bzerror;
1567
1568         ACQUIRE_LOCK(self);
1569         if (!self->running) {
1570                 PyErr_SetString(PyExc_ValueError, "object was already "
1571                                                   "flushed");
1572                 goto error;
1573         }
1574         self->running = 0;
1575
1576         ret = PyString_FromStringAndSize(NULL, bufsize);
1577         if (!ret)
1578                 goto error;
1579
1580         bzs->next_out = BUF(ret);
1581         bzs->avail_out = bufsize;
1582
1583         totalout = BZS_TOTAL_OUT(bzs);
1584
1585         for (;;) {
1586                 Py_BEGIN_ALLOW_THREADS
1587                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1588                 Py_END_ALLOW_THREADS
1589                 if (bzerror == BZ_STREAM_END) {
1590                         break;
1591                 } else if (bzerror != BZ_FINISH_OK) {
1592                         Util_CatchBZ2Error(bzerror);
1593                         goto error;
1594                 }
1595                 if (bzs->avail_out == 0) {
1596                         bufsize = Util_NewBufferSize(bufsize);
1597                         if (_PyString_Resize(&ret, bufsize) < 0)
1598                                 goto error;
1599                         bzs->next_out = BUF(ret);
1600                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1601                                                     - totalout);
1602                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1603                 }
1604         }
1605
1606         if (bzs->avail_out != 0)
1607                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1608
1609         RELEASE_LOCK(self);
1610         return ret;
1611
1612 error:
1613         RELEASE_LOCK(self);
1614         Py_XDECREF(ret);
1615         return NULL;
1616 }
1617
1618 static PyMethodDef BZ2Comp_methods[] = {
1619         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1620          BZ2Comp_compress__doc__},
1621         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1622          BZ2Comp_flush__doc__},
1623         {NULL,          NULL}           /* sentinel */
1624 };
1625
1626
1627 /* ===================================================================== */
1628 /* Slot definitions for BZ2Comp_Type. */
1629
1630 static int
1631 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1632 {
1633         int compresslevel = 9;
1634         int bzerror;
1635         static char *kwlist[] = {"compresslevel", 0};
1636
1637         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1638                                          kwlist, &compresslevel))
1639                 return -1;
1640
1641         if (compresslevel < 1 || compresslevel > 9) {
1642                 PyErr_SetString(PyExc_ValueError,
1643                                 "compresslevel must be between 1 and 9");
1644                 goto error;
1645         }
1646
1647 #ifdef WITH_THREAD
1648         self->lock = PyThread_allocate_lock();
1649         if (!self->lock)
1650                 goto error;
1651 #endif
1652
1653         memset(&self->bzs, 0, sizeof(bz_stream));
1654         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1655         if (bzerror != BZ_OK) {
1656                 Util_CatchBZ2Error(bzerror);
1657                 goto error;
1658         }
1659
1660         self->running = 1;
1661
1662         return 0;
1663 error:
1664 #ifdef WITH_THREAD
1665         if (self->lock)
1666                 PyThread_free_lock(self->lock);
1667 #endif
1668         return -1;
1669 }
1670
1671 static void
1672 BZ2Comp_dealloc(BZ2CompObject *self)
1673 {
1674 #ifdef WITH_THREAD
1675         if (self->lock)
1676                 PyThread_free_lock(self->lock);
1677 #endif
1678         BZ2_bzCompressEnd(&self->bzs);
1679         self->ob_type->tp_free((PyObject *)self);
1680 }
1681
1682
1683 /* ===================================================================== */
1684 /* BZ2Comp_Type definition. */
1685
1686 PyDoc_STRVAR(BZ2Comp__doc__,
1687 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1688 \n\
1689 Create a new compressor object. This object may be used to compress\n\
1690 data sequentially. If you want to compress data in one shot, use the\n\
1691 compress() function instead. The compresslevel parameter, if given,\n\
1692 must be a number between 1 and 9.\n\
1693 ");
1694
1695 static PyTypeObject BZ2Comp_Type = {
1696         PyObject_HEAD_INIT(NULL)
1697         0,                      /*ob_size*/
1698         "bz2.BZ2Compressor",    /*tp_name*/
1699         sizeof(BZ2CompObject),  /*tp_basicsize*/
1700         0,                      /*tp_itemsize*/
1701         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1702         0,                      /*tp_print*/
1703         0,                      /*tp_getattr*/
1704         0,                      /*tp_setattr*/
1705         0,                      /*tp_compare*/
1706         0,                      /*tp_repr*/
1707         0,                      /*tp_as_number*/
1708         0,                      /*tp_as_sequence*/
1709         0,                      /*tp_as_mapping*/
1710         0,                      /*tp_hash*/
1711         0,                      /*tp_call*/
1712         0,                      /*tp_str*/
1713         PyObject_GenericGetAttr,/*tp_getattro*/
1714         PyObject_GenericSetAttr,/*tp_setattro*/
1715         0,                      /*tp_as_buffer*/
1716         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1717         BZ2Comp__doc__,         /*tp_doc*/
1718         0,                      /*tp_traverse*/
1719         0,                      /*tp_clear*/
1720         0,                      /*tp_richcompare*/
1721         0,                      /*tp_weaklistoffset*/
1722         0,                      /*tp_iter*/
1723         0,                      /*tp_iternext*/
1724         BZ2Comp_methods,        /*tp_methods*/
1725         0,                      /*tp_members*/
1726         0,                      /*tp_getset*/
1727         0,                      /*tp_base*/
1728         0,                      /*tp_dict*/
1729         0,                      /*tp_descr_get*/
1730         0,                      /*tp_descr_set*/
1731         0,                      /*tp_dictoffset*/
1732         (initproc)BZ2Comp_init, /*tp_init*/
1733         PyType_GenericAlloc,    /*tp_alloc*/
1734         PyType_GenericNew,      /*tp_new*/
1735         _PyObject_Del,          /*tp_free*/
1736         0,                      /*tp_is_gc*/
1737 };
1738
1739
1740 /* ===================================================================== */
1741 /* Members of BZ2Decomp. */
1742
1743 #undef OFF
1744 #define OFF(x) offsetof(BZ2DecompObject, x)
1745
1746 static PyMemberDef BZ2Decomp_members[] = {
1747         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1748         {NULL}  /* Sentinel */
1749 };
1750
1751
1752 /* ===================================================================== */
1753 /* Methods of BZ2Decomp. */
1754
1755 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1756 "decompress(data) -> string\n\
1757 \n\
1758 Provide more data to the decompressor object. It will return chunks\n\
1759 of decompressed data whenever possible. If you try to decompress data\n\
1760 after the end of stream is found, EOFError will be raised. If any data\n\
1761 was found after the end of stream, it'll be ignored and saved in\n\
1762 unused_data attribute.\n\
1763 ");
1764
1765 static PyObject *
1766 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1767 {
1768         char *data;
1769         int datasize;
1770         int bufsize = SMALLCHUNK;
1771         LONG_LONG totalout;
1772         PyObject *ret = NULL;
1773         bz_stream *bzs = &self->bzs;
1774         int bzerror;
1775
1776         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1777                 return NULL;
1778
1779         ACQUIRE_LOCK(self);
1780         if (!self->running) {
1781                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1782                                                 "already found");
1783                 goto error;
1784         }
1785
1786         ret = PyString_FromStringAndSize(NULL, bufsize);
1787         if (!ret)
1788                 goto error;
1789
1790         bzs->next_in = data;
1791         bzs->avail_in = datasize;
1792         bzs->next_out = BUF(ret);
1793         bzs->avail_out = bufsize;
1794
1795         totalout = BZS_TOTAL_OUT(bzs);
1796
1797         for (;;) {
1798                 Py_BEGIN_ALLOW_THREADS
1799                 bzerror = BZ2_bzDecompress(bzs);
1800                 Py_END_ALLOW_THREADS
1801                 if (bzerror == BZ_STREAM_END) {
1802                         if (bzs->avail_in != 0) {
1803                                 Py_DECREF(self->unused_data);
1804                                 self->unused_data =
1805                                     PyString_FromStringAndSize(bzs->next_in,
1806                                                                bzs->avail_in);
1807                         }
1808                         self->running = 0;
1809                         break;
1810                 }
1811                 if (bzerror != BZ_OK) {
1812                         Util_CatchBZ2Error(bzerror);
1813                         goto error;
1814                 }
1815                 if (bzs->avail_out == 0) {
1816                         bufsize = Util_NewBufferSize(bufsize);
1817                         if (_PyString_Resize(&ret, bufsize) < 0) {
1818                                 BZ2_bzDecompressEnd(bzs);
1819                                 goto error;
1820                         }
1821                         bzs->next_out = BUF(ret);
1822                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1823                                                     - totalout);
1824                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1825                 } else if (bzs->avail_in == 0) {
1826                         break;
1827                 }
1828         }
1829
1830         if (bzs->avail_out != 0)
1831                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1832
1833         RELEASE_LOCK(self);
1834         return ret;
1835
1836 error:
1837         RELEASE_LOCK(self);
1838         Py_XDECREF(ret);
1839         return NULL;
1840 }
1841
1842 static PyMethodDef BZ2Decomp_methods[] = {
1843         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1844         {NULL,          NULL}           /* sentinel */
1845 };
1846
1847
1848 /* ===================================================================== */
1849 /* Slot definitions for BZ2Decomp_Type. */
1850
1851 static int
1852 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1853 {
1854         int bzerror;
1855
1856         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1857                 return -1;
1858
1859 #ifdef WITH_THREAD
1860         self->lock = PyThread_allocate_lock();
1861         if (!self->lock)
1862                 goto error;
1863 #endif
1864
1865         self->unused_data = PyString_FromString("");
1866         if (!self->unused_data)
1867                 goto error;
1868
1869         memset(&self->bzs, 0, sizeof(bz_stream));
1870         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1871         if (bzerror != BZ_OK) {
1872                 Util_CatchBZ2Error(bzerror);
1873                 goto error;
1874         }
1875
1876         self->running = 1;
1877
1878         return 0;
1879
1880 error:
1881 #ifdef WITH_THREAD
1882         if (self->lock)
1883                 PyThread_free_lock(self->lock);
1884 #endif
1885         Py_XDECREF(self->unused_data);
1886         return -1;
1887 }
1888
1889 static void
1890 BZ2Decomp_dealloc(BZ2DecompObject *self)
1891 {
1892 #ifdef WITH_THREAD
1893         if (self->lock)
1894                 PyThread_free_lock(self->lock);
1895 #endif
1896         Py_XDECREF(self->unused_data);
1897         BZ2_bzDecompressEnd(&self->bzs);
1898         self->ob_type->tp_free((PyObject *)self);
1899 }
1900
1901
1902 /* ===================================================================== */
1903 /* BZ2Decomp_Type definition. */
1904
1905 PyDoc_STRVAR(BZ2Decomp__doc__,
1906 "BZ2Decompressor() -> decompressor object\n\
1907 \n\
1908 Create a new decompressor object. This object may be used to decompress\n\
1909 data sequentially. If you want to decompress data in one shot, use the\n\
1910 decompress() function instead.\n\
1911 ");
1912
1913 static PyTypeObject BZ2Decomp_Type = {
1914         PyObject_HEAD_INIT(NULL)
1915         0,                      /*ob_size*/
1916         "bz2.BZ2Decompressor",  /*tp_name*/
1917         sizeof(BZ2DecompObject), /*tp_basicsize*/
1918         0,                      /*tp_itemsize*/
1919         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1920         0,                      /*tp_print*/
1921         0,                      /*tp_getattr*/
1922         0,                      /*tp_setattr*/
1923         0,                      /*tp_compare*/
1924         0,                      /*tp_repr*/
1925         0,                      /*tp_as_number*/
1926         0,                      /*tp_as_sequence*/
1927         0,                      /*tp_as_mapping*/
1928         0,                      /*tp_hash*/
1929         0,                      /*tp_call*/
1930         0,                      /*tp_str*/
1931         PyObject_GenericGetAttr,/*tp_getattro*/
1932         PyObject_GenericSetAttr,/*tp_setattro*/
1933         0,                      /*tp_as_buffer*/
1934         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1935         BZ2Decomp__doc__,       /*tp_doc*/
1936         0,                      /*tp_traverse*/
1937         0,                      /*tp_clear*/
1938         0,                      /*tp_richcompare*/
1939         0,                      /*tp_weaklistoffset*/
1940         0,                      /*tp_iter*/
1941         0,                      /*tp_iternext*/
1942         BZ2Decomp_methods,      /*tp_methods*/
1943         BZ2Decomp_members,      /*tp_members*/
1944         0,                      /*tp_getset*/
1945         0,                      /*tp_base*/
1946         0,                      /*tp_dict*/
1947         0,                      /*tp_descr_get*/
1948         0,                      /*tp_descr_set*/
1949         0,                      /*tp_dictoffset*/
1950         (initproc)BZ2Decomp_init, /*tp_init*/
1951         PyType_GenericAlloc,    /*tp_alloc*/
1952         PyType_GenericNew,      /*tp_new*/
1953         _PyObject_Del,          /*tp_free*/
1954         0,                      /*tp_is_gc*/
1955 };
1956
1957
1958 /* ===================================================================== */
1959 /* Module functions. */
1960
1961 PyDoc_STRVAR(bz2_compress__doc__,
1962 "compress(data [, compresslevel=9]) -> string\n\
1963 \n\
1964 Compress data in one shot. If you want to compress data sequentially,\n\
1965 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1966 given, must be a number between 1 and 9.\n\
1967 ");
1968
1969 static PyObject *
1970 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1971 {
1972         int compresslevel=9;
1973         char *data;
1974         int datasize;
1975         int bufsize;
1976         PyObject *ret = NULL;
1977         bz_stream _bzs;
1978         bz_stream *bzs = &_bzs;
1979         int bzerror;
1980         static char *kwlist[] = {"data", "compresslevel", 0};
1981
1982         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1983                                          kwlist, &data, &datasize,
1984                                          &compresslevel))
1985                 return NULL;
1986
1987         if (compresslevel < 1 || compresslevel > 9) {
1988                 PyErr_SetString(PyExc_ValueError,
1989                                 "compresslevel must be between 1 and 9");
1990                 return NULL;
1991         }
1992
1993         /* Conforming to bz2 manual, this is large enough to fit compressed
1994          * data in one shot. We will check it later anyway. */
1995         bufsize = datasize + (datasize/100+1) + 600;
1996
1997         ret = PyString_FromStringAndSize(NULL, bufsize);
1998         if (!ret)
1999                 return NULL;
2000
2001         memset(bzs, 0, sizeof(bz_stream));
2002
2003         bzs->next_in = data;
2004         bzs->avail_in = datasize;
2005         bzs->next_out = BUF(ret);
2006         bzs->avail_out = bufsize;
2007
2008         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2009         if (bzerror != BZ_OK) {
2010                 Util_CatchBZ2Error(bzerror);
2011                 Py_DECREF(ret);
2012                 return NULL;
2013         }
2014
2015         for (;;) {
2016                 Py_BEGIN_ALLOW_THREADS
2017                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2018                 Py_END_ALLOW_THREADS
2019                 if (bzerror == BZ_STREAM_END) {
2020                         break;
2021                 } else if (bzerror != BZ_FINISH_OK) {
2022                         BZ2_bzCompressEnd(bzs);
2023                         Util_CatchBZ2Error(bzerror);
2024                         Py_DECREF(ret);
2025                         return NULL;
2026                 }
2027                 if (bzs->avail_out == 0) {
2028                         bufsize = Util_NewBufferSize(bufsize);
2029                         if (_PyString_Resize(&ret, bufsize) < 0) {
2030                                 BZ2_bzCompressEnd(bzs);
2031                                 Py_DECREF(ret);
2032                                 return NULL;
2033                         }
2034                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2035                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2036                 }
2037         }
2038
2039         if (bzs->avail_out != 0)
2040                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2041         BZ2_bzCompressEnd(bzs);
2042
2043         return ret;
2044 }
2045
2046 PyDoc_STRVAR(bz2_decompress__doc__,
2047 "decompress(data) -> decompressed data\n\
2048 \n\
2049 Decompress data in one shot. If you want to decompress data sequentially,\n\
2050 use an instance of BZ2Decompressor instead.\n\
2051 ");
2052
2053 static PyObject *
2054 bz2_decompress(PyObject *self, PyObject *args)
2055 {
2056         char *data;
2057         int datasize;
2058         int bufsize = SMALLCHUNK;
2059         PyObject *ret;
2060         bz_stream _bzs;
2061         bz_stream *bzs = &_bzs;
2062         int bzerror;
2063
2064         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2065                 return NULL;
2066
2067         if (datasize == 0)
2068                 return PyString_FromString("");
2069
2070         ret = PyString_FromStringAndSize(NULL, bufsize);
2071         if (!ret)
2072                 return NULL;
2073
2074         memset(bzs, 0, sizeof(bz_stream));
2075
2076         bzs->next_in = data;
2077         bzs->avail_in = datasize;
2078         bzs->next_out = BUF(ret);
2079         bzs->avail_out = bufsize;
2080
2081         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2082         if (bzerror != BZ_OK) {
2083                 Util_CatchBZ2Error(bzerror);
2084                 Py_DECREF(ret);
2085                 return NULL;
2086         }
2087
2088         for (;;) {
2089                 Py_BEGIN_ALLOW_THREADS
2090                 bzerror = BZ2_bzDecompress(bzs);
2091                 Py_END_ALLOW_THREADS
2092                 if (bzerror == BZ_STREAM_END) {
2093                         break;
2094                 } else if (bzerror != BZ_OK) {
2095                         BZ2_bzDecompressEnd(bzs);
2096                         Util_CatchBZ2Error(bzerror);
2097                         Py_DECREF(ret);
2098                         return NULL;
2099                 }
2100                 if (bzs->avail_out == 0) {
2101                         bufsize = Util_NewBufferSize(bufsize);
2102                         if (_PyString_Resize(&ret, bufsize) < 0) {
2103                                 BZ2_bzDecompressEnd(bzs);
2104                                 Py_DECREF(ret);
2105                                 return NULL;
2106                         }
2107                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2108                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2109                 } else if (bzs->avail_in == 0) {
2110                         BZ2_bzDecompressEnd(bzs);
2111                         PyErr_SetString(PyExc_ValueError,
2112                                         "couldn't find end of stream");
2113                         Py_DECREF(ret);
2114                         return NULL;
2115                 }
2116         }
2117
2118         if (bzs->avail_out != 0)
2119                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2120         BZ2_bzDecompressEnd(bzs);
2121
2122         return ret;
2123 }
2124
2125 static PyMethodDef bz2_methods[] = {
2126         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2127                 bz2_compress__doc__},
2128         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2129                 bz2_decompress__doc__},
2130         {NULL,          NULL}           /* sentinel */
2131 };
2132
2133 /* ===================================================================== */
2134 /* Initialization function. */
2135
2136 PyDoc_STRVAR(bz2__doc__,
2137 "The python bz2 module provides a comprehensive interface for\n\
2138 the bz2 compression library. It implements a complete file\n\
2139 interface, one shot (de)compression functions, and types for\n\
2140 sequential (de)compression.\n\
2141 ");
2142
2143 DL_EXPORT(void)
2144 initbz2(void)
2145 {
2146         PyObject *m;
2147
2148         BZ2File_Type.ob_type = &PyType_Type;
2149         BZ2Comp_Type.ob_type = &PyType_Type;
2150         BZ2Decomp_Type.ob_type = &PyType_Type;
2151
2152         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2153
2154         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2155
2156         Py_INCREF(&BZ2File_Type);
2157         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2158
2159         Py_INCREF(&BZ2Comp_Type);
2160         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2161
2162         Py_INCREF(&BZ2Decomp_Type);
2163         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2164 }