Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  26
  27 #define MODE_CLOSED   0
  28 #define MODE_READ     1
  29 #define MODE_READ_EOF 2
  30 #define MODE_WRITE    3
  31
  32 #define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
  33
  34
  35 #ifdef BZ_CONFIG_ERROR
  36
  37 #if SIZEOF_LONG >= 8
  38 #define BZS_TOTAL_OUT(bzs) \
  39         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  40 #elif SIZEOF_LONG_LONG >= 8
  41 #define BZS_TOTAL_OUT(bzs) \
  42         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  43 #else
  44 #define BZS_TOTAL_OUT(bzs) \
  45         bzs->total_out_lo32;
  46 #endif
  47
  48 #else /* ! BZ_CONFIG_ERROR */
  49
  50 #define BZ2_bzRead bzRead
  51 #define BZ2_bzReadOpen bzReadOpen
  52 #define BZ2_bzReadClose bzReadClose
  53 #define BZ2_bzWrite bzWrite
  54 #define BZ2_bzWriteOpen bzWriteOpen
  55 #define BZ2_bzWriteClose bzWriteClose
  56 #define BZ2_bzCompress bzCompress
  57 #define BZ2_bzCompressInit bzCompressInit
  58 #define BZ2_bzCompressEnd bzCompressEnd
  59 #define BZ2_bzDecompress bzDecompress
  60 #define BZ2_bzDecompressInit bzDecompressInit
  61 #define BZ2_bzDecompressEnd bzDecompressEnd
  62
  63 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  64
  65 #endif /* ! BZ_CONFIG_ERROR */
  66
  67
  68 #ifdef WITH_THREAD
  69 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  70 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  71 #else
  72 #define ACQUIRE_LOCK(obj)
  73 #define RELEASE_LOCK(obj)
  74 #endif
  75
  76 /* Bits in f_newlinetypes */
  77 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  78 #define NEWLINE_CR 1            /* \r newline seen */
  79 #define NEWLINE_LF 2            /* \n newline seen */
  80 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  81
  82 /* ===================================================================== */
  83 /* Structure definitions. */
  84
  85 typedef struct {
  86         PyObject_HEAD
  87         PyObject *file;
  88
  89         char* f_buf;            /* Allocated readahead buffer */
  90         char* f_bufend;         /* Points after last occupied position */
  91         char* f_bufptr;         /* Current buffer position */
  92
  93         int f_softspace;        /* Flag used by 'print' command */
  94
  95         int f_univ_newline;     /* Handle any newline convention */
  96         int f_newlinetypes;     /* Types of newlines seen */
  97         int f_skipnextlf;       /* Skip next \n */
  98
  99         BZFILE *fp;
 100         int mode;
 101         long pos;
 102         long size;
 103 #ifdef WITH_THREAD
 104         PyThread_type_lock lock;
 105 #endif
 106 } BZ2FileObject;
 107
 108 typedef struct {
 109         PyObject_HEAD
 110         bz_stream bzs;
 111         int running;
 112 #ifdef WITH_THREAD
 113         PyThread_type_lock lock;
 114 #endif
 115 } BZ2CompObject;
 116
 117 typedef struct {
 118         PyObject_HEAD
 119         bz_stream bzs;
 120         int running;
 121         PyObject *unused_data;
 122 #ifdef WITH_THREAD
 123         PyThread_type_lock lock;
 124 #endif
 125 } BZ2DecompObject;
 126
 127 /* ===================================================================== */
 128 /* Utility functions. */
 129
 130 static int
 131 Util_CatchBZ2Error(int bzerror)
 132 {
 133         int ret = 0;
 134         switch(bzerror) {
 135                 case BZ_OK:
 136                 case BZ_STREAM_END:
 137                         break;
 138
 139 #ifdef BZ_CONFIG_ERROR
 140                 case BZ_CONFIG_ERROR:
 141                         PyErr_SetString(PyExc_SystemError,
 142                                         "the bz2 library was not compiled "
 143                                         "correctly");
 144                         ret = 1;
 145                         break;
 146 #endif
 147
 148                 case BZ_PARAM_ERROR:
 149                         PyErr_SetString(PyExc_ValueError,
 150                                         "the bz2 library has received wrong "
 151                                         "parameters");
 152                         ret = 1;
 153                         break;
 154
 155                 case BZ_MEM_ERROR:
 156                         PyErr_NoMemory();
 157                         ret = 1;
 158                         break;
 159
 160                 case BZ_DATA_ERROR:
 161                 case BZ_DATA_ERROR_MAGIC:
 162                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 163                         ret = 1;
 164                         break;
 165
 166                 case BZ_IO_ERROR:
 167                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 168                         ret = 1;
 169                         break;
 170
 171                 case BZ_UNEXPECTED_EOF:
 172                         PyErr_SetString(PyExc_EOFError,
 173                                         "compressed file ended before the "
 174                                         "logical end-of-stream was detected");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_SEQUENCE_ERROR:
 179                         PyErr_SetString(PyExc_RuntimeError,
 180                                         "wrong sequence of bz2 library "
 181                                         "commands used");
 182                         ret = 1;
 183                         break;
 184         }
 185         return ret;
 186 }
 187
 188 #if BUFSIZ < 8192
 189 #define SMALLCHUNK 8192
 190 #else
 191 #define SMALLCHUNK BUFSIZ
 192 #endif
 193
 194 #if SIZEOF_INT < 4
 195 #define BIGCHUNK  (512 * 32)
 196 #else
 197 #define BIGCHUNK  (512 * 1024)
 198 #endif
 199
 200 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 201 static size_t
 202 Util_NewBufferSize(size_t currentsize)
 203 {
 204         if (currentsize > SMALLCHUNK) {
 205                 /* Keep doubling until we reach BIGCHUNK;
 206                    then keep adding BIGCHUNK. */
 207                 if (currentsize <= BIGCHUNK)
 208                         return currentsize + currentsize;
 209                 else
 210                         return currentsize + BIGCHUNK;
 211         }
 212         return currentsize + SMALLCHUNK;
 213 }
 214
 215 /* This is a hacked version of Python's fileobject.c:get_line(). */
 216 static PyObject *
 217 Util_GetLine(BZ2FileObject *f, int n)
 218 {
 219         char c;
 220         char *buf, *end;
 221         size_t total_v_size;    /* total # of slots in buffer */
 222         size_t used_v_size;     /* # used slots in buffer */
 223         size_t increment;       /* amount to increment the buffer */
 224         PyObject *v;
 225         int bzerror;
 226         int newlinetypes = f->f_newlinetypes;
 227         int skipnextlf = f->f_skipnextlf;
 228         int univ_newline = f->f_univ_newline;
 229
 230         total_v_size = n > 0 ? n : 100;
 231         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 232         if (v == NULL)
 233                 return NULL;
 234
 235         buf = BUF(v);
 236         end = buf + total_v_size;
 237
 238         for (;;) {
 239                 Py_BEGIN_ALLOW_THREADS
 240                 if (univ_newline) {
 241                         while (1) {
 242                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 243                                 f->pos++;
 244                                 if (bzerror != BZ_OK || buf == end)
 245                                         break;
 246                                 if (skipnextlf) {
 247                                         skipnextlf = 0;
 248                                         if (c == '\n') {
 249                                                 /* Seeing a \n here with
 250                                                  * skipnextlf true means we
 251                                                  * saw a \r before.
 252                                                  */
 253                                                 newlinetypes |= NEWLINE_CRLF;
 254                                                 BZ2_bzRead(&bzerror, f->fp,
 255                                                            &c, 1);
 256                                                 if (bzerror != BZ_OK)
 257                                                         break;
 258                                         } else {
 259                                                 newlinetypes |= NEWLINE_CR;
 260                                         }
 261                                 }
 262                                 if (c == '\r') {
 263                                         skipnextlf = 1;
 264                                         c = '\n';
 265                                 } else if ( c == '\n')
 266                                         newlinetypes |= NEWLINE_LF;
 267                                 *buf++ = c;
 268                                 if (c == '\n') break;
 269                         }
 270                         if (bzerror == BZ_STREAM_END && skipnextlf)
 271                                 newlinetypes |= NEWLINE_CR;
 272                 } else /* If not universal newlines use the normal loop */
 273                         do {
 274                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 275                                 f->pos++;
 276                                 *buf++ = c;
 277                         } while (bzerror == BZ_OK && c != '\n' && buf != end);
 278                 Py_END_ALLOW_THREADS
 279                 f->f_newlinetypes = newlinetypes;
 280                 f->f_skipnextlf = skipnextlf;
 281                 if (bzerror == BZ_STREAM_END) {
 282                         f->size = f->pos;
 283                         f->mode = MODE_READ_EOF;
 284                         break;
 285                 } else if (bzerror != BZ_OK) {
 286                         Util_CatchBZ2Error(bzerror);
 287                         Py_DECREF(v);
 288                         return NULL;
 289                 }
 290                 if (c == '\n')
 291                         break;
 292                 /* Must be because buf == end */
 293                 if (n > 0)
 294                         break;
 295                 used_v_size = total_v_size;
 296                 increment = total_v_size >> 2; /* mild exponential growth */
 297                 total_v_size += increment;
 298                 if (total_v_size > INT_MAX) {
 299                         PyErr_SetString(PyExc_OverflowError,
 300                             "line is longer than a Python string can hold");
 301                         Py_DECREF(v);
 302                         return NULL;
 303                 }
 304                 if (_PyString_Resize(&v, total_v_size) < 0)
 305                         return NULL;
 306                 buf = BUF(v) + used_v_size;
 307                 end = BUF(v) + total_v_size;
 308         }
 309
 310         used_v_size = buf - BUF(v);
 311         if (used_v_size != total_v_size)
 312                 _PyString_Resize(&v, used_v_size);
 313         return v;
 314 }
 315
 316 /* This is a hacked version of Python's
 317  * fileobject.c:Py_UniversalNewlineFread(). */
 318 size_t
 319 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 320                      char* buf, size_t n, BZ2FileObject *f)
 321 {
 322         char *dst = buf;
 323         int newlinetypes, skipnextlf;
 324
 325         assert(buf != NULL);
 326         assert(stream != NULL);
 327
 328         if (!f->f_univ_newline)
 329                 return BZ2_bzRead(bzerror, stream, buf, n);
 330
 331         newlinetypes = f->f_newlinetypes;
 332         skipnextlf = f->f_skipnextlf;
 333
 334         /* Invariant:  n is the number of bytes remaining to be filled
 335          * in the buffer.
 336          */
 337         while (n) {
 338                 size_t nread;
 339                 int shortread;
 340                 char *src = dst;
 341
 342                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 343                 assert(nread <= n);
 344                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 345                 shortread = n != 0;     /* true iff EOF or error */
 346                 while (nread--) {
 347                         char c = *src++;
 348                         if (c == '\r') {
 349                                 /* Save as LF and set flag to skip next LF. */
 350                                 *dst++ = '\n';
 351                                 skipnextlf = 1;
 352                         }
 353                         else if (skipnextlf && c == '\n') {
 354                                 /* Skip LF, and remember we saw CR LF. */
 355                                 skipnextlf = 0;
 356                                 newlinetypes |= NEWLINE_CRLF;
 357                                 ++n;
 358                         }
 359                         else {
 360                                 /* Normal char to be stored in buffer.  Also
 361                                  * update the newlinetypes flag if either this
 362                                  * is an LF or the previous char was a CR.
 363                                  */
 364                                 if (c == '\n')
 365                                         newlinetypes |= NEWLINE_LF;
 366                                 else if (skipnextlf)
 367                                         newlinetypes |= NEWLINE_CR;
 368                                 *dst++ = c;
 369                                 skipnextlf = 0;
 370                         }
 371                 }
 372                 if (shortread) {
 373                         /* If this is EOF, update type flags. */
 374                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 375                                 newlinetypes |= NEWLINE_CR;
 376                         break;
 377                 }
 378         }
 379         f->f_newlinetypes = newlinetypes;
 380         f->f_skipnextlf = skipnextlf;
 381         return dst - buf;
 382 }
 383
 384 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 385 static void
 386 Util_DropReadAhead(BZ2FileObject *f)
 387 {
 388         if (f->f_buf != NULL) {
 389                 PyMem_Free(f->f_buf);
 390                 f->f_buf = NULL;
 391         }
 392 }
 393
 394 /* This is a hacked version of Python's fileobject.c:readahead(). */
 395 static int
 396 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 397 {
 398         int chunksize;
 399         int bzerror;
 400
 401         if (f->f_buf != NULL) {
 402                 if((f->f_bufend - f->f_bufptr) >= 1)
 403                         return 0;
 404                 else
 405                         Util_DropReadAhead(f);
 406         }
 407         if (f->mode == MODE_READ_EOF) {
 408                 return -1;
 409         }
 410         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 411                 return -1;
 412         }
 413         Py_BEGIN_ALLOW_THREADS
 414         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 415                                          bufsize, f);
 416         Py_END_ALLOW_THREADS
 417         f->pos += chunksize;
 418         if (bzerror == BZ_STREAM_END) {
 419                 f->size = f->pos;
 420                 f->mode = MODE_READ_EOF;
 421         } else if (bzerror != BZ_OK) {
 422                 Util_CatchBZ2Error(bzerror);
 423                 Util_DropReadAhead(f);
 424                 return -1;
 425         }
 426         f->f_bufptr = f->f_buf;
 427         f->f_bufend = f->f_buf + chunksize;
 428         return 0;
 429 }
 430
 431 /* This is a hacked version of Python's
 432  * fileobject.c:readahead_get_line_skip(). */
 433 static PyStringObject *
 434 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 435 {
 436         PyStringObject* s;
 437         char *bufptr;
 438         char *buf;
 439         int len;
 440
 441         if (f->f_buf == NULL)
 442                 if (Util_ReadAhead(f, bufsize) < 0)
 443                         return NULL;
 444
 445         len = f->f_bufend - f->f_bufptr;
 446         if (len == 0)
 447                 return (PyStringObject *)
 448                         PyString_FromStringAndSize(NULL, skip);
 449         bufptr = memchr(f->f_bufptr, '\n', len);
 450         if (bufptr != NULL) {
 451                 bufptr++;                       /* Count the '\n' */
 452                 len = bufptr - f->f_bufptr;
 453                 s = (PyStringObject *)
 454                         PyString_FromStringAndSize(NULL, skip+len);
 455                 if (s == NULL)
 456                         return NULL;
 457                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 458                 f->f_bufptr = bufptr;
 459                 if (bufptr == f->f_bufend)
 460                         Util_DropReadAhead(f);
 461         } else {
 462                 bufptr = f->f_bufptr;
 463                 buf = f->f_buf;
 464                 f->f_buf = NULL;        /* Force new readahead buffer */
 465                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 466                                               bufsize + (bufsize>>2));
 467                 if (s == NULL) {
 468                         PyMem_Free(buf);
 469                         return NULL;
 470                 }
 471                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 472                 PyMem_Free(buf);
 473         }
 474         return s;
 475 }
 476
 477 /* ===================================================================== */
 478 /* Methods of BZ2File. */
 479
 480 PyDoc_STRVAR(BZ2File_read__doc__,
 481 "read([size]) -> string\n\
 482 \n\
 483 Read at most size uncompressed bytes, returned as a string. If the size\n\
 484 argument is negative or omitted, read until EOF is reached.\n\
 485 ");
 486
 487 /* This is a hacked version of Python's fileobject.c:file_read(). */
 488 static PyObject *
 489 BZ2File_read(BZ2FileObject *self, PyObject *args)
 490 {
 491         long bytesrequested = -1;
 492         size_t bytesread, buffersize, chunksize;
 493         int bzerror;
 494         PyObject *ret = NULL;
 495
 496         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 497                 return NULL;
 498
 499         ACQUIRE_LOCK(self);
 500         switch (self->mode) {
 501                 case MODE_READ:
 502                         break;
 503                 case MODE_READ_EOF:
 504                         ret = PyString_FromString("");
 505                         goto cleanup;
 506                 case MODE_CLOSED:
 507                         PyErr_SetString(PyExc_ValueError,
 508                                         "I/O operation on closed file");
 509                         goto cleanup;
 510                 default:
 511                         PyErr_SetString(PyExc_IOError,
 512                                         "file is not ready for reading");
 513                         goto cleanup;
 514         }
 515
 516         if (bytesrequested < 0)
 517                 buffersize = Util_NewBufferSize((size_t)0);
 518         else
 519                 buffersize = bytesrequested;
 520         if (buffersize > INT_MAX) {
 521                 PyErr_SetString(PyExc_OverflowError,
 522                                 "requested number of bytes is "
 523                                 "more than a Python string can hold");
 524                 goto cleanup;
 525         }
 526         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 527         if (ret == NULL)
 528                 goto cleanup;
 529         bytesread = 0;
 530
 531         for (;;) {
 532                 Py_BEGIN_ALLOW_THREADS
 533                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 534                                                  BUF(ret)+bytesread,
 535                                                  buffersize-bytesread,
 536                                                  self);
 537                 self->pos += chunksize;
 538                 Py_END_ALLOW_THREADS
 539                 bytesread += chunksize;
 540                 if (bzerror == BZ_STREAM_END) {
 541                         self->size = self->pos;
 542                         self->mode = MODE_READ_EOF;
 543                         break;
 544                 } else if (bzerror != BZ_OK) {
 545                         Util_CatchBZ2Error(bzerror);
 546                         Py_DECREF(ret);
 547                         ret = NULL;
 548                         goto cleanup;
 549                 }
 550                 if (bytesrequested < 0) {
 551                         buffersize = Util_NewBufferSize(buffersize);
 552                         if (_PyString_Resize(&ret, buffersize) < 0)
 553                                 goto cleanup;
 554                 } else {
 555                         break;
 556                 }
 557         }
 558         if (bytesread != buffersize)
 559                 _PyString_Resize(&ret, bytesread);
 560
 561 cleanup:
 562         RELEASE_LOCK(self);
 563         return ret;
 564 }
 565
 566 PyDoc_STRVAR(BZ2File_readline__doc__,
 567 "readline([size]) -> string\n\
 568 \n\
 569 Return the next line from the file, as a string, retaining newline.\n\
 570 A non-negative size argument will limit the maximum number of bytes to\n\
 571 return (an incomplete line may be returned then). Return an empty\n\
 572 string at EOF.\n\
 573 ");
 574
 575 static PyObject *
 576 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 577 {
 578         PyObject *ret = NULL;
 579         int sizehint = -1;
 580
 581         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 582                 return NULL;
 583
 584         ACQUIRE_LOCK(self);
 585         switch (self->mode) {
 586                 case MODE_READ:
 587                         break;
 588                 case MODE_READ_EOF:
 589                         ret = PyString_FromString("");
 590                         goto cleanup;
 591                 case MODE_CLOSED:
 592                         PyErr_SetString(PyExc_ValueError,
 593                                         "I/O operation on closed file");
 594                         goto cleanup;
 595                 default:
 596                         PyErr_SetString(PyExc_IOError,
 597                                         "file is not ready for reading");
 598                         goto cleanup;
 599         }
 600
 601         if (sizehint == 0)
 602                 ret = PyString_FromString("");
 603         else
 604                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 605
 606 cleanup:
 607         RELEASE_LOCK(self);
 608         return ret;
 609 }
 610
 611 PyDoc_STRVAR(BZ2File_readlines__doc__,
 612 "readlines([size]) -> list\n\
 613 \n\
 614 Call readline() repeatedly and return a list of lines read.\n\
 615 The optional size argument, if given, is an approximate bound on the\n\
 616 total number of bytes in the lines returned.\n\
 617 ");
 618
 619 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 620 static PyObject *
 621 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 622 {
 623         long sizehint = 0;
 624         PyObject *list = NULL;
 625         PyObject *line;
 626         char small_buffer[SMALLCHUNK];
 627         char *buffer = small_buffer;
 628         size_t buffersize = SMALLCHUNK;
 629         PyObject *big_buffer = NULL;
 630         size_t nfilled = 0;
 631         size_t nread;
 632         size_t totalread = 0;
 633         char *p, *q, *end;
 634         int err;
 635         int shortread = 0;
 636         int bzerror;
 637
 638         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 639                 return NULL;
 640
 641         ACQUIRE_LOCK(self);
 642         switch (self->mode) {
 643                 case MODE_READ:
 644                         break;
 645                 case MODE_READ_EOF:
 646                         list = PyList_New(0);
 647                         goto cleanup;
 648                 case MODE_CLOSED:
 649                         PyErr_SetString(PyExc_ValueError,
 650                                         "I/O operation on closed file");
 651                         goto cleanup;
 652                 default:
 653                         PyErr_SetString(PyExc_IOError,
 654                                         "file is not ready for reading");
 655                         goto cleanup;
 656         }
 657
 658         if ((list = PyList_New(0)) == NULL)
 659                 goto cleanup;
 660
 661         for (;;) {
 662                 Py_BEGIN_ALLOW_THREADS
 663                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 664                                              buffer+nfilled,
 665                                              buffersize-nfilled, self);
 666                 self->pos += nread;
 667                 Py_END_ALLOW_THREADS
 668                 if (bzerror == BZ_STREAM_END) {
 669                         self->size = self->pos;
 670                         self->mode = MODE_READ_EOF;
 671                         if (nread == 0) {
 672                                 sizehint = 0;
 673                                 break;
 674                         }
 675                         shortread = 1;
 676                 } else if (bzerror != BZ_OK) {
 677                         Util_CatchBZ2Error(bzerror);
 678                   error:
 679                         Py_DECREF(list);
 680                         list = NULL;
 681                         goto cleanup;
 682                 }
 683                 totalread += nread;
 684                 p = memchr(buffer+nfilled, '\n', nread);
 685                 if (p == NULL) {
 686                         /* Need a larger buffer to fit this line */
 687                         nfilled += nread;
 688                         buffersize *= 2;
 689                         if (buffersize > INT_MAX) {
 690                                 PyErr_SetString(PyExc_OverflowError,
 691                             "line is longer than a Python string can hold");
 692                                 goto error;
 693                         }
 694                         if (big_buffer == NULL) {
 695                                 /* Create the big buffer */
 696                                 big_buffer = PyString_FromStringAndSize(
 697                                         NULL, buffersize);
 698                                 if (big_buffer == NULL)
 699                                         goto error;
 700                                 buffer = PyString_AS_STRING(big_buffer);
 701                                 memcpy(buffer, small_buffer, nfilled);
 702                         }
 703                         else {
 704                                 /* Grow the big buffer */
 705                                 _PyString_Resize(&big_buffer, buffersize);
 706                                 buffer = PyString_AS_STRING(big_buffer);
 707                         }
 708                         continue;
 709                 }
 710                 end = buffer+nfilled+nread;
 711                 q = buffer;
 712                 do {
 713                         /* Process complete lines */
 714                         p++;
 715                         line = PyString_FromStringAndSize(q, p-q);
 716                         if (line == NULL)
 717                                 goto error;
 718                         err = PyList_Append(list, line);
 719                         Py_DECREF(line);
 720                         if (err != 0)
 721                                 goto error;
 722                         q = p;
 723                         p = memchr(q, '\n', end-q);
 724                 } while (p != NULL);
 725                 /* Move the remaining incomplete line to the start */
 726                 nfilled = end-q;
 727                 memmove(buffer, q, nfilled);
 728                 if (sizehint > 0)
 729                         if (totalread >= (size_t)sizehint)
 730                                 break;
 731                 if (shortread) {
 732                         sizehint = 0;
 733                         break;
 734                 }
 735         }
 736         if (nfilled != 0) {
 737                 /* Partial last line */
 738                 line = PyString_FromStringAndSize(buffer, nfilled);
 739                 if (line == NULL)
 740                         goto error;
 741                 if (sizehint > 0) {
 742                         /* Need to complete the last line */
 743                         PyObject *rest = Util_GetLine(self, 0);
 744                         if (rest == NULL) {
 745                                 Py_DECREF(line);
 746                                 goto error;
 747                         }
 748                         PyString_Concat(&line, rest);
 749                         Py_DECREF(rest);
 750                         if (line == NULL)
 751                                 goto error;
 752                 }
 753                 err = PyList_Append(list, line);
 754                 Py_DECREF(line);
 755                 if (err != 0)
 756                         goto error;
 757         }
 758
 759   cleanup:
 760         RELEASE_LOCK(self);
 761         if (big_buffer) {
 762                 Py_DECREF(big_buffer);
 763         }
 764         return list;
 765 }
 766
 767 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 768 "xreadlines() -> self\n\
 769 \n\
 770 For backward compatibility. BZ2File objects now include the performance\n\
 771 optimizations previously implemented in the xreadlines module.\n\
 772 ");
 773
 774 PyDoc_STRVAR(BZ2File_write__doc__,
 775 "write(data) -> None\n\
 776 \n\
 777 Write the 'data' string to file. Note that due to buffering, close() may\n\
 778 be needed before the file on disk reflects the data written.\n\
 779 ");
 780
 781 /* This is a hacked version of Python's fileobject.c:file_write(). */
 782 static PyObject *
 783 BZ2File_write(BZ2FileObject *self, PyObject *args)
 784 {
 785         PyObject *ret = NULL;
 786         char *buf;
 787         int len;
 788         int bzerror;
 789
 790         if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
 791                 return NULL;
 792
 793         ACQUIRE_LOCK(self);
 794         switch (self->mode) {
 795                 case MODE_WRITE:
 796                         break;
 797
 798                 case MODE_CLOSED:
 799                         PyErr_SetString(PyExc_ValueError,
 800                                         "I/O operation on closed file");
 801                         goto cleanup;;
 802
 803                 default:
 804                         PyErr_SetString(PyExc_IOError,
 805                                         "file is not ready for writing");
 806                         goto cleanup;;
 807         }
 808
 809         self->f_softspace = 0;
 810
 811         Py_BEGIN_ALLOW_THREADS
 812         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 813         self->pos += len;
 814         Py_END_ALLOW_THREADS
 815
 816         if (bzerror != BZ_OK) {
 817                 Util_CatchBZ2Error(bzerror);
 818                 goto cleanup;
 819         }
 820
 821         Py_INCREF(Py_None);
 822         ret = Py_None;
 823
 824 cleanup:
 825         RELEASE_LOCK(self);
 826         return ret;
 827 }
 828
 829 PyDoc_STRVAR(BZ2File_writelines__doc__,
 830 "writelines(sequence_of_strings) -> None\n\
 831 \n\
 832 Write the sequence of strings to the file. Note that newlines are not\n\
 833 added. The sequence can be any iterable object producing strings. This is\n\
 834 equivalent to calling write() for each string.\n\
 835 ");
 836
 837 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 838 static PyObject *
 839 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 840 {
 841 #define CHUNKSIZE 1000
 842         PyObject *list = NULL;
 843         PyObject *iter = NULL;
 844         PyObject *ret = NULL;
 845         PyObject *line;
 846         int i, j, index, len, islist;
 847         int bzerror;
 848
 849         ACQUIRE_LOCK(self);
 850         islist = PyList_Check(seq);
 851         if  (!islist) {
 852                 iter = PyObject_GetIter(seq);
 853                 if (iter == NULL) {
 854                         PyErr_SetString(PyExc_TypeError,
 855                                 "writelines() requires an iterable argument");
 856                         goto error;
 857                 }
 858                 list = PyList_New(CHUNKSIZE);
 859                 if (list == NULL)
 860                         goto error;
 861         }
 862
 863         /* Strategy: slurp CHUNKSIZE lines into a private list,
 864            checking that they are all strings, then write that list
 865            without holding the interpreter lock, then come back for more. */
 866         for (index = 0; ; index += CHUNKSIZE) {
 867                 if (islist) {
 868                         Py_XDECREF(list);
 869                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 870                         if (list == NULL)
 871                                 goto error;
 872                         j = PyList_GET_SIZE(list);
 873                 }
 874                 else {
 875                         for (j = 0; j < CHUNKSIZE; j++) {
 876                                 line = PyIter_Next(iter);
 877                                 if (line == NULL) {
 878                                         if (PyErr_Occurred())
 879                                                 goto error;
 880                                         break;
 881                                 }
 882                                 PyList_SetItem(list, j, line);
 883                         }
 884                 }
 885                 if (j == 0)
 886                         break;
 887
 888                 /* Check that all entries are indeed strings. If not,
 889                    apply the same rules as for file.write() and
 890                    convert the rets to strings. This is slow, but
 891                    seems to be the only way since all conversion APIs
 892                    could potentially execute Python code. */
 893                 for (i = 0; i < j; i++) {
 894                         PyObject *v = PyList_GET_ITEM(list, i);
 895                         if (!PyString_Check(v)) {
 896                                 const char *buffer;
 897                                 int len;
 898                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 899                                         PyErr_SetString(PyExc_TypeError,
 900                                                         "writelines() "
 901                                                         "argument must be "
 902                                                         "a sequence of "
 903                                                         "strings");
 904                                         goto error;
 905                                 }
 906                                 line = PyString_FromStringAndSize(buffer,
 907                                                                   len);
 908                                 if (line == NULL)
 909                                         goto error;
 910                                 Py_DECREF(v);
 911                                 PyList_SET_ITEM(list, i, line);
 912                         }
 913                 }
 914
 915                 self->f_softspace = 0;
 916
 917                 /* Since we are releasing the global lock, the
 918                    following code may *not* execute Python code. */
 919                 Py_BEGIN_ALLOW_THREADS
 920                 for (i = 0; i < j; i++) {
 921                         line = PyList_GET_ITEM(list, i);
 922                         len = PyString_GET_SIZE(line);
 923                         BZ2_bzWrite (&bzerror, self->fp,
 924                                      PyString_AS_STRING(line), len);
 925                         if (bzerror != BZ_OK) {
 926                                 Py_BLOCK_THREADS
 927                                 Util_CatchBZ2Error(bzerror);
 928                                 goto error;
 929                         }
 930                 }
 931                 Py_END_ALLOW_THREADS
 932
 933                 if (j < CHUNKSIZE)
 934                         break;
 935         }
 936
 937         Py_INCREF(Py_None);
 938         ret = Py_None;
 939
 940   error:
 941         RELEASE_LOCK(self);
 942         Py_XDECREF(list);
 943         Py_XDECREF(iter);
 944         return ret;
 945 #undef CHUNKSIZE
 946 }
 947
 948 PyDoc_STRVAR(BZ2File_seek__doc__,
 949 "seek(offset [, whence]) -> None\n\
 950 \n\
 951 Move to new file position. Argument offset is a byte count. Optional\n\
 952 argument whence defaults to 0 (offset from start of file, offset\n\
 953 should be >= 0); other values are 1 (move relative to current position,\n\
 954 positive or negative), and 2 (move relative to end of file, usually\n\
 955 negative, although many platforms allow seeking beyond the end of a file).\n\
 956 \n\
 957 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 958 the operation may be extremely slow.\n\
 959 ");
 960
 961 static PyObject *
 962 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 963 {
 964         int where = 0;
 965         long offset;
 966         char small_buffer[SMALLCHUNK];
 967         char *buffer = small_buffer;
 968         size_t buffersize = SMALLCHUNK;
 969         int bytesread = 0;
 970         int readsize;
 971         int chunksize;
 972         int bzerror;
 973         int rewind = 0;
 974         PyObject *ret = NULL;
 975
 976         if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
 977                 return NULL;
 978
 979         ACQUIRE_LOCK(self);
 980         Util_DropReadAhead(self);
 981         switch (self->mode) {
 982                 case MODE_READ:
 983                 case MODE_READ_EOF:
 984                         break;
 985
 986                 case MODE_CLOSED:
 987                         PyErr_SetString(PyExc_ValueError,
 988                                         "I/O operation on closed file");
 989                         goto cleanup;;
 990
 991                 default:
 992                         PyErr_SetString(PyExc_IOError,
 993                                         "seek works only while reading");
 994                         goto cleanup;;
 995         }
 996
 997         if (offset < 0) {
 998                 if (where == 1) {
 999                         offset = self->pos + offset;
1000                         rewind = 1;
1001                 } else if (where == 2) {
1002                         if (self->size == -1) {
1003                                 assert(self->mode != MODE_READ_EOF);
1004                                 for (;;) {
1005                                         Py_BEGIN_ALLOW_THREADS
1006                                         chunksize = Util_UnivNewlineRead(
1007                                                         &bzerror, self->fp,
1008                                                         buffer, buffersize,
1009                                                         self);
1010                                         self->pos += chunksize;
1011                                         Py_END_ALLOW_THREADS
1012
1013                                         bytesread += chunksize;
1014                                         if (bzerror == BZ_STREAM_END) {
1015                                                 break;
1016                                         } else if (bzerror != BZ_OK) {
1017                                                 Util_CatchBZ2Error(bzerror);
1018                                                 goto cleanup;
1019                                         }
1020                                 }
1021                                 self->mode = MODE_READ_EOF;
1022                                 self->size = self->pos;
1023                                 bytesread = 0;
1024                         }
1025                         offset = self->size + offset;
1026                         if (offset >= self->pos)
1027                                 offset -= self->pos;
1028                         else
1029                                 rewind = 1;
1030                 }
1031                 if (offset < 0)
1032                         offset = 0;
1033         } else if (where == 0) {
1034                 if (offset >= self->pos)
1035                         offset -= self->pos;
1036                 else
1037                         rewind = 1;
1038         }
1039
1040         if (rewind) {
1041                 BZ2_bzReadClose(&bzerror, self->fp);
1042                 if (bzerror != BZ_OK) {
1043                         Util_CatchBZ2Error(bzerror);
1044                         goto cleanup;
1045                 }
1046                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1047                 if (!ret)
1048                         goto cleanup;
1049                 Py_DECREF(ret);
1050                 ret = NULL;
1051                 self->pos = 0;
1052                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1053                                           0, 0, NULL, 0);
1054                 if (bzerror != BZ_OK) {
1055                         Util_CatchBZ2Error(bzerror);
1056                         goto cleanup;
1057                 }
1058                 self->mode = MODE_READ;
1059         } else if (self->mode == MODE_READ_EOF) {
1060                 goto exit;
1061         }
1062
1063         if (offset == 0)
1064                 goto exit;
1065
1066         /* Before getting here, offset must be set to the number of bytes
1067          * to walk forward. */
1068         for (;;) {
1069                 if ((size_t)offset-bytesread > buffersize)
1070                         readsize = buffersize;
1071                 else
1072                         readsize = offset-bytesread;
1073                 Py_BEGIN_ALLOW_THREADS
1074                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1075                                                  buffer, readsize, self);
1076                 self->pos += chunksize;
1077                 Py_END_ALLOW_THREADS
1078                 bytesread += chunksize;
1079                 if (bzerror == BZ_STREAM_END) {
1080                         self->size = self->pos;
1081                         self->mode = MODE_READ_EOF;
1082                         break;
1083                 } else if (bzerror != BZ_OK) {
1084                         Util_CatchBZ2Error(bzerror);
1085                         goto cleanup;
1086                 }
1087                 if (bytesread == offset)
1088                         break;
1089         }
1090
1091 exit:
1092         Py_INCREF(Py_None);
1093         ret = Py_None;
1094
1095 cleanup:
1096         RELEASE_LOCK(self);
1097         return ret;
1098 }
1099
1100 PyDoc_STRVAR(BZ2File_tell__doc__,
1101 "tell() -> int\n\
1102 \n\
1103 Return the current file position, an integer (may be a long integer).\n\
1104 ");
1105
1106 static PyObject *
1107 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1108 {
1109         PyObject *ret = NULL;
1110
1111         if (self->mode == MODE_CLOSED) {
1112                 PyErr_SetString(PyExc_ValueError,
1113                                 "I/O operation on closed file");
1114                 goto cleanup;
1115         }
1116
1117         ret = PyInt_FromLong(self->pos);
1118
1119 cleanup:
1120         return ret;
1121 }
1122
1123 PyDoc_STRVAR(BZ2File_close__doc__,
1124 "close() -> None or (perhaps) an integer\n\
1125 \n\
1126 Close the file. Sets data attribute .closed to true. A closed file\n\
1127 cannot be used for further I/O operations. close() may be called more\n\
1128 than once without error.\n\
1129 ");
1130
1131 static PyObject *
1132 BZ2File_close(BZ2FileObject *self)
1133 {
1134         PyObject *ret = NULL;
1135         int bzerror = BZ_OK;
1136
1137         ACQUIRE_LOCK(self);
1138         switch (self->mode) {
1139                 case MODE_READ:
1140                 case MODE_READ_EOF:
1141                         BZ2_bzReadClose(&bzerror, self->fp);
1142                         break;
1143                 case MODE_WRITE:
1144                         BZ2_bzWriteClose(&bzerror, self->fp,
1145                                          0, NULL, NULL);
1146                         break;
1147         }
1148         self->mode = MODE_CLOSED;
1149         ret = PyObject_CallMethod(self->file, "close", NULL);
1150         if (bzerror != BZ_OK) {
1151                 Util_CatchBZ2Error(bzerror);
1152                 Py_XDECREF(ret);
1153                 ret = NULL;
1154         }
1155
1156         RELEASE_LOCK(self);
1157         return ret;
1158 }
1159
1160 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1161
1162 static PyMethodDef BZ2File_methods[] = {
1163         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1164         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1165         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1166         {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1167         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1168         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1169         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1170         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1171         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1172         {NULL,          NULL}           /* sentinel */
1173 };
1174
1175
1176 /* ===================================================================== */
1177 /* Getters and setters of BZ2File. */
1178
1179 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1180 static PyObject *
1181 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1182 {
1183         switch (self->f_newlinetypes) {
1184         case NEWLINE_UNKNOWN:
1185                 Py_INCREF(Py_None);
1186                 return Py_None;
1187         case NEWLINE_CR:
1188                 return PyString_FromString("\r");
1189         case NEWLINE_LF:
1190                 return PyString_FromString("\n");
1191         case NEWLINE_CR|NEWLINE_LF:
1192                 return Py_BuildValue("(ss)", "\r", "\n");
1193         case NEWLINE_CRLF:
1194                 return PyString_FromString("\r\n");
1195         case NEWLINE_CR|NEWLINE_CRLF:
1196                 return Py_BuildValue("(ss)", "\r", "\r\n");
1197         case NEWLINE_LF|NEWLINE_CRLF:
1198                 return Py_BuildValue("(ss)", "\n", "\r\n");
1199         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1200                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1201         default:
1202                 PyErr_Format(PyExc_SystemError,
1203                              "Unknown newlines value 0x%x\n",
1204                              self->f_newlinetypes);
1205                 return NULL;
1206         }
1207 }
1208
1209 static PyObject *
1210 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1211 {
1212         return PyInt_FromLong(self->mode == MODE_CLOSED);
1213 }
1214
1215 static PyObject *
1216 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1217 {
1218         return PyObject_GetAttrString(self->file, "mode");
1219 }
1220
1221 static PyObject *
1222 BZ2File_get_name(BZ2FileObject *self, void *closure)
1223 {
1224         return PyObject_GetAttrString(self->file, "name");
1225 }
1226
1227 static PyGetSetDef BZ2File_getset[] = {
1228         {"closed", (getter)BZ2File_get_closed, NULL,
1229                         "True if the file is closed"},
1230         {"newlines", (getter)BZ2File_get_newlines, NULL,
1231                         "end-of-line convention used in this file"},
1232         {"mode", (getter)BZ2File_get_mode, NULL,
1233                         "file mode ('r', 'w', or 'U')"},
1234         {"name", (getter)BZ2File_get_name, NULL,
1235                         "file name"},
1236         {NULL}  /* Sentinel */
1237 };
1238
1239
1240 /* ===================================================================== */
1241 /* Members of BZ2File_Type. */
1242
1243 #undef OFF
1244 #define OFF(x) offsetof(BZ2FileObject, x)
1245
1246 static PyMemberDef BZ2File_members[] = {
1247         {"softspace",   T_INT,          OFF(f_softspace), 0,
1248          "flag indicating that a space needs to be printed; used by print"},
1249         {NULL}  /* Sentinel */
1250 };
1251
1252 /* ===================================================================== */
1253 /* Slot definitions for BZ2File_Type. */
1254
1255 static int
1256 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1257 {
1258         static char *kwlist[] = {"filename", "mode", "buffering",
1259                                  "compresslevel", 0};
1260         PyObject *name;
1261         char *mode = "r";
1262         int buffering = -1;
1263         int compresslevel = 9;
1264         int bzerror;
1265         int mode_char = 0;
1266
1267         self->size = -1;
1268
1269         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1270                                          kwlist, &name, &mode, &buffering,
1271                                          &compresslevel))
1272                 return -1;
1273
1274         if (compresslevel < 1 || compresslevel > 9) {
1275                 PyErr_SetString(PyExc_ValueError,
1276                                 "compresslevel must be between 1 and 9");
1277                 return -1;
1278         }
1279
1280         for (;;) {
1281                 int error = 0;
1282                 switch (*mode) {
1283                         case 'r':
1284                         case 'w':
1285                                 if (mode_char)
1286                                         error = 1;
1287                                 mode_char = *mode;
1288                                 break;
1289
1290                         case 'b':
1291                                 break;
1292
1293                         case 'U':
1294                                 self->f_univ_newline = 1;
1295                                 break;
1296
1297                         default:
1298                                 error = 1;
1299                                 break;
1300                 }
1301                 if (error) {
1302                         PyErr_Format(PyExc_ValueError,
1303                                      "invalid mode char %c", *mode);
1304                         return -1;
1305                 }
1306                 mode++;
1307                 if (*mode == '\0')
1308                         break;
1309         }
1310
1311         mode = (mode_char == 'r') ? "rb" : "wb";
1312
1313         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1314                                            name, mode, buffering);
1315         if (self->file == NULL)
1316                 return -1;
1317
1318         /* From now on, we have stuff to dealloc, so jump to error label
1319          * instead of returning */
1320
1321 #ifdef WITH_THREAD
1322         self->lock = PyThread_allocate_lock();
1323         if (!self->lock)
1324                 goto error;
1325 #endif
1326
1327         if (mode_char == 'r')
1328                 self->fp = BZ2_bzReadOpen(&bzerror,
1329                                           PyFile_AsFile(self->file),
1330                                           0, 0, NULL, 0);
1331         else
1332                 self->fp = BZ2_bzWriteOpen(&bzerror,
1333                                            PyFile_AsFile(self->file),
1334                                            compresslevel, 0, 0);
1335
1336         if (bzerror != BZ_OK) {
1337                 Util_CatchBZ2Error(bzerror);
1338                 goto error;
1339         }
1340
1341         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1342
1343         return 0;
1344
1345 error:
1346         Py_DECREF(self->file);
1347 #ifdef WITH_THREAD
1348         if (self->lock)
1349                 PyThread_free_lock(self->lock);
1350 #endif
1351         return -1;
1352 }
1353
1354 static void
1355 BZ2File_dealloc(BZ2FileObject *self)
1356 {
1357         int bzerror;
1358 #ifdef WITH_THREAD
1359         if (self->lock)
1360                 PyThread_free_lock(self->lock);
1361 #endif
1362         switch (self->mode) {
1363                 case MODE_READ:
1364                 case MODE_READ_EOF:
1365                         BZ2_bzReadClose(&bzerror, self->fp);
1366                         break;
1367                 case MODE_WRITE:
1368                         BZ2_bzWriteClose(&bzerror, self->fp,
1369                                          0, NULL, NULL);
1370                         break;
1371         }
1372         Util_DropReadAhead(self);
1373         Py_XDECREF(self->file);
1374         self->ob_type->tp_free((PyObject *)self);
1375 }
1376
1377 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1378 static PyObject *
1379 BZ2File_getiter(BZ2FileObject *self)
1380 {
1381         if (self->mode == MODE_CLOSED) {
1382                 PyErr_SetString(PyExc_ValueError,
1383                                 "I/O operation on closed file");
1384                 return NULL;
1385         }
1386         Py_INCREF((PyObject*)self);
1387         return (PyObject *)self;
1388 }
1389
1390 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1391 #define READAHEAD_BUFSIZE 8192
1392 static PyObject *
1393 BZ2File_iternext(BZ2FileObject *self)
1394 {
1395         PyStringObject* ret;
1396         ACQUIRE_LOCK(self);
1397         if (self->mode == MODE_CLOSED) {
1398                 PyErr_SetString(PyExc_ValueError,
1399                                 "I/O operation on closed file");
1400                 return NULL;
1401         }
1402         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1403         RELEASE_LOCK(self);
1404         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1405                 Py_XDECREF(ret);
1406                 return NULL;
1407         }
1408         return (PyObject *)ret;
1409 }
1410
1411 /* ===================================================================== */
1412 /* BZ2File_Type definition. */
1413
1414 PyDoc_VAR(BZ2File__doc__) =
1415 PyDoc_STR(
1416 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1417 \n\
1418 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1419 writing. When opened for writing, the file will be created if it doesn't\n\
1420 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1421 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1422 is given, must be a number between 1 and 9.\n\
1423 ")
1424 PyDoc_STR(
1425 "\n\
1426 Add a 'U' to mode to open the file for input with universal newline\n\
1427 support. Any line ending in the input file will be seen as a '\\n' in\n\
1428 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1429 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1430 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1431 newlines are available only when reading.\n\
1432 ")
1433 ;
1434
1435 static PyTypeObject BZ2File_Type = {
1436         PyObject_HEAD_INIT(NULL)
1437         0,                      /*ob_size*/
1438         "bz2.BZ2File",          /*tp_name*/
1439         sizeof(BZ2FileObject),  /*tp_basicsize*/
1440         0,                      /*tp_itemsize*/
1441         (destructor)BZ2File_dealloc, /*tp_dealloc*/
1442         0,                      /*tp_print*/
1443         0,                      /*tp_getattr*/
1444         0,                      /*tp_setattr*/
1445         0,                      /*tp_compare*/
1446         0,                      /*tp_repr*/
1447         0,                      /*tp_as_number*/
1448         0,                      /*tp_as_sequence*/
1449         0,                      /*tp_as_mapping*/
1450         0,                      /*tp_hash*/
1451         0,                      /*tp_call*/
1452         0,                      /*tp_str*/
1453         PyObject_GenericGetAttr,/*tp_getattro*/
1454         PyObject_GenericSetAttr,/*tp_setattro*/
1455         0,                      /*tp_as_buffer*/
1456         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1457         BZ2File__doc__,         /*tp_doc*/
1458         0,                      /*tp_traverse*/
1459         0,                      /*tp_clear*/
1460         0,                      /*tp_richcompare*/
1461         0,                      /*tp_weaklistoffset*/
1462         (getiterfunc)BZ2File_getiter, /*tp_iter*/
1463         (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1464         BZ2File_methods,        /*tp_methods*/
1465         BZ2File_members,        /*tp_members*/
1466         BZ2File_getset,         /*tp_getset*/
1467         0,                      /*tp_base*/
1468         0,                      /*tp_dict*/
1469         0,                      /*tp_descr_get*/
1470         0,                      /*tp_descr_set*/
1471         0,                      /*tp_dictoffset*/
1472         (initproc)BZ2File_init, /*tp_init*/
1473         PyType_GenericAlloc,    /*tp_alloc*/
1474         PyType_GenericNew,      /*tp_new*/
1475         _PyObject_Del,          /*tp_free*/
1476         0,                      /*tp_is_gc*/
1477 };
1478
1479
1480 /* ===================================================================== */
1481 /* Methods of BZ2Comp. */
1482
1483 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1484 "compress(data) -> string\n\
1485 \n\
1486 Provide more data to the compressor object. It will return chunks of\n\
1487 compressed data whenever possible. When you've finished providing data\n\
1488 to compress, call the flush() method to finish the compression process,\n\
1489 and return what is left in the internal buffers.\n\
1490 ");
1491
1492 static PyObject *
1493 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1494 {
1495         char *data;
1496         int datasize;
1497         int bufsize = SMALLCHUNK;
1498         PY_LONG_LONG totalout;
1499         PyObject *ret = NULL;
1500         bz_stream *bzs = &self->bzs;
1501         int bzerror;
1502
1503         if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1504                 return NULL;
1505
1506         if (datasize == 0)
1507                 return PyString_FromString("");
1508
1509         ACQUIRE_LOCK(self);
1510         if (!self->running) {
1511                 PyErr_SetString(PyExc_ValueError,
1512                                 "this object was already flushed");
1513                 goto error;
1514         }
1515
1516         ret = PyString_FromStringAndSize(NULL, bufsize);
1517         if (!ret)
1518                 goto error;
1519
1520         bzs->next_in = data;
1521         bzs->avail_in = datasize;
1522         bzs->next_out = BUF(ret);
1523         bzs->avail_out = bufsize;
1524
1525         totalout = BZS_TOTAL_OUT(bzs);
1526
1527         for (;;) {
1528                 Py_BEGIN_ALLOW_THREADS
1529                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1530                 Py_END_ALLOW_THREADS
1531                 if (bzerror != BZ_RUN_OK) {
1532                         Util_CatchBZ2Error(bzerror);
1533                         goto error;
1534                 }
1535                 if (bzs->avail_out == 0) {
1536                         bufsize = Util_NewBufferSize(bufsize);
1537                         if (_PyString_Resize(&ret, bufsize) < 0) {
1538                                 BZ2_bzCompressEnd(bzs);
1539                                 goto error;
1540                         }
1541                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1542                                                     - totalout);
1543                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1544                 } else if (bzs->avail_in == 0) {
1545                         break;
1546                 }
1547         }
1548
1549         _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1550
1551         RELEASE_LOCK(self);
1552         return ret;
1553
1554 error:
1555         RELEASE_LOCK(self);
1556         Py_XDECREF(ret);
1557         return NULL;
1558 }
1559
1560 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1561 "flush() -> string\n\
1562 \n\
1563 Finish the compression process and return what is left in internal buffers.\n\
1564 You must not use the compressor object after calling this method.\n\
1565 ");
1566
1567 static PyObject *
1568 BZ2Comp_flush(BZ2CompObject *self)
1569 {
1570         int bufsize = SMALLCHUNK;
1571         PyObject *ret = NULL;
1572         bz_stream *bzs = &self->bzs;
1573         PY_LONG_LONG totalout;
1574         int bzerror;
1575
1576         ACQUIRE_LOCK(self);
1577         if (!self->running) {
1578                 PyErr_SetString(PyExc_ValueError, "object was already "
1579                                                   "flushed");
1580                 goto error;
1581         }
1582         self->running = 0;
1583
1584         ret = PyString_FromStringAndSize(NULL, bufsize);
1585         if (!ret)
1586                 goto error;
1587
1588         bzs->next_out = BUF(ret);
1589         bzs->avail_out = bufsize;
1590
1591         totalout = BZS_TOTAL_OUT(bzs);
1592
1593         for (;;) {
1594                 Py_BEGIN_ALLOW_THREADS
1595                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1596                 Py_END_ALLOW_THREADS
1597                 if (bzerror == BZ_STREAM_END) {
1598                         break;
1599                 } else if (bzerror != BZ_FINISH_OK) {
1600                         Util_CatchBZ2Error(bzerror);
1601                         goto error;
1602                 }
1603                 if (bzs->avail_out == 0) {
1604                         bufsize = Util_NewBufferSize(bufsize);
1605                         if (_PyString_Resize(&ret, bufsize) < 0)
1606                                 goto error;
1607                         bzs->next_out = BUF(ret);
1608                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1609                                                     - totalout);
1610                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1611                 }
1612         }
1613
1614         if (bzs->avail_out != 0)
1615                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1616
1617         RELEASE_LOCK(self);
1618         return ret;
1619
1620 error:
1621         RELEASE_LOCK(self);
1622         Py_XDECREF(ret);
1623         return NULL;
1624 }
1625
1626 static PyMethodDef BZ2Comp_methods[] = {
1627         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1628          BZ2Comp_compress__doc__},
1629         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1630          BZ2Comp_flush__doc__},
1631         {NULL,          NULL}           /* sentinel */
1632 };
1633
1634
1635 /* ===================================================================== */
1636 /* Slot definitions for BZ2Comp_Type. */
1637
1638 static int
1639 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1640 {
1641         int compresslevel = 9;
1642         int bzerror;
1643         static char *kwlist[] = {"compresslevel", 0};
1644
1645         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1646                                          kwlist, &compresslevel))
1647                 return -1;
1648
1649         if (compresslevel < 1 || compresslevel > 9) {
1650                 PyErr_SetString(PyExc_ValueError,
1651                                 "compresslevel must be between 1 and 9");
1652                 goto error;
1653         }
1654
1655 #ifdef WITH_THREAD
1656         self->lock = PyThread_allocate_lock();
1657         if (!self->lock)
1658                 goto error;
1659 #endif
1660
1661         memset(&self->bzs, 0, sizeof(bz_stream));
1662         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1663         if (bzerror != BZ_OK) {
1664                 Util_CatchBZ2Error(bzerror);
1665                 goto error;
1666         }
1667
1668         self->running = 1;
1669
1670         return 0;
1671 error:
1672 #ifdef WITH_THREAD
1673         if (self->lock)
1674                 PyThread_free_lock(self->lock);
1675 #endif
1676         return -1;
1677 }
1678
1679 static void
1680 BZ2Comp_dealloc(BZ2CompObject *self)
1681 {
1682 #ifdef WITH_THREAD
1683         if (self->lock)
1684                 PyThread_free_lock(self->lock);
1685 #endif
1686         BZ2_bzCompressEnd(&self->bzs);
1687         self->ob_type->tp_free((PyObject *)self);
1688 }
1689
1690
1691 /* ===================================================================== */
1692 /* BZ2Comp_Type definition. */
1693
/* Docstring installed as tp_doc of BZ2Comp_Type. */
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/*
 * Type object for bz2.BZ2Compressor.
 *
 * The initializer is positional: every entry must stay in the slot order
 * named by its trailing comment.  The header is initialized with a NULL
 * type pointer; initbz2() assigns ob_type before the type is published.
 * Instances are set up by BZ2Comp_init, torn down by BZ2Comp_dealloc and
 * expose the methods listed in BZ2Comp_methods.
 */
static PyTypeObject BZ2Comp_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2Compressor",    /*tp_name*/
        sizeof(BZ2CompObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Comp__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Comp_methods,        /*tp_methods*/
        0,                      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Comp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1746
1747
1748 /* ===================================================================== */
1749 /* Members of BZ2Decomp. */
1750
1751 #undef OFF
1752 #define OFF(x) offsetof(BZ2DecompObject, x)
1753
1754 static PyMemberDef BZ2Decomp_members[] = {
1755         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1756         {NULL}  /* Sentinel */
1757 };
1758
1759
1760 /* ===================================================================== */
1761 /* Methods of BZ2Decomp. */
1762
1763 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1764 "decompress(data) -> string\n\
1765 \n\
1766 Provide more data to the decompressor object. It will return chunks\n\
1767 of decompressed data whenever possible. If you try to decompress data\n\
1768 after the end of stream is found, EOFError will be raised. If any data\n\
1769 was found after the end of stream, it'll be ignored and saved in\n\
1770 unused_data attribute.\n\
1771 ");
1772
1773 static PyObject *
1774 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1775 {
1776         char *data;
1777         int datasize;
1778         int bufsize = SMALLCHUNK;
1779         PY_LONG_LONG totalout;
1780         PyObject *ret = NULL;
1781         bz_stream *bzs = &self->bzs;
1782         int bzerror;
1783
1784         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1785                 return NULL;
1786
1787         ACQUIRE_LOCK(self);
1788         if (!self->running) {
1789                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1790                                                 "already found");
1791                 goto error;
1792         }
1793
1794         ret = PyString_FromStringAndSize(NULL, bufsize);
1795         if (!ret)
1796                 goto error;
1797
1798         bzs->next_in = data;
1799         bzs->avail_in = datasize;
1800         bzs->next_out = BUF(ret);
1801         bzs->avail_out = bufsize;
1802
1803         totalout = BZS_TOTAL_OUT(bzs);
1804
1805         for (;;) {
1806                 Py_BEGIN_ALLOW_THREADS
1807                 bzerror = BZ2_bzDecompress(bzs);
1808                 Py_END_ALLOW_THREADS
1809                 if (bzerror == BZ_STREAM_END) {
1810                         if (bzs->avail_in != 0) {
1811                                 Py_DECREF(self->unused_data);
1812                                 self->unused_data =
1813                                     PyString_FromStringAndSize(bzs->next_in,
1814                                                                bzs->avail_in);
1815                         }
1816                         self->running = 0;
1817                         break;
1818                 }
1819                 if (bzerror != BZ_OK) {
1820                         Util_CatchBZ2Error(bzerror);
1821                         goto error;
1822                 }
1823                 if (bzs->avail_out == 0) {
1824                         bufsize = Util_NewBufferSize(bufsize);
1825                         if (_PyString_Resize(&ret, bufsize) < 0) {
1826                                 BZ2_bzDecompressEnd(bzs);
1827                                 goto error;
1828                         }
1829                         bzs->next_out = BUF(ret);
1830                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1831                                                     - totalout);
1832                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1833                 } else if (bzs->avail_in == 0) {
1834                         break;
1835                 }
1836         }
1837
1838         if (bzs->avail_out != 0)
1839                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1840
1841         RELEASE_LOCK(self);
1842         return ret;
1843
1844 error:
1845         RELEASE_LOCK(self);
1846         Py_XDECREF(ret);
1847         return NULL;
1848 }
1849
1850 static PyMethodDef BZ2Decomp_methods[] = {
1851         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1852         {NULL,          NULL}           /* sentinel */
1853 };
1854
1855
1856 /* ===================================================================== */
1857 /* Slot definitions for BZ2Decomp_Type. */
1858
1859 static int
1860 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1861 {
1862         int bzerror;
1863
1864         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1865                 return -1;
1866
1867 #ifdef WITH_THREAD
1868         self->lock = PyThread_allocate_lock();
1869         if (!self->lock)
1870                 goto error;
1871 #endif
1872
1873         self->unused_data = PyString_FromString("");
1874         if (!self->unused_data)
1875                 goto error;
1876
1877         memset(&self->bzs, 0, sizeof(bz_stream));
1878         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1879         if (bzerror != BZ_OK) {
1880                 Util_CatchBZ2Error(bzerror);
1881                 goto error;
1882         }
1883
1884         self->running = 1;
1885
1886         return 0;
1887
1888 error:
1889 #ifdef WITH_THREAD
1890         if (self->lock)
1891                 PyThread_free_lock(self->lock);
1892 #endif
1893         Py_XDECREF(self->unused_data);
1894         return -1;
1895 }
1896
1897 static void
1898 BZ2Decomp_dealloc(BZ2DecompObject *self)
1899 {
1900 #ifdef WITH_THREAD
1901         if (self->lock)
1902                 PyThread_free_lock(self->lock);
1903 #endif
1904         Py_XDECREF(self->unused_data);
1905         BZ2_bzDecompressEnd(&self->bzs);
1906         self->ob_type->tp_free((PyObject *)self);
1907 }
1908
1909
1910 /* ===================================================================== */
1911 /* BZ2Decomp_Type definition. */
1912
/* Docstring installed as tp_doc of BZ2Decomp_Type. */
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/*
 * Type object for bz2.BZ2Decompressor.
 *
 * The initializer is positional: every entry must stay in the slot order
 * named by its trailing comment.  The header is initialized with a NULL
 * type pointer; initbz2() assigns ob_type before the type is published.
 * Instances are set up by BZ2Decomp_init, torn down by BZ2Decomp_dealloc
 * and expose BZ2Decomp_methods plus the members in BZ2Decomp_members.
 */
static PyTypeObject BZ2Decomp_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2Decompressor",  /*tp_name*/
        sizeof(BZ2DecompObject), /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Decomp__doc__,       /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Decomp_methods,      /*tp_methods*/
        BZ2Decomp_members,      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Decomp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1964
1965
1966 /* ===================================================================== */
1967 /* Module functions. */
1968
1969 PyDoc_STRVAR(bz2_compress__doc__,
1970 "compress(data [, compresslevel=9]) -> string\n\
1971 \n\
1972 Compress data in one shot. If you want to compress data sequentially,\n\
1973 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1974 given, must be a number between 1 and 9.\n\
1975 ");
1976
1977 static PyObject *
1978 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1979 {
1980         int compresslevel=9;
1981         char *data;
1982         int datasize;
1983         int bufsize;
1984         PyObject *ret = NULL;
1985         bz_stream _bzs;
1986         bz_stream *bzs = &_bzs;
1987         int bzerror;
1988         static char *kwlist[] = {"data", "compresslevel", 0};
1989
1990         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1991                                          kwlist, &data, &datasize,
1992                                          &compresslevel))
1993                 return NULL;
1994
1995         if (compresslevel < 1 || compresslevel > 9) {
1996                 PyErr_SetString(PyExc_ValueError,
1997                                 "compresslevel must be between 1 and 9");
1998                 return NULL;
1999         }
2000
2001         /* Conforming to bz2 manual, this is large enough to fit compressed
2002          * data in one shot. We will check it later anyway. */
2003         bufsize = datasize + (datasize/100+1) + 600;
2004
2005         ret = PyString_FromStringAndSize(NULL, bufsize);
2006         if (!ret)
2007                 return NULL;
2008
2009         memset(bzs, 0, sizeof(bz_stream));
2010
2011         bzs->next_in = data;
2012         bzs->avail_in = datasize;
2013         bzs->next_out = BUF(ret);
2014         bzs->avail_out = bufsize;
2015
2016         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2017         if (bzerror != BZ_OK) {
2018                 Util_CatchBZ2Error(bzerror);
2019                 Py_DECREF(ret);
2020                 return NULL;
2021         }
2022
2023         for (;;) {
2024                 Py_BEGIN_ALLOW_THREADS
2025                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2026                 Py_END_ALLOW_THREADS
2027                 if (bzerror == BZ_STREAM_END) {
2028                         break;
2029                 } else if (bzerror != BZ_FINISH_OK) {
2030                         BZ2_bzCompressEnd(bzs);
2031                         Util_CatchBZ2Error(bzerror);
2032                         Py_DECREF(ret);
2033                         return NULL;
2034                 }
2035                 if (bzs->avail_out == 0) {
2036                         bufsize = Util_NewBufferSize(bufsize);
2037                         if (_PyString_Resize(&ret, bufsize) < 0) {
2038                                 BZ2_bzCompressEnd(bzs);
2039                                 Py_DECREF(ret);
2040                                 return NULL;
2041                         }
2042                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2043                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2044                 }
2045         }
2046
2047         if (bzs->avail_out != 0)
2048                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2049         BZ2_bzCompressEnd(bzs);
2050
2051         return ret;
2052 }
2053
2054 PyDoc_STRVAR(bz2_decompress__doc__,
2055 "decompress(data) -> decompressed data\n\
2056 \n\
2057 Decompress data in one shot. If you want to decompress data sequentially,\n\
2058 use an instance of BZ2Decompressor instead.\n\
2059 ");
2060
2061 static PyObject *
2062 bz2_decompress(PyObject *self, PyObject *args)
2063 {
2064         char *data;
2065         int datasize;
2066         int bufsize = SMALLCHUNK;
2067         PyObject *ret;
2068         bz_stream _bzs;
2069         bz_stream *bzs = &_bzs;
2070         int bzerror;
2071
2072         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2073                 return NULL;
2074
2075         if (datasize == 0)
2076                 return PyString_FromString("");
2077
2078         ret = PyString_FromStringAndSize(NULL, bufsize);
2079         if (!ret)
2080                 return NULL;
2081
2082         memset(bzs, 0, sizeof(bz_stream));
2083
2084         bzs->next_in = data;
2085         bzs->avail_in = datasize;
2086         bzs->next_out = BUF(ret);
2087         bzs->avail_out = bufsize;
2088
2089         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2090         if (bzerror != BZ_OK) {
2091                 Util_CatchBZ2Error(bzerror);
2092                 Py_DECREF(ret);
2093                 return NULL;
2094         }
2095
2096         for (;;) {
2097                 Py_BEGIN_ALLOW_THREADS
2098                 bzerror = BZ2_bzDecompress(bzs);
2099                 Py_END_ALLOW_THREADS
2100                 if (bzerror == BZ_STREAM_END) {
2101                         break;
2102                 } else if (bzerror != BZ_OK) {
2103                         BZ2_bzDecompressEnd(bzs);
2104                         Util_CatchBZ2Error(bzerror);
2105                         Py_DECREF(ret);
2106                         return NULL;
2107                 }
2108                 if (bzs->avail_out == 0) {
2109                         bufsize = Util_NewBufferSize(bufsize);
2110                         if (_PyString_Resize(&ret, bufsize) < 0) {
2111                                 BZ2_bzDecompressEnd(bzs);
2112                                 Py_DECREF(ret);
2113                                 return NULL;
2114                         }
2115                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2116                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2117                 } else if (bzs->avail_in == 0) {
2118                         BZ2_bzDecompressEnd(bzs);
2119                         PyErr_SetString(PyExc_ValueError,
2120                                         "couldn't find end of stream");
2121                         Py_DECREF(ret);
2122                         return NULL;
2123                 }
2124         }
2125
2126         if (bzs->avail_out != 0)
2127                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2128         BZ2_bzDecompressEnd(bzs);
2129
2130         return ret;
2131 }
2132
2133 static PyMethodDef bz2_methods[] = {
2134         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2135                 bz2_compress__doc__},
2136         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2137                 bz2_decompress__doc__},
2138         {NULL,          NULL}           /* sentinel */
2139 };
2140
2141 /* ===================================================================== */
2142 /* Initialization function. */
2143
2144 PyDoc_STRVAR(bz2__doc__,
2145 "The python bz2 module provides a comprehensive interface for\n\
2146 the bz2 compression library. It implements a complete file\n\
2147 interface, one shot (de)compression functions, and types for\n\
2148 sequential (de)compression.\n\
2149 ");
2150
2151 PyMODINIT_FUNC
2152 initbz2(void)
2153 {
2154         PyObject *m;
2155
2156         BZ2File_Type.ob_type = &PyType_Type;
2157         BZ2Comp_Type.ob_type = &PyType_Type;
2158         BZ2Decomp_Type.ob_type = &PyType_Type;
2159
2160         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2161
2162         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2163
2164         Py_INCREF(&BZ2File_Type);
2165         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2166
2167         Py_INCREF(&BZ2Comp_Type);
2168         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2169
2170         Py_INCREF(&BZ2Decomp_Type);
2171         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2172 }