Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  26
  27 #define MODE_CLOSED   0
  28 #define MODE_READ     1
  29 #define MODE_READ_EOF 2
  30 #define MODE_WRITE    3
  31
  32 #define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
  33
  34 #if SIZEOF_LONG >= 8
  35 #define BZS_TOTAL_OUT(bzs) \
  36         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  37 #elif SIZEOF_LONG_LONG >= 8
  38 #define BZS_TOTAL_OUT(bzs) \
  39         (((LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  40 #else
  41 #define BZS_TOTAL_OUT(bzs) \
  42         bzs->total_out_lo32;
  43 #endif
  44
  45 #ifdef WITH_THREAD
  46 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  47 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  48 #else
  49 #define ACQUIRE_LOCK(obj)
  50 #define RELEASE_LOCK(obj)
  51 #endif
  52
  53 #ifdef WITH_UNIVERSAL_NEWLINES
  54 /* Bits in f_newlinetypes */
  55 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  56 #define NEWLINE_CR 1            /* \r newline seen */
  57 #define NEWLINE_LF 2            /* \n newline seen */
  58 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  59 #endif
  60
  61 /* ===================================================================== */
  62 /* Structure definitions. */
  63
  64 typedef struct {
  65         PyFileObject file;      /* embedded file object; BZ2FileObject pointers are cast to PyFileObject* */
  66         BZFILE *fp;             /* bzlib handle handed to BZ2_bzRead() / BZ2_bzWrite() */
  67         int mode;               /* one of the MODE_* constants */
  68         long pos;               /* running count advanced by the read and write paths */
  69         long size;              /* set to pos when a read reports BZ_STREAM_END */
  70 #ifdef WITH_THREAD
  71         PyThread_type_lock lock;        /* taken and released through ACQUIRE_LOCK / RELEASE_LOCK */
  72 #endif
  73 } BZ2FileObject;
  74
  75 typedef struct {
  76         PyObject_HEAD
  77         bz_stream bzs;          /* bzlib stream state */
  78         int running;            /* NOTE(review): presumably nonzero while the stream is usable - confirm against the methods */
  79 #ifdef WITH_THREAD
  80         PyThread_type_lock lock;        /* NOTE(review): presumably used with ACQUIRE_LOCK / RELEASE_LOCK - confirm */
  81 #endif
  82 } BZ2CompObject;
  83
  84 typedef struct {
  85         PyObject_HEAD
  86         bz_stream bzs;          /* bzlib stream state */
  87         int running;            /* NOTE(review): presumably nonzero while the stream is usable - confirm against the methods */
  88         PyObject *unused_data;  /* NOTE(review): presumably input left over after the end of the stream - confirm */
  89 #ifdef WITH_THREAD
  90         PyThread_type_lock lock;        /* NOTE(review): presumably used with ACQUIRE_LOCK / RELEASE_LOCK - confirm */
  91 #endif
  92 } BZ2DecompObject;
  93
  94 /* ===================================================================== */
  95 /* Utility functions. */
  96
  97 static int
  98 Util_CatchBZ2Error(int bzerror)
  99 {
 100         int ret = 0;
 101         switch(bzerror) {
 102                 case BZ_OK:
 103                 case BZ_STREAM_END:
 104                         break;
 105
 106                 case BZ_CONFIG_ERROR:
 107                         PyErr_SetString(PyExc_SystemError,
 108                                         "the bz2 library was not compiled "
 109                                         "correctly");
 110                         ret = 1;
 111                         break;
 112
 113                 case BZ_PARAM_ERROR:
 114                         PyErr_SetString(PyExc_ValueError,
 115                                         "the bz2 library has received wrong "
 116                                         "parameters");
 117                         ret = 1;
 118                         break;
 119
 120                 case BZ_MEM_ERROR:
 121                         PyErr_NoMemory();
 122                         ret = 1;
 123                         break;
 124
 125                 case BZ_DATA_ERROR:
 126                 case BZ_DATA_ERROR_MAGIC:
 127                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 128                         ret = 1;
 129                         break;
 130
 131                 case BZ_IO_ERROR:
 132                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 133                         ret = 1;
 134                         break;
 135
 136                 case BZ_UNEXPECTED_EOF:
 137                         PyErr_SetString(PyExc_EOFError,
 138                                         "compressed file ended before the "
 139                                         "logical end-of-stream was detected");
 140                         ret = 1;
 141                         break;
 142
 143                 case BZ_SEQUENCE_ERROR:
 144                         PyErr_SetString(PyExc_RuntimeError,
 145                                         "wrong sequence of bz2 library "
 146                                         "commands used");
 147                         ret = 1;
 148                         break;
 149         }
 150         return ret;
 151 }
 152
 153 #if BUFSIZ < 8192
 154 #define SMALLCHUNK 8192
 155 #else
 156 #define SMALLCHUNK BUFSIZ
 157 #endif
 158
 159 #if SIZEOF_INT < 4
 160 #define BIGCHUNK  (512 * 32)
 161 #else
 162 #define BIGCHUNK  (512 * 1024)
 163 #endif
 164
 165 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 166 static size_t
 167 Util_NewBufferSize(size_t currentsize)
 168 {
 169         if (currentsize > SMALLCHUNK) {
 170                 /* Keep doubling until we reach BIGCHUNK;
 171                    then keep adding BIGCHUNK. */
 172                 if (currentsize <= BIGCHUNK)
 173                         return currentsize + currentsize;
 174                 else
 175                         return currentsize + BIGCHUNK;
 176         }
 177         return currentsize + SMALLCHUNK;
 178 }
 179
 180 /* This is a hacked version of Python's fileobject.c:get_line(). */
 181 static PyObject *
 182 Util_GetLine(BZ2FileObject *self, int n)
 183 {
 184         char c;
 185         char *buf, *end;
 186         size_t total_v_size;    /* total # of slots in buffer */
 187         size_t used_v_size;     /* # used slots in buffer */
 188         size_t increment;       /* amount to increment the buffer */
 189         PyObject *v;
 190         int bzerror;
 191 #ifdef WITH_UNIVERSAL_NEWLINES
 192         int newlinetypes = ((PyFileObject*)self)->f_newlinetypes;
 193         int skipnextlf = ((PyFileObject*)self)->f_skipnextlf;
 194         int univ_newline = ((PyFileObject*)self)->f_univ_newline;
 195 #endif
 196
 197         total_v_size = n > 0 ? n : 100;
 198         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 199         if (v == NULL)
 200                 return NULL;
 201
 202         buf = BUF(v);
 203         end = buf + total_v_size;
 204
 205         for (;;) {
 206                 Py_BEGIN_ALLOW_THREADS
 207 #ifdef WITH_UNIVERSAL_NEWLINES
 208                 if (univ_newline) {
 209                         while (1) {
 210                                 BZ2_bzRead(&bzerror, self->fp, &c, 1);
 211                                 self->pos++;
 212                                 if (bzerror != BZ_OK || buf == end)
 213                                         break;
 214                                 if (skipnextlf) {
 215                                         skipnextlf = 0;
 216                                         if (c == '\n') {
 217                                                 /* Seeing a \n here with
 218                                                  * skipnextlf true means we
 219                                                  * saw a \r before.
 220                                                  */
 221                                                 newlinetypes |= NEWLINE_CRLF;
 222                                                 BZ2_bzRead(&bzerror, self->fp,
 223                                                            &c, 1);
 224                                                 if (bzerror != BZ_OK)
 225                                                         break;
 226                                         } else {
 227                                                 newlinetypes |= NEWLINE_CR;
 228                                         }
 229                                 }
 230                                 if (c == '\r') {
 231                                         skipnextlf = 1;
 232                                         c = '\n';
 233                                 } else if ( c == '\n')
 234                                         newlinetypes |= NEWLINE_LF;
 235                                 *buf++ = c;
 236                                 if (c == '\n') break;
 237                         }
 238                         if (bzerror == BZ_STREAM_END && skipnextlf)
 239                                 newlinetypes |= NEWLINE_CR;
 240                 } else /* If not universal newlines use the normal loop */
 241 #endif
 242                         do {
 243                                 BZ2_bzRead(&bzerror, self->fp, &c, 1);
 244                                 self->pos++;
 245                                 *buf++ = c;
 246                         } while (bzerror == BZ_OK && c != '\n' && buf != end);
 247                 Py_END_ALLOW_THREADS
 248 #ifdef WITH_UNIVERSAL_NEWLINES
 249                 ((PyFileObject*)self)->f_newlinetypes = newlinetypes;
 250                 ((PyFileObject*)self)->f_skipnextlf = skipnextlf;
 251 #endif
 252                 if (bzerror == BZ_STREAM_END) {
 253                         self->size = self->pos;
 254                         self->mode = MODE_READ_EOF;
 255                         break;
 256                 } else if (bzerror != BZ_OK) {
 257                         Util_CatchBZ2Error(bzerror);
 258                         Py_DECREF(v);
 259                         return NULL;
 260                 }
 261                 if (c == '\n')
 262                         break;
 263                 /* Must be because buf == end */
 264                 if (n > 0)
 265                         break;
 266                 used_v_size = total_v_size;
 267                 increment = total_v_size >> 2; /* mild exponential growth */
 268                 total_v_size += increment;
 269                 if (total_v_size > INT_MAX) {
 270                         PyErr_SetString(PyExc_OverflowError,
 271                             "line is longer than a Python string can hold");
 272                         Py_DECREF(v);
 273                         return NULL;
 274                 }
 275                 if (_PyString_Resize(&v, total_v_size) < 0)
 276                         return NULL;
 277                 buf = BUF(v) + used_v_size;
 278                 end = BUF(v) + total_v_size;
 279         }
 280
 281         used_v_size = buf - BUF(v);
 282         if (used_v_size != total_v_size)
 283                 _PyString_Resize(&v, used_v_size);
 284         return v;
 285 }
 286
 287 #ifndef WITH_UNIVERSAL_NEWLINES
 288 #define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
 289 #else
 290 /* This is a hacked version of Python's
 291  * fileobject.c:Py_UniversalNewlineFread(). */
 292 size_t
 293 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 294                      char* buf, size_t n, BZ2FileObject *fobj)
 295 {
 296         char *dst = buf;
 297         PyFileObject *f = (PyFileObject *)fobj;
 298         int newlinetypes, skipnextlf;
 299
 300         assert(buf != NULL);
 301         assert(stream != NULL);
 302
 303         if (!f->f_univ_newline)
 304                 return BZ2_bzRead(bzerror, stream, buf, n);
 305
 306         newlinetypes = f->f_newlinetypes;
 307         skipnextlf = f->f_skipnextlf;
 308
 309         /* Invariant:  n is the number of bytes remaining to be filled
 310          * in the buffer.
 311          */
 312         while (n) {
 313                 size_t nread;
 314                 int shortread;
 315                 char *src = dst;
 316
 317                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 318                 assert(nread <= n);
 319                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 320                 shortread = n != 0;     /* true iff EOF or error */
 321                 while (nread--) {
 322                         char c = *src++;
 323                         if (c == '\r') {
 324                                 /* Save as LF and set flag to skip next LF. */
 325                                 *dst++ = '\n';
 326                                 skipnextlf = 1;
 327                         }
 328                         else if (skipnextlf && c == '\n') {
 329                                 /* Skip LF, and remember we saw CR LF. */
 330                                 skipnextlf = 0;
 331                                 newlinetypes |= NEWLINE_CRLF;
 332                                 ++n;
 333                         }
 334                         else {
 335                                 /* Normal char to be stored in buffer.  Also
 336                                  * update the newlinetypes flag if either this
 337                                  * is an LF or the previous char was a CR.
 338                                  */
 339                                 if (c == '\n')
 340                                         newlinetypes |= NEWLINE_LF;
 341                                 else if (skipnextlf)
 342                                         newlinetypes |= NEWLINE_CR;
 343                                 *dst++ = c;
 344                                 skipnextlf = 0;
 345                         }
 346                 }
 347                 if (shortread) {
 348                         /* If this is EOF, update type flags. */
 349                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 350                                 newlinetypes |= NEWLINE_CR;
 351                         break;
 352                 }
 353         }
 354         f->f_newlinetypes = newlinetypes;
 355         f->f_skipnextlf = skipnextlf;
 356         return dst - buf;
 357 }
 358 #endif
 359
 360 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 361 static void
 362 Util_DropReadAhead(BZ2FileObject *self)
 363 {
 364         PyFileObject *f = (PyFileObject*)self;
 365         if (f->f_buf != NULL) {
 366                 PyMem_Free(f->f_buf);
 367                 f->f_buf = NULL;
 368         }
 369 }
 370
 371 /* This is a hacked version of Python's fileobject.c:readahead(). */
 372 static int
 373 Util_ReadAhead(BZ2FileObject *self, int bufsize)
 374 {
 375         int chunksize;
 376         int bzerror;
 377         PyFileObject *f = (PyFileObject*)self;
 378
 379         if (f->f_buf != NULL) {
 380                 if((f->f_bufend - f->f_bufptr) >= 1)
 381                         return 0;
 382                 else
 383                         Util_DropReadAhead(self);
 384         }
 385         if (self->mode == MODE_READ_EOF) {
 386                 return -1;
 387         }
 388         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 389                 return -1;
 390         }
 391         Py_BEGIN_ALLOW_THREADS
 392         chunksize = Util_UnivNewlineRead(&bzerror, self->fp, f->f_buf,
 393                                          bufsize, self);
 394         Py_END_ALLOW_THREADS
 395         self->pos += chunksize;
 396         if (bzerror == BZ_STREAM_END) {
 397                 self->size = self->pos;
 398                 self->mode = MODE_READ_EOF;
 399         } else if (bzerror != BZ_OK) {
 400                 Util_CatchBZ2Error(bzerror);
 401                 Util_DropReadAhead(self);
 402                 return -1;
 403         }
 404         f->f_bufptr = f->f_buf;
 405         f->f_bufend = f->f_buf + chunksize;
 406         return 0;
 407 }
 408
 409 /* This is a hacked version of Python's
 410  * fileobject.c:readahead_get_line_skip(). */
 411 static PyStringObject *
 412 Util_ReadAheadGetLineSkip(BZ2FileObject *bf, int skip, int bufsize)
 413 {
 414         PyFileObject *f = (PyFileObject*)bf;
 415         PyStringObject* s;
 416         char *bufptr;
 417         char *buf;
 418         int len;
 419
 420         if (f->f_buf == NULL)
 421                 if (Util_ReadAhead(bf, bufsize) < 0)
 422                         return NULL;
 423
 424         len = f->f_bufend - f->f_bufptr;
 425         if (len == 0)
 426                 return (PyStringObject *)
 427                         PyString_FromStringAndSize(NULL, skip);
 428         bufptr = memchr(f->f_bufptr, '\n', len);
 429         if (bufptr != NULL) {
 430                 bufptr++;                       /* Count the '\n' */
 431                 len = bufptr - f->f_bufptr;
 432                 s = (PyStringObject *)
 433                         PyString_FromStringAndSize(NULL, skip+len);
 434                 if (s == NULL)
 435                         return NULL;
 436                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 437                 f->f_bufptr = bufptr;
 438                 if (bufptr == f->f_bufend)
 439                         Util_DropReadAhead(bf);
 440         } else {
 441                 bufptr = f->f_bufptr;
 442                 buf = f->f_buf;
 443                 f->f_buf = NULL;        /* Force new readahead buffer */
 444                 s = Util_ReadAheadGetLineSkip(
 445                         bf, skip+len, bufsize + (bufsize>>2) );
 446                 if (s == NULL) {
 447                         PyMem_Free(buf);
 448                         return NULL;
 449                 }
 450                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 451                 PyMem_Free(buf);
 452         }
 453         return s;
 454 }
 455
 456 /* ===================================================================== */
 457 /* Methods of BZ2File. */
 458
 459 PyDoc_STRVAR(BZ2File_read__doc__,
 460 "read([size]) -> string\n\
 461 \n\
 462 Read at most size uncompressed bytes, returned as a string. If the size\n\
 463 argument is negative or omitted, read until EOF is reached.\n\
 464 ");
 465
 466 /* This is a hacked version of Python's fileobject.c:file_read(). */
 467 static PyObject *
 468 BZ2File_read(BZ2FileObject *self, PyObject *args)
 469 {
 470         long bytesrequested = -1;
 471         size_t bytesread, buffersize, chunksize;
 472         int bzerror;
 473         PyObject *ret = NULL;
 474
 475         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 476                 return NULL;
 477
 478         ACQUIRE_LOCK(self);
 479         switch (self->mode) {
 480                 case MODE_READ:
 481                         break;
 482                 case MODE_READ_EOF:
 483                         ret = PyString_FromString("");
 484                         goto cleanup;
 485                 case MODE_CLOSED:
 486                         PyErr_SetString(PyExc_ValueError,
 487                                         "I/O operation on closed file");
 488                         goto cleanup;
 489                 default:
 490                         PyErr_SetString(PyExc_IOError,
 491                                         "file is not ready for reading");
 492                         goto cleanup;
 493         }
 494
 495         if (bytesrequested < 0)
 496                 buffersize = Util_NewBufferSize((size_t)0);
 497         else
 498                 buffersize = bytesrequested;
 499         if (buffersize > INT_MAX) {
 500                 PyErr_SetString(PyExc_OverflowError,
 501                                 "requested number of bytes is "
 502                                 "more than a Python string can hold");
 503                 goto cleanup;
 504         }
 505         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 506         if (ret == NULL)
 507                 goto cleanup;
 508         bytesread = 0;
 509
 510         for (;;) {
 511                 Py_BEGIN_ALLOW_THREADS
 512                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 513                                                  BUF(ret)+bytesread,
 514                                                  buffersize-bytesread,
 515                                                  self);
 516                 self->pos += chunksize;
 517                 Py_END_ALLOW_THREADS
 518                 bytesread += chunksize;
 519                 if (bzerror == BZ_STREAM_END) {
 520                         self->size = self->pos;
 521                         self->mode = MODE_READ_EOF;
 522                         break;
 523                 } else if (bzerror != BZ_OK) {
 524                         Util_CatchBZ2Error(bzerror);
 525                         Py_DECREF(ret);
 526                         ret = NULL;
 527                         goto cleanup;
 528                 }
 529                 if (bytesrequested < 0) {
 530                         buffersize = Util_NewBufferSize(buffersize);
 531                         if (_PyString_Resize(&ret, buffersize) < 0)
 532                                 goto cleanup;
 533                 } else {
 534                         break;
 535                 }
 536         }
 537         if (bytesread != buffersize)
 538                 _PyString_Resize(&ret, bytesread);
 539
 540 cleanup:
 541         RELEASE_LOCK(self);
 542         return ret;
 543 }
 544
 545 PyDoc_STRVAR(BZ2File_readline__doc__,
 546 "readline([size]) -> string\n\
 547 \n\
 548 Return the next line from the file, as a string, retaining newline.\n\
 549 A non-negative size argument will limit the maximum number of bytes to\n\
 550 return (an incomplete line may be returned then). Return an empty\n\
 551 string at EOF.\n\
 552 ");
 553
 554 static PyObject *
 555 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 556 {
 557         PyObject *ret = NULL;
 558         int sizehint = -1;
 559
 560         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 561                 return NULL;
 562
 563         ACQUIRE_LOCK(self);
 564         switch (self->mode) {
 565                 case MODE_READ:
 566                         break;
 567                 case MODE_READ_EOF:
 568                         ret = PyString_FromString("");
 569                         goto cleanup;
 570                 case MODE_CLOSED:
 571                         PyErr_SetString(PyExc_ValueError,
 572                                         "I/O operation on closed file");
 573                         goto cleanup;
 574                 default:
 575                         PyErr_SetString(PyExc_IOError,
 576                                         "file is not ready for reading");
 577                         goto cleanup;
 578         }
 579
 580         if (sizehint == 0)
 581                 ret = PyString_FromString("");
 582         else
 583                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 584
 585 cleanup:
 586         RELEASE_LOCK(self);
 587         return ret;
 588 }
 589
 590 PyDoc_STRVAR(BZ2File_readlines__doc__,
 591 "readlines([size]) -> list\n\
 592 \n\
 593 Call readline() repeatedly and return a list of lines read.\n\
 594 The optional size argument, if given, is an approximate bound on the\n\
 595 total number of bytes in the lines returned.\n\
 596 ");
 597
 598 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 599 static PyObject *
 600 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 601 {
 602         long sizehint = 0;
 603         PyObject *list = NULL;
 604         PyObject *line;
 605         char small_buffer[SMALLCHUNK];
 606         char *buffer = small_buffer;
 607         size_t buffersize = SMALLCHUNK;
 608         PyObject *big_buffer = NULL;
 609         size_t nfilled = 0;
 610         size_t nread;
 611         size_t totalread = 0;
 612         char *p, *q, *end;
 613         int err;
 614         int shortread = 0;
 615         int bzerror;
 616
 617         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 618                 return NULL;
 619
 620         ACQUIRE_LOCK(self);
 621         switch (self->mode) {
 622                 case MODE_READ:
 623                         break;
 624                 case MODE_READ_EOF:
 625                         list = PyList_New(0);
 626                         goto cleanup;
 627                 case MODE_CLOSED:
 628                         PyErr_SetString(PyExc_ValueError,
 629                                         "I/O operation on closed file");
 630                         goto cleanup;
 631                 default:
 632                         PyErr_SetString(PyExc_IOError,
 633                                         "file is not ready for reading");
 634                         goto cleanup;
 635         }
 636
 637         if ((list = PyList_New(0)) == NULL)
 638                 goto cleanup;
 639
 640         for (;;) {
 641                 Py_BEGIN_ALLOW_THREADS
 642                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 643                                              buffer+nfilled,
 644                                              buffersize-nfilled, self);
 645                 self->pos += nread;
 646                 Py_END_ALLOW_THREADS
 647                 if (bzerror == BZ_STREAM_END) {
 648                         self->size = self->pos;
 649                         self->mode = MODE_READ_EOF;
 650                         if (nread == 0) {
 651                                 sizehint = 0;
 652                                 break;
 653                         }
 654                         shortread = 1;
 655                 } else if (bzerror != BZ_OK) {
 656                         Util_CatchBZ2Error(bzerror);
 657                   error:
 658                         Py_DECREF(list);
 659                         list = NULL;
 660                         goto cleanup;
 661                 }
 662                 totalread += nread;
 663                 p = memchr(buffer+nfilled, '\n', nread);
 664                 if (p == NULL) {
 665                         /* Need a larger buffer to fit this line */
 666                         nfilled += nread;
 667                         buffersize *= 2;
 668                         if (buffersize > INT_MAX) {
 669                                 PyErr_SetString(PyExc_OverflowError,
 670                             "line is longer than a Python string can hold");
 671                                 goto error;
 672                         }
 673                         if (big_buffer == NULL) {
 674                                 /* Create the big buffer */
 675                                 big_buffer = PyString_FromStringAndSize(
 676                                         NULL, buffersize);
 677                                 if (big_buffer == NULL)
 678                                         goto error;
 679                                 buffer = PyString_AS_STRING(big_buffer);
 680                                 memcpy(buffer, small_buffer, nfilled);
 681                         }
 682                         else {
 683                                 /* Grow the big buffer */
 684                                 _PyString_Resize(&big_buffer, buffersize);
 685                                 buffer = PyString_AS_STRING(big_buffer);
 686                         }
 687                         continue;
 688                 }
 689                 end = buffer+nfilled+nread;
 690                 q = buffer;
 691                 do {
 692                         /* Process complete lines */
 693                         p++;
 694                         line = PyString_FromStringAndSize(q, p-q);
 695                         if (line == NULL)
 696                                 goto error;
 697                         err = PyList_Append(list, line);
 698                         Py_DECREF(line);
 699                         if (err != 0)
 700                                 goto error;
 701                         q = p;
 702                         p = memchr(q, '\n', end-q);
 703                 } while (p != NULL);
 704                 /* Move the remaining incomplete line to the start */
 705                 nfilled = end-q;
 706                 memmove(buffer, q, nfilled);
 707                 if (sizehint > 0)
 708                         if (totalread >= (size_t)sizehint)
 709                                 break;
 710                 if (shortread) {
 711                         sizehint = 0;
 712                         break;
 713                 }
 714         }
 715         if (nfilled != 0) {
 716                 /* Partial last line */
 717                 line = PyString_FromStringAndSize(buffer, nfilled);
 718                 if (line == NULL)
 719                         goto error;
 720                 if (sizehint > 0) {
 721                         /* Need to complete the last line */
 722                         PyObject *rest = Util_GetLine(self, 0);
 723                         if (rest == NULL) {
 724                                 Py_DECREF(line);
 725                                 goto error;
 726                         }
 727                         PyString_Concat(&line, rest);
 728                         Py_DECREF(rest);
 729                         if (line == NULL)
 730                                 goto error;
 731                 }
 732                 err = PyList_Append(list, line);
 733                 Py_DECREF(line);
 734                 if (err != 0)
 735                         goto error;
 736         }
 737
 738   cleanup:
 739         RELEASE_LOCK(self);
 740         if (big_buffer) {
 741                 Py_DECREF(big_buffer);
 742         }
 743         return list;
 744 }
 745
 746 PyDoc_STRVAR(BZ2File_write__doc__,
 747 "write(data) -> None\n\
 748 \n\
 749 Write the 'data' string to file. Note that due to buffering, close() may\n\
 750 be needed before the file on disk reflects the data written.\n\
 751 ");
 752
 753 /* This is a hacked version of Python's fileobject.c:file_write(). */
 754 static PyObject *
 755 BZ2File_write(BZ2FileObject *self, PyObject *args)
 756 {
 757         PyObject *ret = NULL;
 758         char *buf;
 759         int len;
 760         int bzerror;
 761
 762         if (!PyArg_ParseTuple(args, "s#", &buf, &len))
 763                 return NULL;
 764
 765         ACQUIRE_LOCK(self);
 766         switch (self->mode) {
 767                 case MODE_WRITE:
 768                         break;
 769
 770                 case MODE_CLOSED:
 771                         PyErr_SetString(PyExc_ValueError,
 772                                         "I/O operation on closed file");
 773                         goto cleanup;;
 774
 775                 default:
 776                         PyErr_SetString(PyExc_IOError,
 777                                         "file is not ready for writing");
 778                         goto cleanup;;
 779         }
 780
 781         PyFile_SoftSpace((PyObject*)self, 0);
 782
 783         Py_BEGIN_ALLOW_THREADS
 784         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 785         self->pos += len;
 786         Py_END_ALLOW_THREADS
 787
 788         if (bzerror != BZ_OK) {
 789                 Util_CatchBZ2Error(bzerror);
 790                 goto cleanup;
 791         }
 792
 793         Py_INCREF(Py_None);
 794         ret = Py_None;
 795
 796 cleanup:
 797         RELEASE_LOCK(self);
 798         return ret;
 799 }
 800
 801 PyDoc_STRVAR(BZ2File_writelines__doc__,
 802 "writelines(sequence_of_strings) -> None\n\
 803 \n\
 804 Write the sequence of strings to the file. Note that newlines are not\n\
 805 added. The sequence can be any iterable object producing strings. This is\n\
 806 equivalent to calling write() for each string.\n\
 807 ");
 808
 809 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 810 static PyObject *
 811 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 812 {
 813 #define CHUNKSIZE 1000
 814         PyObject *list = NULL;
 815         PyObject *iter = NULL;
 816         PyObject *ret = NULL;
 817         PyObject *line;
 818         int i, j, index, len, islist;
 819         int bzerror;
 820
 821         ACQUIRE_LOCK(self);
 822         islist = PyList_Check(seq);
 823         if  (!islist) {
 824                 iter = PyObject_GetIter(seq);
 825                 if (iter == NULL) {
 826                         PyErr_SetString(PyExc_TypeError,
 827                                 "writelines() requires an iterable argument");
 828                         goto error;
 829                 }
 830                 list = PyList_New(CHUNKSIZE);
 831                 if (list == NULL)
 832                         goto error;
 833         }
 834
 835         /* Strategy: slurp CHUNKSIZE lines into a private list,
 836            checking that they are all strings, then write that list
 837            without holding the interpreter lock, then come back for more. */
 838         for (index = 0; ; index += CHUNKSIZE) {
 839                 if (islist) {
 840                         Py_XDECREF(list);
 841                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 842                         if (list == NULL)
 843                                 goto error;
 844                         j = PyList_GET_SIZE(list);
 845                 }
 846                 else {
 847                         for (j = 0; j < CHUNKSIZE; j++) {
 848                                 line = PyIter_Next(iter);
 849                                 if (line == NULL) {
 850                                         if (PyErr_Occurred())
 851                                                 goto error;
 852                                         break;
 853                                 }
 854                                 PyList_SetItem(list, j, line);
 855                         }
 856                 }
 857                 if (j == 0)
 858                         break;
 859
 860                 /* Check that all entries are indeed strings. If not,
 861                    apply the same rules as for file.write() and
 862                    convert the rets to strings. This is slow, but
 863                    seems to be the only way since all conversion APIs
 864                    could potentially execute Python code. */
 865                 for (i = 0; i < j; i++) {
 866                         PyObject *v = PyList_GET_ITEM(list, i);
 867                         if (!PyString_Check(v)) {
 868                                 const char *buffer;
 869                                 int len;
 870                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 871                                         PyErr_SetString(PyExc_TypeError,
 872                                                         "writelines() "
 873                                                         "argument must be "
 874                                                         "a sequence of "
 875                                                         "strings");
 876                                         goto error;
 877                                 }
 878                                 line = PyString_FromStringAndSize(buffer,
 879                                                                   len);
 880                                 if (line == NULL)
 881                                         goto error;
 882                                 Py_DECREF(v);
 883                                 PyList_SET_ITEM(list, i, line);
 884                         }
 885                 }
 886
 887                 PyFile_SoftSpace((PyObject*)self, 0);
 888
 889                 /* Since we are releasing the global lock, the
 890                    following code may *not* execute Python code. */
 891                 Py_BEGIN_ALLOW_THREADS
 892                 for (i = 0; i < j; i++) {
 893                         line = PyList_GET_ITEM(list, i);
 894                         len = PyString_GET_SIZE(line);
 895                         BZ2_bzWrite (&bzerror, self->fp,
 896                                      PyString_AS_STRING(line), len);
 897                         if (bzerror != BZ_OK) {
 898                                 Py_BLOCK_THREADS
 899                                 Util_CatchBZ2Error(bzerror);
 900                                 goto error;
 901                         }
 902                 }
 903                 Py_END_ALLOW_THREADS
 904
 905                 if (j < CHUNKSIZE)
 906                         break;
 907         }
 908
 909         Py_INCREF(Py_None);
 910         ret = Py_None;
 911
 912   error:
 913         RELEASE_LOCK(self);
 914         Py_XDECREF(list);
 915         Py_XDECREF(iter);
 916         return ret;
 917 #undef CHUNKSIZE
 918 }
 919
 920 PyDoc_STRVAR(BZ2File_seek__doc__,
 921 "seek(offset [, whence]) -> None\n\
 922 \n\
 923 Move to new file position. Argument offset is a byte count. Optional\n\
 924 argument whence defaults to 0 (offset from start of file, offset\n\
 925 should be >= 0); other values are 1 (move relative to current position,\n\
 926 positive or negative), and 2 (move relative to end of file, usually\n\
 927 negative, although many platforms allow seeking beyond the end of a file).\n\
 928 \n\
 929 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 930 the operation may be extremely slow.\n\
 931 ");
 932
 933 static PyObject *
 934 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 935 {
 936         int where = 0;
 937         long offset;
 938         char small_buffer[SMALLCHUNK];
 939         char *buffer = small_buffer;
 940         size_t buffersize = SMALLCHUNK;
 941         int bytesread = 0;
 942         int readsize;
 943         int chunksize;
 944         int bzerror;
 945         int rewind = 0;
 946         PyObject *func;
 947         PyObject *ret = NULL;
 948
 949         if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
 950                 return NULL;
 951
 952         ACQUIRE_LOCK(self);
 953         Util_DropReadAhead(self);
 954         switch (self->mode) {
 955                 case MODE_READ:
 956                 case MODE_READ_EOF:
 957                         break;
 958
 959                 case MODE_CLOSED:
 960                         PyErr_SetString(PyExc_ValueError,
 961                                         "I/O operation on closed file");
 962                         goto cleanup;;
 963
 964                 default:
 965                         PyErr_SetString(PyExc_IOError,
 966                                         "seek works only while reading");
 967                         goto cleanup;;
 968         }
 969
 970         if (offset < 0) {
 971                 if (where == 1) {
 972                         offset = self->pos + offset;
 973                         rewind = 1;
 974                 } else if (where == 2) {
 975                         if (self->size == -1) {
 976                                 assert(self->mode != MODE_READ_EOF);
 977                                 for (;;) {
 978                                         Py_BEGIN_ALLOW_THREADS
 979                                         chunksize = Util_UnivNewlineRead(
 980                                                         &bzerror, self->fp,
 981                                                         buffer, buffersize,
 982                                                         self);
 983                                         self->pos += chunksize;
 984                                         Py_END_ALLOW_THREADS
 985
 986                                         bytesread += chunksize;
 987                                         if (bzerror == BZ_STREAM_END) {
 988                                                 break;
 989                                         } else if (bzerror != BZ_OK) {
 990                                                 Util_CatchBZ2Error(bzerror);
 991                                                 goto cleanup;
 992                                         }
 993                                 }
 994                                 self->mode = MODE_READ_EOF;
 995                                 self->size = self->pos;
 996                                 bytesread = 0;
 997                         }
 998                         offset = self->size + offset;
 999                         if (offset >= self->pos)
1000                                 offset -= self->pos;
1001                         else
1002                                 rewind = 1;
1003                 }
1004                 if (offset < 0)
1005                         offset = 0;
1006         } else if (where == 0) {
1007                 if (offset >= self->pos)
1008                         offset -= self->pos;
1009                 else
1010                         rewind = 1;
1011         }
1012
1013         if (rewind) {
1014                 BZ2_bzReadClose(&bzerror, self->fp);
1015                 func = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1016                                      "seek");
1017                 if (bzerror != BZ_OK) {
1018                         Util_CatchBZ2Error(bzerror);
1019                         goto cleanup;
1020                 }
1021                 if (!func) {
1022                         PyErr_SetString(PyExc_RuntimeError,
1023                                         "can't find file.seek method");
1024                         goto cleanup;
1025                 }
1026                 ret = PyObject_CallFunction(func, "(i)", 0);
1027                 if (!ret)
1028                         goto cleanup;
1029                 Py_DECREF(ret);
1030                 ret = NULL;
1031                 self->pos = 0;
1032                 self->fp = BZ2_bzReadOpen(&bzerror,
1033                                           PyFile_AsFile((PyObject*)self),
1034                                           0, 0, NULL, 0);
1035                 if (bzerror != BZ_OK) {
1036                         Util_CatchBZ2Error(bzerror);
1037                         goto cleanup;
1038                 }
1039                 self->mode = MODE_READ;
1040         } else if (self->mode == MODE_READ_EOF) {
1041                 goto exit;
1042         }
1043
1044         if (offset == 0)
1045                 goto exit;
1046
1047         /* Before getting here, offset must be set to the number of bytes
1048          * to walk forward. */
1049         for (;;) {
1050                 if ((size_t)offset-bytesread > buffersize)
1051                         readsize = buffersize;
1052                 else
1053                         readsize = offset-bytesread;
1054                 Py_BEGIN_ALLOW_THREADS
1055                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1056                                                  buffer, readsize, self);
1057                 self->pos += chunksize;
1058                 Py_END_ALLOW_THREADS
1059                 bytesread += chunksize;
1060                 if (bzerror == BZ_STREAM_END) {
1061                         self->size = self->pos;
1062                         self->mode = MODE_READ_EOF;
1063                         break;
1064                 } else if (bzerror != BZ_OK) {
1065                         Util_CatchBZ2Error(bzerror);
1066                         goto cleanup;
1067                 }
1068                 if (bytesread == offset)
1069                         break;
1070         }
1071
1072 exit:
1073         Py_INCREF(Py_None);
1074         ret = Py_None;
1075
1076 cleanup:
1077         RELEASE_LOCK(self);
1078         return ret;
1079 }
1080
1081 PyDoc_STRVAR(BZ2File_tell__doc__,
1082 "tell() -> int\n\
1083 \n\
1084 Return the current file position, an integer (may be a long integer).\n\
1085 ");
1086
1087 static PyObject *
1088 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1089 {
1090         PyObject *ret = NULL;
1091
1092         if (self->mode == MODE_CLOSED) {
1093                 PyErr_SetString(PyExc_ValueError,
1094                                 "I/O operation on closed file");
1095                 goto cleanup;
1096         }
1097
1098         ret = PyInt_FromLong(self->pos);
1099
1100 cleanup:
1101         return ret;
1102 }
1103
1104 PyDoc_STRVAR(BZ2File_notsup__doc__,
1105 "Operation not supported.\n\
1106 ");
1107
1108 static PyObject *
1109 BZ2File_notsup(BZ2FileObject *self, PyObject *args)
1110 {
1111         PyErr_SetString(PyExc_IOError, "operation not supported");
1112         return NULL;
1113 }
1114
1115 PyDoc_STRVAR(BZ2File_close__doc__,
1116 "close() -> None or (perhaps) an integer\n\
1117 \n\
1118 Close the file. Sets data attribute .closed to true. A closed file\n\
1119 cannot be used for further I/O operations. close() may be called more\n\
1120 than once without error.\n\
1121 ");
1122
1123 static PyObject *
1124 BZ2File_close(BZ2FileObject *self)
1125 {
1126         PyObject *file_close;
1127         PyObject *ret = NULL;
1128         int bzerror = BZ_OK;
1129
1130         ACQUIRE_LOCK(self);
1131         switch (self->mode) {
1132                 case MODE_READ:
1133                 case MODE_READ_EOF:
1134                         BZ2_bzReadClose(&bzerror, self->fp);
1135                         break;
1136                 case MODE_WRITE:
1137                         BZ2_bzWriteClose(&bzerror, self->fp,
1138                                          0, NULL, NULL);
1139                         break;
1140         }
1141         self->mode = MODE_CLOSED;
1142         file_close = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1143                                    "close");
1144         if (!file_close) {
1145                 PyErr_SetString(PyExc_RuntimeError,
1146                                 "can't find file.close method");
1147                 goto cleanup;
1148         }
1149         ret = PyObject_CallObject(file_close, NULL);
1150         if (bzerror != BZ_OK) {
1151                 Util_CatchBZ2Error(bzerror);
1152                 Py_XDECREF(ret);
1153                 ret = NULL;
1154                 goto cleanup;
1155         }
1156
1157 cleanup:
1158         RELEASE_LOCK(self);
1159         return ret;
1160 }
1161
1162 static PyMethodDef BZ2File_methods[] = {
1163         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1164         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1165         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1166         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1167         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1168         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1169         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1170         {"truncate", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1171         {"readinto", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1172         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1173         {NULL,          NULL}           /* sentinel */
1174 };
1175
1176
1177 /* ===================================================================== */
1178 /* Slot definitions for BZ2File_Type. */
1179
1180 static int
1181 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1182 {
1183         PyObject *file_args = NULL;
1184         static char *kwlist[] = {"filename", "mode", "buffering",
1185                                  "compresslevel", 0};
1186         char *name = NULL;
1187         char *mode = "r";
1188         int buffering = -1;
1189         int compresslevel = 9;
1190         int bzerror;
1191         int mode_char = 0;
1192         int univ_newline = 0;
1193
1194         self->size = -1;
1195
1196         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "et|sii:BZ2File",
1197                                          kwlist, Py_FileSystemDefaultEncoding,
1198                                          &name, &mode, &buffering,
1199                                          &compresslevel))
1200                 return -1;
1201
1202         if (compresslevel < 1 || compresslevel > 9) {
1203                 PyErr_SetString(PyExc_ValueError,
1204                                 "compresslevel must be between 1 and 9");
1205                 return -1;
1206         }
1207
1208         for (;;) {
1209                 int error = 0;
1210                 switch (*mode) {
1211                         case 'r':
1212                         case 'w':
1213                                 if (mode_char)
1214                                         error = 1;
1215                                 mode_char = *mode;
1216                                 break;
1217
1218                         case 'b':
1219                                 break;
1220
1221                         case 'U':
1222                                 univ_newline = 1;
1223                                 break;
1224
1225                         default:
1226                                 error = 1;
1227                                 break;
1228                 }
1229                 if (error) {
1230                         PyErr_Format(PyExc_ValueError,
1231                                      "invalid mode char %c", *mode);
1232                         return -1;
1233                 }
1234                 mode++;
1235                 if (*mode == '\0')
1236                         break;
1237         }
1238
1239         if (mode_char == 'r')
1240                 mode = univ_newline ? "rbU" : "rb";
1241         else
1242                 mode = univ_newline ? "wbU" : "wb";
1243
1244         file_args = Py_BuildValue("(ssi)", name, mode, buffering);
1245         if (!file_args)
1246                 return -1;
1247
1248         /* From now on, we have stuff to dealloc, so jump to error label
1249          * instead of returning */
1250
1251         if (PyFile_Type.tp_init((PyObject *)self, file_args, NULL) < 0)
1252                 goto error;
1253
1254 #ifdef WITH_THREAD
1255         self->lock = PyThread_allocate_lock();
1256         if (!self->lock)
1257                 goto error;
1258 #endif
1259
1260         if (mode_char == 'r')
1261                 self->fp = BZ2_bzReadOpen(&bzerror,
1262                                           PyFile_AsFile((PyObject*)self),
1263                                           0, 0, NULL, 0);
1264         else
1265                 self->fp = BZ2_bzWriteOpen(&bzerror,
1266                                            PyFile_AsFile((PyObject*)self),
1267                                            compresslevel, 0, 0);
1268
1269         if (bzerror != BZ_OK) {
1270                 Util_CatchBZ2Error(bzerror);
1271                 goto error;
1272         }
1273
1274         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1275
1276         Py_XDECREF(file_args);
1277         PyMem_Free(name);
1278         return 0;
1279
1280 error:
1281 #ifdef WITH_THREAD
1282         if (self->lock)
1283                 PyThread_free_lock(self->lock);
1284 #endif
1285         Py_XDECREF(file_args);
1286         PyMem_Free(name);
1287         return -1;
1288 }
1289
1290 static void
1291 BZ2File_dealloc(BZ2FileObject *self)
1292 {
1293         int bzerror;
1294 #ifdef WITH_THREAD
1295         if (self->lock)
1296                 PyThread_free_lock(self->lock);
1297 #endif
1298         switch (self->mode) {
1299                 case MODE_READ:
1300                 case MODE_READ_EOF:
1301                         BZ2_bzReadClose(&bzerror, self->fp);
1302                         break;
1303                 case MODE_WRITE:
1304                         BZ2_bzWriteClose(&bzerror, self->fp,
1305                                          0, NULL, NULL);
1306                         break;
1307         }
1308         Util_DropReadAhead(self);
1309         ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1310 }
1311
1312 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1313 static PyObject *
1314 BZ2File_getiter(BZ2FileObject *self)
1315 {
1316         if (self->mode == MODE_CLOSED) {
1317                 PyErr_SetString(PyExc_ValueError,
1318                                 "I/O operation on closed file");
1319                 return NULL;
1320         }
1321         Py_INCREF((PyObject*)self);
1322         return (PyObject *)self;
1323 }
1324
1325 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1326 #define READAHEAD_BUFSIZE 8192
1327 static PyObject *
1328 BZ2File_iternext(BZ2FileObject *self)
1329 {
1330         PyStringObject* ret;
1331         ACQUIRE_LOCK(self);
1332         if (self->mode == MODE_CLOSED) {
1333                 PyErr_SetString(PyExc_ValueError,
1334                                 "I/O operation on closed file");
1335                 return NULL;
1336         }
1337         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1338         RELEASE_LOCK(self);
1339         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1340                 Py_XDECREF(ret);
1341                 return NULL;
1342         }
1343         return (PyObject *)ret;
1344 }
1345
1346 /* ===================================================================== */
1347 /* BZ2File_Type definition. */
1348
/* Docstring of the BZ2File type (used as tp_doc of BZ2File_Type).  The
 * paragraph about 'U' mode is only compiled in when
 * WITH_UNIVERSAL_NEWLINES is defined.
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init() initialises its buffering variable to -1 -- confirm
 * which one is intended. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
#ifdef WITH_UNIVERSAL_NEWLINES
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
#endif
;
1371
/* Type object for bz2.BZ2File.  Only the slots this module implements
 * are filled in here: deallocation, the iterator protocol
 * (tp_iter/tp_iternext), the method table and tp_init.  Every other slot,
 * including tp_base, tp_alloc, tp_new and tp_free, is left as 0 in this
 * initializer. */
static PyTypeObject BZ2File_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2File",          /*tp_name*/
        sizeof(BZ2FileObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2File_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        0,                      /*tp_getattro*/
        0,                      /*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2File__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        (getiterfunc)BZ2File_getiter, /*tp_iter*/
        (iternextfunc)BZ2File_iternext, /*tp_iternext*/
        BZ2File_methods,        /*tp_methods*/
        0,                      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2File_init, /*tp_init*/
        0,                      /*tp_alloc*/
        0,                      /*tp_new*/
        0,                      /*tp_free*/
        0,                      /*tp_is_gc*/
};
1415
1416
1417 /* ===================================================================== */
1418 /* Methods of BZ2Comp. */
1419
1420 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1421 "compress(data) -> string\n\
1422 \n\
1423 Provide more data to the compressor object. It will return chunks of\n\
1424 compressed data whenever possible. When you've finished providing data\n\
1425 to compress, call the flush() method to finish the compression process,\n\
1426 and return what is left in the internal buffers.\n\
1427 ");
1428
1429 static PyObject *
1430 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1431 {
1432         char *data;
1433         int datasize;
1434         int bufsize = SMALLCHUNK;
1435         LONG_LONG totalout;
1436         PyObject *ret = NULL;
1437         bz_stream *bzs = &self->bzs;
1438         int bzerror;
1439
1440         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1441                 return NULL;
1442
1443         ACQUIRE_LOCK(self);
1444         if (!self->running) {
1445                 PyErr_SetString(PyExc_ValueError,
1446                                 "this object was already flushed");
1447                 goto error;
1448         }
1449
1450         ret = PyString_FromStringAndSize(NULL, bufsize);
1451         if (!ret)
1452                 goto error;
1453
1454         bzs->next_in = data;
1455         bzs->avail_in = datasize;
1456         bzs->next_out = BUF(ret);
1457         bzs->avail_out = bufsize;
1458
1459         totalout = BZS_TOTAL_OUT(bzs);
1460
1461         for (;;) {
1462                 Py_BEGIN_ALLOW_THREADS
1463                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1464                 Py_END_ALLOW_THREADS
1465                 if (bzerror != BZ_RUN_OK) {
1466                         Util_CatchBZ2Error(bzerror);
1467                         goto error;
1468                 }
1469                 if (bzs->avail_out == 0) {
1470                         bufsize = Util_NewBufferSize(bufsize);
1471                         if (_PyString_Resize(&ret, bufsize) < 0) {
1472                                 BZ2_bzCompressEnd(bzs);
1473                                 goto error;
1474                         }
1475                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1476                                                     - totalout);
1477                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1478                 } else if (bzs->avail_in == 0) {
1479                         break;
1480                 }
1481         }
1482
1483         _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1484
1485         RELEASE_LOCK(self);
1486         return ret;
1487
1488 error:
1489         RELEASE_LOCK(self);
1490         Py_XDECREF(ret);
1491         return NULL;
1492 }
1493
1494 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1495 "flush() -> string\n\
1496 \n\
1497 Finish the compression process and return what is left in internal buffers.\n\
1498 You must not use the compressor object after calling this method.\n\
1499 ");
1500
1501 static PyObject *
1502 BZ2Comp_flush(BZ2CompObject *self)
1503 {
1504         int bufsize = SMALLCHUNK;
1505         PyObject *ret = NULL;
1506         bz_stream *bzs = &self->bzs;
1507         LONG_LONG totalout;
1508         int bzerror;
1509
1510         ACQUIRE_LOCK(self);
1511         if (!self->running) {
1512                 PyErr_SetString(PyExc_ValueError, "object was already "
1513                                                   "flushed");
1514                 goto error;
1515         }
1516         self->running = 0;
1517
1518         ret = PyString_FromStringAndSize(NULL, bufsize);
1519         if (!ret)
1520                 goto error;
1521
1522         bzs->next_out = BUF(ret);
1523         bzs->avail_out = bufsize;
1524
1525         totalout = BZS_TOTAL_OUT(bzs);
1526
1527         for (;;) {
1528                 Py_BEGIN_ALLOW_THREADS
1529                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1530                 Py_END_ALLOW_THREADS
1531                 if (bzerror == BZ_STREAM_END) {
1532                         break;
1533                 } else if (bzerror != BZ_FINISH_OK) {
1534                         Util_CatchBZ2Error(bzerror);
1535                         goto error;
1536                 }
1537                 if (bzs->avail_out == 0) {
1538                         bufsize = Util_NewBufferSize(bufsize);
1539                         if (_PyString_Resize(&ret, bufsize) < 0)
1540                                 goto error;
1541                         bzs->next_out = BUF(ret);
1542                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1543                                                     - totalout);
1544                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1545                 }
1546         }
1547
1548         if (bzs->avail_out != 0)
1549                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1550
1551         RELEASE_LOCK(self);
1552         return ret;
1553
1554 error:
1555         RELEASE_LOCK(self);
1556         Py_XDECREF(ret);
1557         return NULL;
1558 }
1559
1560 static PyMethodDef BZ2Comp_methods[] = {
1561         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1562          BZ2Comp_compress__doc__},
1563         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1564          BZ2Comp_flush__doc__},
1565         {NULL,          NULL}           /* sentinel */
1566 };
1567
1568
1569 /* ===================================================================== */
1570 /* Slot definitions for BZ2Comp_Type. */
1571
1572 static int
1573 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1574 {
1575         int compresslevel = 9;
1576         int bzerror;
1577         static char *kwlist[] = {"compresslevel", 0};
1578
1579         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1580                                          kwlist, &compresslevel))
1581                 return -1;
1582
1583         if (compresslevel < 1 || compresslevel > 9) {
1584                 PyErr_SetString(PyExc_ValueError,
1585                                 "compresslevel must be between 1 and 9");
1586                 goto error;
1587         }
1588
1589 #ifdef WITH_THREAD
1590         self->lock = PyThread_allocate_lock();
1591         if (!self->lock)
1592                 goto error;
1593 #endif
1594
1595         memset(&self->bzs, 0, sizeof(bz_stream));
1596         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1597         if (bzerror != BZ_OK) {
1598                 Util_CatchBZ2Error(bzerror);
1599                 goto error;
1600         }
1601
1602         self->running = 1;
1603
1604         return 0;
1605 error:
1606 #ifdef WITH_THREAD
1607         if (self->lock)
1608                 PyThread_free_lock(self->lock);
1609 #endif
1610         return -1;
1611 }
1612
1613 static void
1614 BZ2Comp_dealloc(BZ2CompObject *self)
1615 {
1616 #ifdef WITH_THREAD
1617         if (self->lock)
1618                 PyThread_free_lock(self->lock);
1619 #endif
1620         BZ2_bzCompressEnd(&self->bzs);
1621         ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1622 }
1623
1624
1625 /* ===================================================================== */
1626 /* BZ2Comp_Type definition. */
1627
/* Docstring of the BZ2Compressor type; installed below as tp_doc. */
1628 PyDoc_STRVAR(BZ2Comp__doc__,
1629 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1630 \n\
1631 Create a new compressor object. This object may be used to compress\n\
1632 data sequentially. If you want to compress data in one shot, use the\n\
1633 compress() function instead. The compresslevel parameter, if given,\n\
1634 must be a number between 1 and 9.\n\
1635 ");
1636
/* Type object for bz2.BZ2Compressor.
 *
 * The initializer is positional, so the order of the entries matters.
 * Several slots are left 0 here and assigned at module initialization
 * time by initbz2(): ob_type, tp_getattro, tp_setattro, tp_alloc,
 * tp_new and tp_free. */
1637 static PyTypeObject BZ2Comp_Type = {
1638         PyObject_HEAD_INIT(NULL) /* ob_type is assigned in initbz2() */
1639         0,                      /*ob_size*/
1640         "bz2.BZ2Compressor",    /*tp_name*/
1641         sizeof(BZ2CompObject),  /*tp_basicsize*/
1642         0,                      /*tp_itemsize*/
1643         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1644         0,                      /*tp_print*/
1645         0,                      /*tp_getattr*/
1646         0,                      /*tp_setattr*/
1647         0,                      /*tp_compare*/
1648         0,                      /*tp_repr*/
1649         0,                      /*tp_as_number*/
1650         0,                      /*tp_as_sequence*/
1651         0,                      /*tp_as_mapping*/
1652         0,                      /*tp_hash*/
1653         0,                      /*tp_call*/
1654         0,                      /*tp_str*/
1655         0,                      /*tp_getattro (assigned in initbz2())*/
1656         0,                      /*tp_setattro (assigned in initbz2())*/
1657         0,                      /*tp_as_buffer*/
1658         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1659         BZ2Comp__doc__,         /*tp_doc*/
1660         0,                      /*tp_traverse*/
1661         0,                      /*tp_clear*/
1662         0,                      /*tp_richcompare*/
1663         0,                      /*tp_weaklistoffset*/
1664         0,                      /*tp_iter*/
1665         0,                      /*tp_iternext*/
1666         BZ2Comp_methods,        /*tp_methods*/
1667         0,                      /*tp_members*/
1668         0,                      /*tp_getset*/
1669         0,                      /*tp_base*/
1670         0,                      /*tp_dict*/
1671         0,                      /*tp_descr_get*/
1672         0,                      /*tp_descr_set*/
1673         0,                      /*tp_dictoffset*/
1674         (initproc)BZ2Comp_init, /*tp_init*/
1675         0,                      /*tp_alloc (assigned in initbz2())*/
1676         0,                      /*tp_new (assigned in initbz2())*/
1677         0,                      /*tp_free (assigned in initbz2())*/
1678         0,                      /*tp_is_gc*/
1679 };
1680
1681
1682 /* ===================================================================== */
1683 /* Members of BZ2Decomp. */
1684
1685 #define OFF(x) offsetof(BZ2DecompObject, x)
1686
1687 static PyMemberDef BZ2Decomp_members[] = {
1688         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1689         {NULL}  /* Sentinel */
1690 };
1691
1692
1693 /* ===================================================================== */
1694 /* Methods of BZ2Decomp. */
1695
1696 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1697 "decompress(data) -> string\n\
1698 \n\
1699 Provide more data to the decompressor object. It will return chunks\n\
1700 of decompressed data whenever possible. If you try to decompress data\n\
1701 after the end of stream is found, EOFError will be raised. If any data\n\
1702 was found after the end of stream, it'll be ignored and saved in\n\
1703 unused_data attribute.\n\
1704 ");
1705
1706 static PyObject *
1707 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1708 {
1709         char *data;
1710         int datasize;
1711         int bufsize = SMALLCHUNK;
1712         LONG_LONG totalout;
1713         PyObject *ret = NULL;
1714         bz_stream *bzs = &self->bzs;
1715         int bzerror;
1716
1717         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1718                 return NULL;
1719
1720         ACQUIRE_LOCK(self);
1721         if (!self->running) {
1722                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1723                                                 "already found");
1724                 goto error;
1725         }
1726
1727         ret = PyString_FromStringAndSize(NULL, bufsize);
1728         if (!ret)
1729                 goto error;
1730
1731         bzs->next_in = data;
1732         bzs->avail_in = datasize;
1733         bzs->next_out = BUF(ret);
1734         bzs->avail_out = bufsize;
1735
1736         totalout = BZS_TOTAL_OUT(bzs);
1737
1738         for (;;) {
1739                 Py_BEGIN_ALLOW_THREADS
1740                 bzerror = BZ2_bzDecompress(bzs);
1741                 Py_END_ALLOW_THREADS
1742                 if (bzerror == BZ_STREAM_END) {
1743                         if (bzs->avail_in != 0) {
1744                                 Py_DECREF(self->unused_data);
1745                                 self->unused_data =
1746                                     PyString_FromStringAndSize(bzs->next_in,
1747                                                                bzs->avail_in);
1748                         }
1749                         self->running = 0;
1750                         break;
1751                 }
1752                 if (bzerror != BZ_OK) {
1753                         Util_CatchBZ2Error(bzerror);
1754                         goto error;
1755                 }
1756                 if (bzs->avail_out == 0) {
1757                         bufsize = Util_NewBufferSize(bufsize);
1758                         if (_PyString_Resize(&ret, bufsize) < 0) {
1759                                 BZ2_bzDecompressEnd(bzs);
1760                                 goto error;
1761                         }
1762                         bzs->next_out = BUF(ret);
1763                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1764                                                     - totalout);
1765                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1766                 } else if (bzs->avail_in == 0) {
1767                         break;
1768                 }
1769         }
1770
1771         if (bzs->avail_out != 0)
1772                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1773
1774         RELEASE_LOCK(self);
1775         return ret;
1776
1777 error:
1778         RELEASE_LOCK(self);
1779         Py_XDECREF(ret);
1780         return NULL;
1781 }
1782
1783 static PyMethodDef BZ2Decomp_methods[] = {
1784         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1785         {NULL,          NULL}           /* sentinel */
1786 };
1787
1788
1789 /* ===================================================================== */
1790 /* Slot definitions for BZ2Decomp_Type. */
1791
1792 static int
1793 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1794 {
1795         int bzerror;
1796
1797         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1798                 return -1;
1799
1800 #ifdef WITH_THREAD
1801         self->lock = PyThread_allocate_lock();
1802         if (!self->lock)
1803                 goto error;
1804 #endif
1805
1806         self->unused_data = PyString_FromString("");
1807         if (!self->unused_data)
1808                 goto error;
1809
1810         memset(&self->bzs, 0, sizeof(bz_stream));
1811         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1812         if (bzerror != BZ_OK) {
1813                 Util_CatchBZ2Error(bzerror);
1814                 goto error;
1815         }
1816
1817         self->running = 1;
1818
1819         return 0;
1820
1821 error:
1822 #ifdef WITH_THREAD
1823         if (self->lock)
1824                 PyThread_free_lock(self->lock);
1825 #endif
1826         Py_XDECREF(self->unused_data);
1827         return -1;
1828 }
1829
1830 static void
1831 BZ2Decomp_dealloc(BZ2DecompObject *self)
1832 {
1833 #ifdef WITH_THREAD
1834         if (self->lock)
1835                 PyThread_free_lock(self->lock);
1836 #endif
1837         Py_XDECREF(self->unused_data);
1838         BZ2_bzDecompressEnd(&self->bzs);
1839         ((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1840 }
1841
1842
1843 /* ===================================================================== */
1844 /* BZ2Decomp_Type definition. */
1845
/* Docstring of the BZ2Decompressor type; installed below as tp_doc. */
1846 PyDoc_STRVAR(BZ2Decomp__doc__,
1847 "BZ2Decompressor() -> decompressor object\n\
1848 \n\
1849 Create a new decompressor object. This object may be used to decompress\n\
1850 data sequentially. If you want to decompress data in one shot, use the\n\
1851 decompress() function instead.\n\
1852 ");
1853
/* Type object for bz2.BZ2Decompressor.
 *
 * The initializer is positional, so the order of the entries matters.
 * Several slots are left 0 here and assigned at module initialization
 * time by initbz2(): ob_type, tp_getattro, tp_setattro, tp_alloc,
 * tp_new and tp_free. */
1854 static PyTypeObject BZ2Decomp_Type = {
1855         PyObject_HEAD_INIT(NULL) /* ob_type is assigned in initbz2() */
1856         0,                      /*ob_size*/
1857         "bz2.BZ2Decompressor",  /*tp_name*/
1858         sizeof(BZ2DecompObject), /*tp_basicsize*/
1859         0,                      /*tp_itemsize*/
1860         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1861         0,                      /*tp_print*/
1862         0,                      /*tp_getattr*/
1863         0,                      /*tp_setattr*/
1864         0,                      /*tp_compare*/
1865         0,                      /*tp_repr*/
1866         0,                      /*tp_as_number*/
1867         0,                      /*tp_as_sequence*/
1868         0,                      /*tp_as_mapping*/
1869         0,                      /*tp_hash*/
1870         0,                      /*tp_call*/
1871         0,                      /*tp_str*/
1872         0,                      /*tp_getattro (assigned in initbz2())*/
1873         0,                      /*tp_setattro (assigned in initbz2())*/
1874         0,                      /*tp_as_buffer*/
1875         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1876         BZ2Decomp__doc__,       /*tp_doc*/
1877         0,                      /*tp_traverse*/
1878         0,                      /*tp_clear*/
1879         0,                      /*tp_richcompare*/
1880         0,                      /*tp_weaklistoffset*/
1881         0,                      /*tp_iter*/
1882         0,                      /*tp_iternext*/
1883         BZ2Decomp_methods,      /*tp_methods*/
1884         BZ2Decomp_members,      /*tp_members*/
1885         0,                      /*tp_getset*/
1886         0,                      /*tp_base*/
1887         0,                      /*tp_dict*/
1888         0,                      /*tp_descr_get*/
1889         0,                      /*tp_descr_set*/
1890         0,                      /*tp_dictoffset*/
1891         (initproc)BZ2Decomp_init, /*tp_init*/
1892         0,                      /*tp_alloc (assigned in initbz2())*/
1893         0,                      /*tp_new (assigned in initbz2())*/
1894         0,                      /*tp_free (assigned in initbz2())*/
1895         0,                      /*tp_is_gc*/
1896 };
1897
1898
1899 /* ===================================================================== */
1900 /* Module functions. */
1901
1902 PyDoc_STRVAR(bz2_compress__doc__,
1903 "compress(data [, compresslevel=9]) -> string\n\
1904 \n\
1905 Compress data in one shot. If you want to compress data sequentially,\n\
1906 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1907 given, must be a number between 1 and 9.\n\
1908 ");
1909
1910 static PyObject *
1911 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1912 {
1913         int compresslevel=9;
1914         char *data;
1915         int datasize;
1916         int bufsize;
1917         PyObject *ret = NULL;
1918         bz_stream _bzs;
1919         bz_stream *bzs = &_bzs;
1920         int bzerror;
1921         static char *kwlist[] = {"data", "compresslevel", 0};
1922
1923         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1924                                          kwlist, &data, &datasize,
1925                                          &compresslevel))
1926                 return NULL;
1927
1928         if (compresslevel < 1 || compresslevel > 9) {
1929                 PyErr_SetString(PyExc_ValueError,
1930                                 "compresslevel must be between 1 and 9");
1931                 return NULL;
1932         }
1933
1934         /* Conforming to bz2 manual, this is large enough to fit compressed
1935          * data in one shot. We will check it later anyway. */
1936         bufsize = datasize + (datasize/100+1) + 600;
1937
1938         ret = PyString_FromStringAndSize(NULL, bufsize);
1939         if (!ret)
1940                 return NULL;
1941
1942         memset(bzs, 0, sizeof(bz_stream));
1943
1944         bzs->next_in = data;
1945         bzs->avail_in = datasize;
1946         bzs->next_out = BUF(ret);
1947         bzs->avail_out = bufsize;
1948
1949         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1950         if (bzerror != BZ_OK) {
1951                 Util_CatchBZ2Error(bzerror);
1952                 Py_DECREF(ret);
1953                 return NULL;
1954         }
1955
1956         for (;;) {
1957                 Py_BEGIN_ALLOW_THREADS
1958                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1959                 Py_END_ALLOW_THREADS
1960                 if (bzerror == BZ_STREAM_END) {
1961                         break;
1962                 } else if (bzerror != BZ_FINISH_OK) {
1963                         BZ2_bzCompressEnd(bzs);
1964                         Util_CatchBZ2Error(bzerror);
1965                         Py_DECREF(ret);
1966                         return NULL;
1967                 }
1968                 if (bzs->avail_out == 0) {
1969                         bufsize = Util_NewBufferSize(bufsize);
1970                         if (_PyString_Resize(&ret, bufsize) < 0) {
1971                                 BZ2_bzCompressEnd(bzs);
1972                                 Py_DECREF(ret);
1973                                 return NULL;
1974                         }
1975                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1976                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1977                 }
1978         }
1979
1980         if (bzs->avail_out != 0)
1981                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
1982         BZ2_bzCompressEnd(bzs);
1983
1984         return ret;
1985 }
1986
1987 PyDoc_STRVAR(bz2_decompress__doc__,
1988 "decompress(data) -> decompressed data\n\
1989 \n\
1990 Decompress data in one shot. If you want to decompress data sequentially,\n\
1991 use an instance of BZ2Decompressor instead.\n\
1992 ");
1993
1994 static PyObject *
1995 bz2_decompress(PyObject *self, PyObject *args)
1996 {
1997         char *data;
1998         int datasize;
1999         int bufsize = SMALLCHUNK;
2000         PyObject *ret;
2001         bz_stream _bzs;
2002         bz_stream *bzs = &_bzs;
2003         int bzerror;
2004
2005         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2006                 return NULL;
2007
2008         if (datasize == 0)
2009                 return PyString_FromString("");
2010
2011         ret = PyString_FromStringAndSize(NULL, bufsize);
2012         if (!ret)
2013                 return NULL;
2014
2015         memset(bzs, 0, sizeof(bz_stream));
2016
2017         bzs->next_in = data;
2018         bzs->avail_in = datasize;
2019         bzs->next_out = BUF(ret);
2020         bzs->avail_out = bufsize;
2021
2022         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2023         if (bzerror != BZ_OK) {
2024                 Util_CatchBZ2Error(bzerror);
2025                 Py_DECREF(ret);
2026                 return NULL;
2027         }
2028
2029         for (;;) {
2030                 Py_BEGIN_ALLOW_THREADS
2031                 bzerror = BZ2_bzDecompress(bzs);
2032                 Py_END_ALLOW_THREADS
2033                 if (bzerror == BZ_STREAM_END) {
2034                         break;
2035                 } else if (bzerror != BZ_OK) {
2036                         BZ2_bzDecompressEnd(bzs);
2037                         Util_CatchBZ2Error(bzerror);
2038                         Py_DECREF(ret);
2039                         return NULL;
2040                 }
2041                 if (bzs->avail_out == 0) {
2042                         bufsize = Util_NewBufferSize(bufsize);
2043                         if (_PyString_Resize(&ret, bufsize) < 0) {
2044                                 BZ2_bzDecompressEnd(bzs);
2045                                 Py_DECREF(ret);
2046                                 return NULL;
2047                         }
2048                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2049                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2050                 } else if (bzs->avail_in == 0) {
2051                         BZ2_bzDecompressEnd(bzs);
2052                         PyErr_SetString(PyExc_ValueError,
2053                                         "couldn't find end of stream");
2054                         Py_DECREF(ret);
2055                         return NULL;
2056                 }
2057         }
2058
2059         if (bzs->avail_out != 0)
2060                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2061         BZ2_bzDecompressEnd(bzs);
2062
2063         return ret;
2064 }
2065
2066 static PyMethodDef bz2_methods[] = {
2067         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2068                 bz2_compress__doc__},
2069         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2070                 bz2_decompress__doc__},
2071         {NULL,          NULL}           /* sentinel */
2072 };
2073
2074 /* ===================================================================== */
2075 /* Initialization function. */
2076
2077 PyDoc_STRVAR(bz2__doc__,
2078 "The python bz2 module provides a comprehensive interface for\n\
2079 the bz2 compression library. It implements a complete file\n\
2080 interface, one shot (de)compression functions, and types for\n\
2081 sequential (de)compression.\n\
2082 ");
2083
2084 DL_EXPORT(void)
2085 initbz2(void)
2086 {
2087         PyObject *m;
2088
2089         BZ2File_Type.ob_type = &PyType_Type;
2090         BZ2File_Type.tp_base = &PyFile_Type;
2091         BZ2File_Type.tp_new = PyFile_Type.tp_new;
2092         BZ2File_Type.tp_getattro = PyObject_GenericGetAttr;
2093         BZ2File_Type.tp_setattro = PyObject_GenericSetAttr;
2094         BZ2File_Type.tp_alloc = PyType_GenericAlloc;
2095         BZ2File_Type.tp_free = _PyObject_Del;
2096
2097         BZ2Comp_Type.ob_type = &PyType_Type;
2098         BZ2Comp_Type.tp_getattro = PyObject_GenericGetAttr;
2099         BZ2Comp_Type.tp_setattro = PyObject_GenericSetAttr;
2100         BZ2Comp_Type.tp_alloc = PyType_GenericAlloc;
2101         BZ2Comp_Type.tp_new = PyType_GenericNew;
2102         BZ2Comp_Type.tp_free = _PyObject_Del;
2103
2104         BZ2Decomp_Type.ob_type = &PyType_Type;
2105         BZ2Decomp_Type.tp_getattro = PyObject_GenericGetAttr;
2106         BZ2Decomp_Type.tp_setattro = PyObject_GenericSetAttr;
2107         BZ2Decomp_Type.tp_alloc = PyType_GenericAlloc;
2108         BZ2Decomp_Type.tp_new = PyType_GenericNew;
2109         BZ2Decomp_Type.tp_free = _PyObject_Del;
2110
2111         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2112
2113         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2114
2115         Py_INCREF(&BZ2File_Type);
2116         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2117
2118         Py_INCREF(&BZ2Comp_Type);
2119         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2120
2121         Py_INCREF(&BZ2Decomp_Type);
2122         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2123 }