Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  26
  27 #define MODE_CLOSED   0
  28 #define MODE_READ     1
  29 #define MODE_READ_EOF 2
  30 #define MODE_WRITE    3
  31
  32 #define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
  33
  34
  35 #ifdef BZ_CONFIG_ERROR
  36
  37 #if SIZEOF_LONG >= 8
  38 #define BZS_TOTAL_OUT(bzs) \
  39         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  40 #elif SIZEOF_LONG_LONG >= 8
  41 #define BZS_TOTAL_OUT(bzs) \
  42         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  43 #else
  44 #define BZS_TOTAL_OUT(bzs) \
  45         bzs->total_out_lo32;
  46 #endif
  47
  48 #else /* ! BZ_CONFIG_ERROR */
  49
  50 #define BZ2_bzRead bzRead
  51 #define BZ2_bzReadOpen bzReadOpen
  52 #define BZ2_bzReadClose bzReadClose
  53 #define BZ2_bzWrite bzWrite
  54 #define BZ2_bzWriteOpen bzWriteOpen
  55 #define BZ2_bzWriteClose bzWriteClose
  56 #define BZ2_bzCompress bzCompress
  57 #define BZ2_bzCompressInit bzCompressInit
  58 #define BZ2_bzCompressEnd bzCompressEnd
  59 #define BZ2_bzDecompress bzDecompress
  60 #define BZ2_bzDecompressInit bzDecompressInit
  61 #define BZ2_bzDecompressEnd bzDecompressEnd
  62
  63 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  64
  65 #endif /* ! BZ_CONFIG_ERROR */
  66
  67
  68 #ifdef WITH_THREAD
  69 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  70 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  71 #else
  72 #define ACQUIRE_LOCK(obj)
  73 #define RELEASE_LOCK(obj)
  74 #endif
  75
  76 #ifdef WITH_UNIVERSAL_NEWLINES
  77 /* Bits in f_newlinetypes */
  78 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  79 #define NEWLINE_CR 1            /* \r newline seen */
  80 #define NEWLINE_LF 2            /* \n newline seen */
  81 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  82 #endif
  83
  84 /* ===================================================================== */
  85 /* Structure definitions. */
  86
  87 typedef struct {
  88         PyObject_HEAD
  89         PyObject *file;
  90
  91         char* f_buf;            /* Allocated readahead buffer */
  92         char* f_bufend;         /* Points after last occupied position */
  93         char* f_bufptr;         /* Current buffer position */
  94
  95         int f_softspace;        /* Flag used by 'print' command */
  96
  97 #ifdef WITH_UNIVERSAL_NEWLINES
  98         int f_univ_newline;     /* Handle any newline convention */
  99         int f_newlinetypes;     /* Types of newlines seen */
 100         int f_skipnextlf;       /* Skip next \n */
 101 #endif
 102
 103         BZFILE *fp;
 104         int mode;
 105         long pos;
 106         long size;
 107 #ifdef WITH_THREAD
 108         PyThread_type_lock lock;
 109 #endif
 110 } BZ2FileObject;
 111
 112 typedef struct {
 113         PyObject_HEAD
 114         bz_stream bzs;
 115         int running;
 116 #ifdef WITH_THREAD
 117         PyThread_type_lock lock;
 118 #endif
 119 } BZ2CompObject;
 120
 121 typedef struct {
 122         PyObject_HEAD
 123         bz_stream bzs;
 124         int running;
 125         PyObject *unused_data;
 126 #ifdef WITH_THREAD
 127         PyThread_type_lock lock;
 128 #endif
 129 } BZ2DecompObject;
 130
 131 /* ===================================================================== */
 132 /* Utility functions. */
 133
 134 static int
 135 Util_CatchBZ2Error(int bzerror)
 136 {
 137         int ret = 0;
 138         switch(bzerror) {
 139                 case BZ_OK:
 140                 case BZ_STREAM_END:
 141                         break;
 142
 143 #ifdef BZ_CONFIG_ERROR
 144                 case BZ_CONFIG_ERROR:
 145                         PyErr_SetString(PyExc_SystemError,
 146                                         "the bz2 library was not compiled "
 147                                         "correctly");
 148                         ret = 1;
 149                         break;
 150 #endif
 151
 152                 case BZ_PARAM_ERROR:
 153                         PyErr_SetString(PyExc_ValueError,
 154                                         "the bz2 library has received wrong "
 155                                         "parameters");
 156                         ret = 1;
 157                         break;
 158
 159                 case BZ_MEM_ERROR:
 160                         PyErr_NoMemory();
 161                         ret = 1;
 162                         break;
 163
 164                 case BZ_DATA_ERROR:
 165                 case BZ_DATA_ERROR_MAGIC:
 166                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 167                         ret = 1;
 168                         break;
 169
 170                 case BZ_IO_ERROR:
 171                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 172                         ret = 1;
 173                         break;
 174
 175                 case BZ_UNEXPECTED_EOF:
 176                         PyErr_SetString(PyExc_EOFError,
 177                                         "compressed file ended before the "
 178                                         "logical end-of-stream was detected");
 179                         ret = 1;
 180                         break;
 181
 182                 case BZ_SEQUENCE_ERROR:
 183                         PyErr_SetString(PyExc_RuntimeError,
 184                                         "wrong sequence of bz2 library "
 185                                         "commands used");
 186                         ret = 1;
 187                         break;
 188         }
 189         return ret;
 190 }
 191
 192 #if BUFSIZ < 8192
 193 #define SMALLCHUNK 8192
 194 #else
 195 #define SMALLCHUNK BUFSIZ
 196 #endif
 197
 198 #if SIZEOF_INT < 4
 199 #define BIGCHUNK  (512 * 32)
 200 #else
 201 #define BIGCHUNK  (512 * 1024)
 202 #endif
 203
 204 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 205 static size_t
 206 Util_NewBufferSize(size_t currentsize)
 207 {
 208         if (currentsize > SMALLCHUNK) {
 209                 /* Keep doubling until we reach BIGCHUNK;
 210                    then keep adding BIGCHUNK. */
 211                 if (currentsize <= BIGCHUNK)
 212                         return currentsize + currentsize;
 213                 else
 214                         return currentsize + BIGCHUNK;
 215         }
 216         return currentsize + SMALLCHUNK;
 217 }
 218
 219 /* This is a hacked version of Python's fileobject.c:get_line(). */
 220 static PyObject *
 221 Util_GetLine(BZ2FileObject *f, int n)
 222 {
 223         char c;
 224         char *buf, *end;
 225         size_t total_v_size;    /* total # of slots in buffer */
 226         size_t used_v_size;     /* # used slots in buffer */
 227         size_t increment;       /* amount to increment the buffer */
 228         PyObject *v;
 229         int bzerror;
 230 #ifdef WITH_UNIVERSAL_NEWLINES
 231         int newlinetypes = f->f_newlinetypes;
 232         int skipnextlf = f->f_skipnextlf;
 233         int univ_newline = f->f_univ_newline;
 234 #endif
 235
 236         total_v_size = n > 0 ? n : 100;
 237         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 238         if (v == NULL)
 239                 return NULL;
 240
 241         buf = BUF(v);
 242         end = buf + total_v_size;
 243
 244         for (;;) {
 245                 Py_BEGIN_ALLOW_THREADS
 246 #ifdef WITH_UNIVERSAL_NEWLINES
 247                 if (univ_newline) {
 248                         while (1) {
 249                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 250                                 f->pos++;
 251                                 if (bzerror != BZ_OK || buf == end)
 252                                         break;
 253                                 if (skipnextlf) {
 254                                         skipnextlf = 0;
 255                                         if (c == '\n') {
 256                                                 /* Seeing a \n here with
 257                                                  * skipnextlf true means we
 258                                                  * saw a \r before.
 259                                                  */
 260                                                 newlinetypes |= NEWLINE_CRLF;
 261                                                 BZ2_bzRead(&bzerror, f->fp,
 262                                                            &c, 1);
 263                                                 if (bzerror != BZ_OK)
 264                                                         break;
 265                                         } else {
 266                                                 newlinetypes |= NEWLINE_CR;
 267                                         }
 268                                 }
 269                                 if (c == '\r') {
 270                                         skipnextlf = 1;
 271                                         c = '\n';
 272                                 } else if ( c == '\n')
 273                                         newlinetypes |= NEWLINE_LF;
 274                                 *buf++ = c;
 275                                 if (c == '\n') break;
 276                         }
 277                         if (bzerror == BZ_STREAM_END && skipnextlf)
 278                                 newlinetypes |= NEWLINE_CR;
 279                 } else /* If not universal newlines use the normal loop */
 280 #endif
 281                         do {
 282                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 283                                 f->pos++;
 284                                 *buf++ = c;
 285                         } while (bzerror == BZ_OK && c != '\n' && buf != end);
 286                 Py_END_ALLOW_THREADS
 287 #ifdef WITH_UNIVERSAL_NEWLINES
 288                 f->f_newlinetypes = newlinetypes;
 289                 f->f_skipnextlf = skipnextlf;
 290 #endif
 291                 if (bzerror == BZ_STREAM_END) {
 292                         f->size = f->pos;
 293                         f->mode = MODE_READ_EOF;
 294                         break;
 295                 } else if (bzerror != BZ_OK) {
 296                         Util_CatchBZ2Error(bzerror);
 297                         Py_DECREF(v);
 298                         return NULL;
 299                 }
 300                 if (c == '\n')
 301                         break;
 302                 /* Must be because buf == end */
 303                 if (n > 0)
 304                         break;
 305                 used_v_size = total_v_size;
 306                 increment = total_v_size >> 2; /* mild exponential growth */
 307                 total_v_size += increment;
 308                 if (total_v_size > INT_MAX) {
 309                         PyErr_SetString(PyExc_OverflowError,
 310                             "line is longer than a Python string can hold");
 311                         Py_DECREF(v);
 312                         return NULL;
 313                 }
 314                 if (_PyString_Resize(&v, total_v_size) < 0)
 315                         return NULL;
 316                 buf = BUF(v) + used_v_size;
 317                 end = BUF(v) + total_v_size;
 318         }
 319
 320         used_v_size = buf - BUF(v);
 321         if (used_v_size != total_v_size)
 322                 _PyString_Resize(&v, used_v_size);
 323         return v;
 324 }
 325
 326 #ifndef WITH_UNIVERSAL_NEWLINES
 327 #define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
 328 #else
 329 /* This is a hacked version of Python's
 330  * fileobject.c:Py_UniversalNewlineFread(). */
 331 size_t
 332 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 333                      char* buf, size_t n, BZ2FileObject *f)
 334 {
 335         char *dst = buf;
 336         int newlinetypes, skipnextlf;
 337
 338         assert(buf != NULL);
 339         assert(stream != NULL);
 340
 341         if (!f->f_univ_newline)
 342                 return BZ2_bzRead(bzerror, stream, buf, n);
 343
 344         newlinetypes = f->f_newlinetypes;
 345         skipnextlf = f->f_skipnextlf;
 346
 347         /* Invariant:  n is the number of bytes remaining to be filled
 348          * in the buffer.
 349          */
 350         while (n) {
 351                 size_t nread;
 352                 int shortread;
 353                 char *src = dst;
 354
 355                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 356                 assert(nread <= n);
 357                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 358                 shortread = n != 0;     /* true iff EOF or error */
 359                 while (nread--) {
 360                         char c = *src++;
 361                         if (c == '\r') {
 362                                 /* Save as LF and set flag to skip next LF. */
 363                                 *dst++ = '\n';
 364                                 skipnextlf = 1;
 365                         }
 366                         else if (skipnextlf && c == '\n') {
 367                                 /* Skip LF, and remember we saw CR LF. */
 368                                 skipnextlf = 0;
 369                                 newlinetypes |= NEWLINE_CRLF;
 370                                 ++n;
 371                         }
 372                         else {
 373                                 /* Normal char to be stored in buffer.  Also
 374                                  * update the newlinetypes flag if either this
 375                                  * is an LF or the previous char was a CR.
 376                                  */
 377                                 if (c == '\n')
 378                                         newlinetypes |= NEWLINE_LF;
 379                                 else if (skipnextlf)
 380                                         newlinetypes |= NEWLINE_CR;
 381                                 *dst++ = c;
 382                                 skipnextlf = 0;
 383                         }
 384                 }
 385                 if (shortread) {
 386                         /* If this is EOF, update type flags. */
 387                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 388                                 newlinetypes |= NEWLINE_CR;
 389                         break;
 390                 }
 391         }
 392         f->f_newlinetypes = newlinetypes;
 393         f->f_skipnextlf = skipnextlf;
 394         return dst - buf;
 395 }
 396 #endif
 397
 398 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 399 static void
 400 Util_DropReadAhead(BZ2FileObject *f)
 401 {
 402         if (f->f_buf != NULL) {
 403                 PyMem_Free(f->f_buf);
 404                 f->f_buf = NULL;
 405         }
 406 }
 407
 408 /* This is a hacked version of Python's fileobject.c:readahead(). */
 409 static int
 410 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 411 {
 412         int chunksize;
 413         int bzerror;
 414
 415         if (f->f_buf != NULL) {
 416                 if((f->f_bufend - f->f_bufptr) >= 1)
 417                         return 0;
 418                 else
 419                         Util_DropReadAhead(f);
 420         }
 421         if (f->mode == MODE_READ_EOF) {
 422                 return -1;
 423         }
 424         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 425                 return -1;
 426         }
 427         Py_BEGIN_ALLOW_THREADS
 428         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 429                                          bufsize, f);
 430         Py_END_ALLOW_THREADS
 431         f->pos += chunksize;
 432         if (bzerror == BZ_STREAM_END) {
 433                 f->size = f->pos;
 434                 f->mode = MODE_READ_EOF;
 435         } else if (bzerror != BZ_OK) {
 436                 Util_CatchBZ2Error(bzerror);
 437                 Util_DropReadAhead(f);
 438                 return -1;
 439         }
 440         f->f_bufptr = f->f_buf;
 441         f->f_bufend = f->f_buf + chunksize;
 442         return 0;
 443 }
 444
 445 /* This is a hacked version of Python's
 446  * fileobject.c:readahead_get_line_skip(). */
 447 static PyStringObject *
 448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 449 {
 450         PyStringObject* s;
 451         char *bufptr;
 452         char *buf;
 453         int len;
 454
 455         if (f->f_buf == NULL)
 456                 if (Util_ReadAhead(f, bufsize) < 0)
 457                         return NULL;
 458
 459         len = f->f_bufend - f->f_bufptr;
 460         if (len == 0)
 461                 return (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip);
 463         bufptr = memchr(f->f_bufptr, '\n', len);
 464         if (bufptr != NULL) {
 465                 bufptr++;                       /* Count the '\n' */
 466                 len = bufptr - f->f_bufptr;
 467                 s = (PyStringObject *)
 468                         PyString_FromStringAndSize(NULL, skip+len);
 469                 if (s == NULL)
 470                         return NULL;
 471                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 472                 f->f_bufptr = bufptr;
 473                 if (bufptr == f->f_bufend)
 474                         Util_DropReadAhead(f);
 475         } else {
 476                 bufptr = f->f_bufptr;
 477                 buf = f->f_buf;
 478                 f->f_buf = NULL;        /* Force new readahead buffer */
 479                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 480                                               bufsize + (bufsize>>2));
 481                 if (s == NULL) {
 482                         PyMem_Free(buf);
 483                         return NULL;
 484                 }
 485                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 486                 PyMem_Free(buf);
 487         }
 488         return s;
 489 }
 490
 491 /* ===================================================================== */
 492 /* Methods of BZ2File. */
 493
 494 PyDoc_STRVAR(BZ2File_read__doc__,
 495 "read([size]) -> string\n\
 496 \n\
 497 Read at most size uncompressed bytes, returned as a string. If the size\n\
 498 argument is negative or omitted, read until EOF is reached.\n\
 499 ");
 500
 501 /* This is a hacked version of Python's fileobject.c:file_read(). */
 502 static PyObject *
 503 BZ2File_read(BZ2FileObject *self, PyObject *args)
 504 {
 505         long bytesrequested = -1;
 506         size_t bytesread, buffersize, chunksize;
 507         int bzerror;
 508         PyObject *ret = NULL;
 509
 510         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 511                 return NULL;
 512
 513         ACQUIRE_LOCK(self);
 514         switch (self->mode) {
 515                 case MODE_READ:
 516                         break;
 517                 case MODE_READ_EOF:
 518                         ret = PyString_FromString("");
 519                         goto cleanup;
 520                 case MODE_CLOSED:
 521                         PyErr_SetString(PyExc_ValueError,
 522                                         "I/O operation on closed file");
 523                         goto cleanup;
 524                 default:
 525                         PyErr_SetString(PyExc_IOError,
 526                                         "file is not ready for reading");
 527                         goto cleanup;
 528         }
 529
 530         if (bytesrequested < 0)
 531                 buffersize = Util_NewBufferSize((size_t)0);
 532         else
 533                 buffersize = bytesrequested;
 534         if (buffersize > INT_MAX) {
 535                 PyErr_SetString(PyExc_OverflowError,
 536                                 "requested number of bytes is "
 537                                 "more than a Python string can hold");
 538                 goto cleanup;
 539         }
 540         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 541         if (ret == NULL)
 542                 goto cleanup;
 543         bytesread = 0;
 544
 545         for (;;) {
 546                 Py_BEGIN_ALLOW_THREADS
 547                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 548                                                  BUF(ret)+bytesread,
 549                                                  buffersize-bytesread,
 550                                                  self);
 551                 self->pos += chunksize;
 552                 Py_END_ALLOW_THREADS
 553                 bytesread += chunksize;
 554                 if (bzerror == BZ_STREAM_END) {
 555                         self->size = self->pos;
 556                         self->mode = MODE_READ_EOF;
 557                         break;
 558                 } else if (bzerror != BZ_OK) {
 559                         Util_CatchBZ2Error(bzerror);
 560                         Py_DECREF(ret);
 561                         ret = NULL;
 562                         goto cleanup;
 563                 }
 564                 if (bytesrequested < 0) {
 565                         buffersize = Util_NewBufferSize(buffersize);
 566                         if (_PyString_Resize(&ret, buffersize) < 0)
 567                                 goto cleanup;
 568                 } else {
 569                         break;
 570                 }
 571         }
 572         if (bytesread != buffersize)
 573                 _PyString_Resize(&ret, bytesread);
 574
 575 cleanup:
 576         RELEASE_LOCK(self);
 577         return ret;
 578 }
 579
 580 PyDoc_STRVAR(BZ2File_readline__doc__,
 581 "readline([size]) -> string\n\
 582 \n\
 583 Return the next line from the file, as a string, retaining newline.\n\
 584 A non-negative size argument will limit the maximum number of bytes to\n\
 585 return (an incomplete line may be returned then). Return an empty\n\
 586 string at EOF.\n\
 587 ");
 588
 589 static PyObject *
 590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 591 {
 592         PyObject *ret = NULL;
 593         int sizehint = -1;
 594
 595         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 596                 return NULL;
 597
 598         ACQUIRE_LOCK(self);
 599         switch (self->mode) {
 600                 case MODE_READ:
 601                         break;
 602                 case MODE_READ_EOF:
 603                         ret = PyString_FromString("");
 604                         goto cleanup;
 605                 case MODE_CLOSED:
 606                         PyErr_SetString(PyExc_ValueError,
 607                                         "I/O operation on closed file");
 608                         goto cleanup;
 609                 default:
 610                         PyErr_SetString(PyExc_IOError,
 611                                         "file is not ready for reading");
 612                         goto cleanup;
 613         }
 614
 615         if (sizehint == 0)
 616                 ret = PyString_FromString("");
 617         else
 618                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 619
 620 cleanup:
 621         RELEASE_LOCK(self);
 622         return ret;
 623 }
 624
 625 PyDoc_STRVAR(BZ2File_readlines__doc__,
 626 "readlines([size]) -> list\n\
 627 \n\
 628 Call readline() repeatedly and return a list of lines read.\n\
 629 The optional size argument, if given, is an approximate bound on the\n\
 630 total number of bytes in the lines returned.\n\
 631 ");
 632
 633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 634 static PyObject *
 635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 636 {
 637         long sizehint = 0;
 638         PyObject *list = NULL;
 639         PyObject *line;
 640         char small_buffer[SMALLCHUNK];
 641         char *buffer = small_buffer;
 642         size_t buffersize = SMALLCHUNK;
 643         PyObject *big_buffer = NULL;
 644         size_t nfilled = 0;
 645         size_t nread;
 646         size_t totalread = 0;
 647         char *p, *q, *end;
 648         int err;
 649         int shortread = 0;
 650         int bzerror;
 651
 652         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 653                 return NULL;
 654
 655         ACQUIRE_LOCK(self);
 656         switch (self->mode) {
 657                 case MODE_READ:
 658                         break;
 659                 case MODE_READ_EOF:
 660                         list = PyList_New(0);
 661                         goto cleanup;
 662                 case MODE_CLOSED:
 663                         PyErr_SetString(PyExc_ValueError,
 664                                         "I/O operation on closed file");
 665                         goto cleanup;
 666                 default:
 667                         PyErr_SetString(PyExc_IOError,
 668                                         "file is not ready for reading");
 669                         goto cleanup;
 670         }
 671
 672         if ((list = PyList_New(0)) == NULL)
 673                 goto cleanup;
 674
 675         for (;;) {
 676                 Py_BEGIN_ALLOW_THREADS
 677                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 678                                              buffer+nfilled,
 679                                              buffersize-nfilled, self);
 680                 self->pos += nread;
 681                 Py_END_ALLOW_THREADS
 682                 if (bzerror == BZ_STREAM_END) {
 683                         self->size = self->pos;
 684                         self->mode = MODE_READ_EOF;
 685                         if (nread == 0) {
 686                                 sizehint = 0;
 687                                 break;
 688                         }
 689                         shortread = 1;
 690                 } else if (bzerror != BZ_OK) {
 691                         Util_CatchBZ2Error(bzerror);
 692                   error:
 693                         Py_DECREF(list);
 694                         list = NULL;
 695                         goto cleanup;
 696                 }
 697                 totalread += nread;
 698                 p = memchr(buffer+nfilled, '\n', nread);
 699                 if (p == NULL) {
 700                         /* Need a larger buffer to fit this line */
 701                         nfilled += nread;
 702                         buffersize *= 2;
 703                         if (buffersize > INT_MAX) {
 704                                 PyErr_SetString(PyExc_OverflowError,
 705                             "line is longer than a Python string can hold");
 706                                 goto error;
 707                         }
 708                         if (big_buffer == NULL) {
 709                                 /* Create the big buffer */
 710                                 big_buffer = PyString_FromStringAndSize(
 711                                         NULL, buffersize);
 712                                 if (big_buffer == NULL)
 713                                         goto error;
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                                 memcpy(buffer, small_buffer, nfilled);
 716                         }
 717                         else {
 718                                 /* Grow the big buffer */
 719                                 _PyString_Resize(&big_buffer, buffersize);
 720                                 buffer = PyString_AS_STRING(big_buffer);
 721                         }
 722                         continue;
 723                 }
 724                 end = buffer+nfilled+nread;
 725                 q = buffer;
 726                 do {
 727                         /* Process complete lines */
 728                         p++;
 729                         line = PyString_FromStringAndSize(q, p-q);
 730                         if (line == NULL)
 731                                 goto error;
 732                         err = PyList_Append(list, line);
 733                         Py_DECREF(line);
 734                         if (err != 0)
 735                                 goto error;
 736                         q = p;
 737                         p = memchr(q, '\n', end-q);
 738                 } while (p != NULL);
 739                 /* Move the remaining incomplete line to the start */
 740                 nfilled = end-q;
 741                 memmove(buffer, q, nfilled);
 742                 if (sizehint > 0)
 743                         if (totalread >= (size_t)sizehint)
 744                                 break;
 745                 if (shortread) {
 746                         sizehint = 0;
 747                         break;
 748                 }
 749         }
 750         if (nfilled != 0) {
 751                 /* Partial last line */
 752                 line = PyString_FromStringAndSize(buffer, nfilled);
 753                 if (line == NULL)
 754                         goto error;
 755                 if (sizehint > 0) {
 756                         /* Need to complete the last line */
 757                         PyObject *rest = Util_GetLine(self, 0);
 758                         if (rest == NULL) {
 759                                 Py_DECREF(line);
 760                                 goto error;
 761                         }
 762                         PyString_Concat(&line, rest);
 763                         Py_DECREF(rest);
 764                         if (line == NULL)
 765                                 goto error;
 766                 }
 767                 err = PyList_Append(list, line);
 768                 Py_DECREF(line);
 769                 if (err != 0)
 770                         goto error;
 771         }
 772
 773   cleanup:
 774         RELEASE_LOCK(self);
 775         if (big_buffer) {
 776                 Py_DECREF(big_buffer);
 777         }
 778         return list;
 779 }
 780
 781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 782 "xreadlines() -> self\n\
 783 \n\
 784 For backward compatibility. BZ2File objects now include the performance\n\
 785 optimizations previously implemented in the xreadlines module.\n\
 786 ");
 787
 788 PyDoc_STRVAR(BZ2File_write__doc__,
 789 "write(data) -> None\n\
 790 \n\
 791 Write the 'data' string to file. Note that due to buffering, close() may\n\
 792 be needed before the file on disk reflects the data written.\n\
 793 ");
 794
 795 /* This is a hacked version of Python's fileobject.c:file_write(). */
 796 static PyObject *
 797 BZ2File_write(BZ2FileObject *self, PyObject *args)
 798 {
 799         PyObject *ret = NULL;
 800         char *buf;
 801         int len;
 802         int bzerror;
 803
 804         if (!PyArg_ParseTuple(args, "s#", &buf, &len))
 805                 return NULL;
 806
 807         ACQUIRE_LOCK(self);
 808         switch (self->mode) {
 809                 case MODE_WRITE:
 810                         break;
 811
 812                 case MODE_CLOSED:
 813                         PyErr_SetString(PyExc_ValueError,
 814                                         "I/O operation on closed file");
 815                         goto cleanup;;
 816
 817                 default:
 818                         PyErr_SetString(PyExc_IOError,
 819                                         "file is not ready for writing");
 820                         goto cleanup;;
 821         }
 822
 823         self->f_softspace = 0;
 824
 825         Py_BEGIN_ALLOW_THREADS
 826         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 827         self->pos += len;
 828         Py_END_ALLOW_THREADS
 829
 830         if (bzerror != BZ_OK) {
 831                 Util_CatchBZ2Error(bzerror);
 832                 goto cleanup;
 833         }
 834
 835         Py_INCREF(Py_None);
 836         ret = Py_None;
 837
 838 cleanup:
 839         RELEASE_LOCK(self);
 840         return ret;
 841 }
 842
 843 PyDoc_STRVAR(BZ2File_writelines__doc__,
 844 "writelines(sequence_of_strings) -> None\n\
 845 \n\
 846 Write the sequence of strings to the file. Note that newlines are not\n\
 847 added. The sequence can be any iterable object producing strings. This is\n\
 848 equivalent to calling write() for each string.\n\
 849 ");
 850
 851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 852 static PyObject *
 853 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 854 {
 855 #define CHUNKSIZE 1000
 856         PyObject *list = NULL;
 857         PyObject *iter = NULL;
 858         PyObject *ret = NULL;
 859         PyObject *line;
 860         int i, j, index, len, islist;
 861         int bzerror;
 862
 863         ACQUIRE_LOCK(self);
 864         islist = PyList_Check(seq);
 865         if  (!islist) {
 866                 iter = PyObject_GetIter(seq);
 867                 if (iter == NULL) {
 868                         PyErr_SetString(PyExc_TypeError,
 869                                 "writelines() requires an iterable argument");
 870                         goto error;
 871                 }
 872                 list = PyList_New(CHUNKSIZE);
 873                 if (list == NULL)
 874                         goto error;
 875         }
 876
 877         /* Strategy: slurp CHUNKSIZE lines into a private list,
 878            checking that they are all strings, then write that list
 879            without holding the interpreter lock, then come back for more. */
 880         for (index = 0; ; index += CHUNKSIZE) {
 881                 if (islist) {
 882                         Py_XDECREF(list);
 883                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 884                         if (list == NULL)
 885                                 goto error;
 886                         j = PyList_GET_SIZE(list);
 887                 }
 888                 else {
 889                         for (j = 0; j < CHUNKSIZE; j++) {
 890                                 line = PyIter_Next(iter);
 891                                 if (line == NULL) {
 892                                         if (PyErr_Occurred())
 893                                                 goto error;
 894                                         break;
 895                                 }
 896                                 PyList_SetItem(list, j, line);
 897                         }
 898                 }
 899                 if (j == 0)
 900                         break;
 901
 902                 /* Check that all entries are indeed strings. If not,
 903                    apply the same rules as for file.write() and
 904                    convert the rets to strings. This is slow, but
 905                    seems to be the only way since all conversion APIs
 906                    could potentially execute Python code. */
 907                 for (i = 0; i < j; i++) {
 908                         PyObject *v = PyList_GET_ITEM(list, i);
 909                         if (!PyString_Check(v)) {
 910                                 const char *buffer;
 911                                 int len;
 912                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 913                                         PyErr_SetString(PyExc_TypeError,
 914                                                         "writelines() "
 915                                                         "argument must be "
 916                                                         "a sequence of "
 917                                                         "strings");
 918                                         goto error;
 919                                 }
 920                                 line = PyString_FromStringAndSize(buffer,
 921                                                                   len);
 922                                 if (line == NULL)
 923                                         goto error;
 924                                 Py_DECREF(v);
 925                                 PyList_SET_ITEM(list, i, line);
 926                         }
 927                 }
 928
 929                 self->f_softspace = 0;
 930
 931                 /* Since we are releasing the global lock, the
 932                    following code may *not* execute Python code. */
 933                 Py_BEGIN_ALLOW_THREADS
 934                 for (i = 0; i < j; i++) {
 935                         line = PyList_GET_ITEM(list, i);
 936                         len = PyString_GET_SIZE(line);
 937                         BZ2_bzWrite (&bzerror, self->fp,
 938                                      PyString_AS_STRING(line), len);
 939                         if (bzerror != BZ_OK) {
 940                                 Py_BLOCK_THREADS
 941                                 Util_CatchBZ2Error(bzerror);
 942                                 goto error;
 943                         }
 944                 }
 945                 Py_END_ALLOW_THREADS
 946
 947                 if (j < CHUNKSIZE)
 948                         break;
 949         }
 950
 951         Py_INCREF(Py_None);
 952         ret = Py_None;
 953
 954   error:
 955         RELEASE_LOCK(self);
 956         Py_XDECREF(list);
 957         Py_XDECREF(iter);
 958         return ret;
 959 #undef CHUNKSIZE
 960 }
 961
 962 PyDoc_STRVAR(BZ2File_seek__doc__,
 963 "seek(offset [, whence]) -> None\n\
 964 \n\
 965 Move to new file position. Argument offset is a byte count. Optional\n\
 966 argument whence defaults to 0 (offset from start of file, offset\n\
 967 should be >= 0); other values are 1 (move relative to current position,\n\
 968 positive or negative), and 2 (move relative to end of file, usually\n\
 969 negative, although many platforms allow seeking beyond the end of a file).\n\
 970 \n\
 971 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 972 the operation may be extremely slow.\n\
 973 ");
 974
 975 static PyObject *
 976 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 977 {
 978         int where = 0;
 979         long offset;
 980         char small_buffer[SMALLCHUNK];
 981         char *buffer = small_buffer;
 982         size_t buffersize = SMALLCHUNK;
 983         int bytesread = 0;
 984         int readsize;
 985         int chunksize;
 986         int bzerror;
 987         int rewind = 0;
 988         PyObject *ret = NULL;
 989
 990         if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
 991                 return NULL;
 992
 993         ACQUIRE_LOCK(self);
 994         Util_DropReadAhead(self);
 995         switch (self->mode) {
 996                 case MODE_READ:
 997                 case MODE_READ_EOF:
 998                         break;
 999
1000                 case MODE_CLOSED:
1001                         PyErr_SetString(PyExc_ValueError,
1002                                         "I/O operation on closed file");
1003                         goto cleanup;;
1004
1005                 default:
1006                         PyErr_SetString(PyExc_IOError,
1007                                         "seek works only while reading");
1008                         goto cleanup;;
1009         }
1010
1011         if (offset < 0) {
1012                 if (where == 1) {
1013                         offset = self->pos + offset;
1014                         rewind = 1;
1015                 } else if (where == 2) {
1016                         if (self->size == -1) {
1017                                 assert(self->mode != MODE_READ_EOF);
1018                                 for (;;) {
1019                                         Py_BEGIN_ALLOW_THREADS
1020                                         chunksize = Util_UnivNewlineRead(
1021                                                         &bzerror, self->fp,
1022                                                         buffer, buffersize,
1023                                                         self);
1024                                         self->pos += chunksize;
1025                                         Py_END_ALLOW_THREADS
1026
1027                                         bytesread += chunksize;
1028                                         if (bzerror == BZ_STREAM_END) {
1029                                                 break;
1030                                         } else if (bzerror != BZ_OK) {
1031                                                 Util_CatchBZ2Error(bzerror);
1032                                                 goto cleanup;
1033                                         }
1034                                 }
1035                                 self->mode = MODE_READ_EOF;
1036                                 self->size = self->pos;
1037                                 bytesread = 0;
1038                         }
1039                         offset = self->size + offset;
1040                         if (offset >= self->pos)
1041                                 offset -= self->pos;
1042                         else
1043                                 rewind = 1;
1044                 }
1045                 if (offset < 0)
1046                         offset = 0;
1047         } else if (where == 0) {
1048                 if (offset >= self->pos)
1049                         offset -= self->pos;
1050                 else
1051                         rewind = 1;
1052         }
1053
1054         if (rewind) {
1055                 BZ2_bzReadClose(&bzerror, self->fp);
1056                 if (bzerror != BZ_OK) {
1057                         Util_CatchBZ2Error(bzerror);
1058                         goto cleanup;
1059                 }
1060                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1061                 if (!ret)
1062                         goto cleanup;
1063                 Py_DECREF(ret);
1064                 ret = NULL;
1065                 self->pos = 0;
1066                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1067                                           0, 0, NULL, 0);
1068                 if (bzerror != BZ_OK) {
1069                         Util_CatchBZ2Error(bzerror);
1070                         goto cleanup;
1071                 }
1072                 self->mode = MODE_READ;
1073         } else if (self->mode == MODE_READ_EOF) {
1074                 goto exit;
1075         }
1076
1077         if (offset == 0)
1078                 goto exit;
1079
1080         /* Before getting here, offset must be set to the number of bytes
1081          * to walk forward. */
1082         for (;;) {
1083                 if ((size_t)offset-bytesread > buffersize)
1084                         readsize = buffersize;
1085                 else
1086                         readsize = offset-bytesread;
1087                 Py_BEGIN_ALLOW_THREADS
1088                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1089                                                  buffer, readsize, self);
1090                 self->pos += chunksize;
1091                 Py_END_ALLOW_THREADS
1092                 bytesread += chunksize;
1093                 if (bzerror == BZ_STREAM_END) {
1094                         self->size = self->pos;
1095                         self->mode = MODE_READ_EOF;
1096                         break;
1097                 } else if (bzerror != BZ_OK) {
1098                         Util_CatchBZ2Error(bzerror);
1099                         goto cleanup;
1100                 }
1101                 if (bytesread == offset)
1102                         break;
1103         }
1104
1105 exit:
1106         Py_INCREF(Py_None);
1107         ret = Py_None;
1108
1109 cleanup:
1110         RELEASE_LOCK(self);
1111         return ret;
1112 }
1113
1114 PyDoc_STRVAR(BZ2File_tell__doc__,
1115 "tell() -> int\n\
1116 \n\
1117 Return the current file position, an integer (may be a long integer).\n\
1118 ");
1119
1120 static PyObject *
1121 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1122 {
1123         PyObject *ret = NULL;
1124
1125         if (self->mode == MODE_CLOSED) {
1126                 PyErr_SetString(PyExc_ValueError,
1127                                 "I/O operation on closed file");
1128                 goto cleanup;
1129         }
1130
1131         ret = PyInt_FromLong(self->pos);
1132
1133 cleanup:
1134         return ret;
1135 }
1136
1137 PyDoc_STRVAR(BZ2File_close__doc__,
1138 "close() -> None or (perhaps) an integer\n\
1139 \n\
1140 Close the file. Sets data attribute .closed to true. A closed file\n\
1141 cannot be used for further I/O operations. close() may be called more\n\
1142 than once without error.\n\
1143 ");
1144
1145 static PyObject *
1146 BZ2File_close(BZ2FileObject *self)
1147 {
1148         PyObject *ret = NULL;
1149         int bzerror = BZ_OK;
1150
1151         ACQUIRE_LOCK(self);
1152         switch (self->mode) {
1153                 case MODE_READ:
1154                 case MODE_READ_EOF:
1155                         BZ2_bzReadClose(&bzerror, self->fp);
1156                         break;
1157                 case MODE_WRITE:
1158                         BZ2_bzWriteClose(&bzerror, self->fp,
1159                                          0, NULL, NULL);
1160                         break;
1161         }
1162         self->mode = MODE_CLOSED;
1163         ret = PyObject_CallMethod(self->file, "close", NULL);
1164         if (bzerror != BZ_OK) {
1165                 Util_CatchBZ2Error(bzerror);
1166                 Py_XDECREF(ret);
1167                 ret = NULL;
1168         }
1169
1170         RELEASE_LOCK(self);
1171         return ret;
1172 }
1173
/* Forward declaration: BZ2File_getiter is defined further down but is
   needed here by the "xreadlines" entry of the method table. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table of BZ2File objects.  Each row gives the Python-level name,
   the C function implementing it, its calling-convention flag and its
   docstring; the all-NULL row terminates the table. */
static PyMethodDef BZ2File_methods[] = {
        {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
        {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
        {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
        {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
        {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
        {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
        {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
        {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
        {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
        {NULL,          NULL}           /* sentinel */
};
1188
1189
1190 /* ===================================================================== */
1191 /* Getters and setters of BZ2File. */
1192
1193 #ifdef WITH_UNIVERSAL_NEWLINES
1194 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1195 static PyObject *
1196 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1197 {
1198         switch (self->f_newlinetypes) {
1199         case NEWLINE_UNKNOWN:
1200                 Py_INCREF(Py_None);
1201                 return Py_None;
1202         case NEWLINE_CR:
1203                 return PyString_FromString("\r");
1204         case NEWLINE_LF:
1205                 return PyString_FromString("\n");
1206         case NEWLINE_CR|NEWLINE_LF:
1207                 return Py_BuildValue("(ss)", "\r", "\n");
1208         case NEWLINE_CRLF:
1209                 return PyString_FromString("\r\n");
1210         case NEWLINE_CR|NEWLINE_CRLF:
1211                 return Py_BuildValue("(ss)", "\r", "\r\n");
1212         case NEWLINE_LF|NEWLINE_CRLF:
1213                 return Py_BuildValue("(ss)", "\n", "\r\n");
1214         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1215                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1216         default:
1217                 PyErr_Format(PyExc_SystemError,
1218                              "Unknown newlines value 0x%x\n",
1219                              self->f_newlinetypes);
1220                 return NULL;
1221         }
1222 }
1223 #endif
1224
1225 static PyObject *
1226 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1227 {
1228         return PyInt_FromLong(self->mode == MODE_CLOSED);
1229 }
1230
1231 static PyObject *
1232 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1233 {
1234         return PyObject_GetAttrString(self->file, "mode");
1235 }
1236
1237 static PyObject *
1238 BZ2File_get_name(BZ2FileObject *self, void *closure)
1239 {
1240         return PyObject_GetAttrString(self->file, "name");
1241 }
1242
/* Computed attributes of BZ2File objects.  Every entry has a getter and a
   NULL setter; "newlines" is only present when WITH_UNIVERSAL_NEWLINES is
   defined.  The "mode" and "name" getters read the attribute of the same
   name from the wrapped file object. */
static PyGetSetDef BZ2File_getset[] = {
        {"closed", (getter)BZ2File_get_closed, NULL,
                        "True if the file is closed"},
#ifdef WITH_UNIVERSAL_NEWLINES
        {"newlines", (getter)BZ2File_get_newlines, NULL,
                        "end-of-line convention used in this file"},
#endif
        {"mode", (getter)BZ2File_get_mode, NULL,
                        "file mode ('r', 'w', or 'U')"},
        {"name", (getter)BZ2File_get_name, NULL,
                        "file name"},
        {NULL}  /* Sentinel */
};
1256
1257
1258 /* ===================================================================== */
1259 /* Members of BZ2File_Type. */
1260
/* OFF(x): byte offset of field x inside BZ2FileObject.  Any earlier
   definition of OFF is dropped first. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields exposed directly as Python attributes: only the integer
   f_softspace field, under the name "softspace". */
static PyMemberDef BZ2File_members[] = {
        {"softspace",   T_INT,          OFF(f_softspace), 0,
         "flag indicating that a space needs to be printed; used by print"},
        {NULL}  /* Sentinel */
};
1269
1270 /* ===================================================================== */
1271 /* Slot definitions for BZ2File_Type. */
1272
1273 static int
1274 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1275 {
1276         static char *kwlist[] = {"filename", "mode", "buffering",
1277                                  "compresslevel", 0};
1278         PyObject *name;
1279         char *mode = "r";
1280         int buffering = -1;
1281         int compresslevel = 9;
1282         int bzerror;
1283         int mode_char = 0;
1284
1285         self->size = -1;
1286
1287         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1288                                          kwlist, &name, &mode, &buffering,
1289                                          &compresslevel))
1290                 return -1;
1291
1292         if (compresslevel < 1 || compresslevel > 9) {
1293                 PyErr_SetString(PyExc_ValueError,
1294                                 "compresslevel must be between 1 and 9");
1295                 return -1;
1296         }
1297
1298         for (;;) {
1299                 int error = 0;
1300                 switch (*mode) {
1301                         case 'r':
1302                         case 'w':
1303                                 if (mode_char)
1304                                         error = 1;
1305                                 mode_char = *mode;
1306                                 break;
1307
1308                         case 'b':
1309                                 break;
1310
1311                         case 'U':
1312 #ifdef WITH_UNIVERSAL_NEWLINES
1313                                 self->f_univ_newline = 1;
1314 #endif
1315                                 break;
1316
1317                         default:
1318                                 error = 1;
1319                                 break;
1320                 }
1321                 if (error) {
1322                         PyErr_Format(PyExc_ValueError,
1323                                      "invalid mode char %c", *mode);
1324                         return -1;
1325                 }
1326                 mode++;
1327                 if (*mode == '\0')
1328                         break;
1329         }
1330
1331         mode = (mode_char == 'r') ? "rb" : "wb";
1332
1333         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1334                                            name, mode, buffering);
1335         if (self->file == NULL)
1336                 return -1;
1337
1338         /* From now on, we have stuff to dealloc, so jump to error label
1339          * instead of returning */
1340
1341 #ifdef WITH_THREAD
1342         self->lock = PyThread_allocate_lock();
1343         if (!self->lock)
1344                 goto error;
1345 #endif
1346
1347         if (mode_char == 'r')
1348                 self->fp = BZ2_bzReadOpen(&bzerror,
1349                                           PyFile_AsFile(self->file),
1350                                           0, 0, NULL, 0);
1351         else
1352                 self->fp = BZ2_bzWriteOpen(&bzerror,
1353                                            PyFile_AsFile(self->file),
1354                                            compresslevel, 0, 0);
1355
1356         if (bzerror != BZ_OK) {
1357                 Util_CatchBZ2Error(bzerror);
1358                 goto error;
1359         }
1360
1361         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1362
1363         return 0;
1364
1365 error:
1366         Py_DECREF(self->file);
1367 #ifdef WITH_THREAD
1368         if (self->lock)
1369                 PyThread_free_lock(self->lock);
1370 #endif
1371         return -1;
1372 }
1373
1374 static void
1375 BZ2File_dealloc(BZ2FileObject *self)
1376 {
1377         int bzerror;
1378 #ifdef WITH_THREAD
1379         if (self->lock)
1380                 PyThread_free_lock(self->lock);
1381 #endif
1382         switch (self->mode) {
1383                 case MODE_READ:
1384                 case MODE_READ_EOF:
1385                         BZ2_bzReadClose(&bzerror, self->fp);
1386                         break;
1387                 case MODE_WRITE:
1388                         BZ2_bzWriteClose(&bzerror, self->fp,
1389                                          0, NULL, NULL);
1390                         break;
1391         }
1392         Util_DropReadAhead(self);
1393         Py_XDECREF(self->file);
1394         self->ob_type->tp_free((PyObject *)self);
1395 }
1396
1397 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1398 static PyObject *
1399 BZ2File_getiter(BZ2FileObject *self)
1400 {
1401         if (self->mode == MODE_CLOSED) {
1402                 PyErr_SetString(PyExc_ValueError,
1403                                 "I/O operation on closed file");
1404                 return NULL;
1405         }
1406         Py_INCREF((PyObject*)self);
1407         return (PyObject *)self;
1408 }
1409
1410 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1411 #define READAHEAD_BUFSIZE 8192
1412 static PyObject *
1413 BZ2File_iternext(BZ2FileObject *self)
1414 {
1415         PyStringObject* ret;
1416         ACQUIRE_LOCK(self);
1417         if (self->mode == MODE_CLOSED) {
1418                 PyErr_SetString(PyExc_ValueError,
1419                                 "I/O operation on closed file");
1420                 return NULL;
1421         }
1422         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1423         RELEASE_LOCK(self);
1424         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1425                 Py_XDECREF(ret);
1426                 return NULL;
1427         }
1428         return (PyObject *)ret;
1429 }
1430
1431 /* ===================================================================== */
1432 /* BZ2File_Type definition. */
1433
/* Docstring of the BZ2File type (used as tp_doc in BZ2File_Type).  It is
   assembled from adjacent string pieces; the paragraph about universal
   newlines is only included when WITH_UNIVERSAL_NEWLINES is defined. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
#ifdef WITH_UNIVERSAL_NEWLINES
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
#endif
;
1456
/* Type object for bz2.BZ2File.  It wires up the deallocator, the iterator
   slots (tp_iter / tp_iternext), the method, member and getset tables
   defined above, and BZ2File_init as the initializer; allocation and
   attribute access use the generic helpers.  Py_TPFLAGS_BASETYPE is set in
   tp_flags. */
static PyTypeObject BZ2File_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2File",          /*tp_name*/
        sizeof(BZ2FileObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2File_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2File__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        (getiterfunc)BZ2File_getiter, /*tp_iter*/
        (iternextfunc)BZ2File_iternext, /*tp_iternext*/
        BZ2File_methods,        /*tp_methods*/
        BZ2File_members,        /*tp_members*/
        BZ2File_getset,         /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2File_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1500
1501
1502 /* ===================================================================== */
1503 /* Methods of BZ2Comp. */
1504
1505 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1506 "compress(data) -> string\n\
1507 \n\
1508 Provide more data to the compressor object. It will return chunks of\n\
1509 compressed data whenever possible. When you've finished providing data\n\
1510 to compress, call the flush() method to finish the compression process,\n\
1511 and return what is left in the internal buffers.\n\
1512 ");
1513
1514 static PyObject *
1515 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1516 {
1517         char *data;
1518         int datasize;
1519         int bufsize = SMALLCHUNK;
1520         PY_LONG_LONG totalout;
1521         PyObject *ret = NULL;
1522         bz_stream *bzs = &self->bzs;
1523         int bzerror;
1524
1525         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1526                 return NULL;
1527
1528         ACQUIRE_LOCK(self);
1529         if (!self->running) {
1530                 PyErr_SetString(PyExc_ValueError,
1531                                 "this object was already flushed");
1532                 goto error;
1533         }
1534
1535         ret = PyString_FromStringAndSize(NULL, bufsize);
1536         if (!ret)
1537                 goto error;
1538
1539         bzs->next_in = data;
1540         bzs->avail_in = datasize;
1541         bzs->next_out = BUF(ret);
1542         bzs->avail_out = bufsize;
1543
1544         totalout = BZS_TOTAL_OUT(bzs);
1545
1546         for (;;) {
1547                 Py_BEGIN_ALLOW_THREADS
1548                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1549                 Py_END_ALLOW_THREADS
1550                 if (bzerror != BZ_RUN_OK) {
1551                         Util_CatchBZ2Error(bzerror);
1552                         goto error;
1553                 }
1554                 if (bzs->avail_out == 0) {
1555                         bufsize = Util_NewBufferSize(bufsize);
1556                         if (_PyString_Resize(&ret, bufsize) < 0) {
1557                                 BZ2_bzCompressEnd(bzs);
1558                                 goto error;
1559                         }
1560                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1561                                                     - totalout);
1562                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1563                 } else if (bzs->avail_in == 0) {
1564                         break;
1565                 }
1566         }
1567
1568         _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1569
1570         RELEASE_LOCK(self);
1571         return ret;
1572
1573 error:
1574         RELEASE_LOCK(self);
1575         Py_XDECREF(ret);
1576         return NULL;
1577 }
1578
1579 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1580 "flush() -> string\n\
1581 \n\
1582 Finish the compression process and return what is left in internal buffers.\n\
1583 You must not use the compressor object after calling this method.\n\
1584 ");
1585
1586 static PyObject *
1587 BZ2Comp_flush(BZ2CompObject *self)
1588 {
1589         int bufsize = SMALLCHUNK;
1590         PyObject *ret = NULL;
1591         bz_stream *bzs = &self->bzs;
1592         PY_LONG_LONG totalout;
1593         int bzerror;
1594
1595         ACQUIRE_LOCK(self);
1596         if (!self->running) {
1597                 PyErr_SetString(PyExc_ValueError, "object was already "
1598                                                   "flushed");
1599                 goto error;
1600         }
1601         self->running = 0;
1602
1603         ret = PyString_FromStringAndSize(NULL, bufsize);
1604         if (!ret)
1605                 goto error;
1606
1607         bzs->next_out = BUF(ret);
1608         bzs->avail_out = bufsize;
1609
1610         totalout = BZS_TOTAL_OUT(bzs);
1611
1612         for (;;) {
1613                 Py_BEGIN_ALLOW_THREADS
1614                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1615                 Py_END_ALLOW_THREADS
1616                 if (bzerror == BZ_STREAM_END) {
1617                         break;
1618                 } else if (bzerror != BZ_FINISH_OK) {
1619                         Util_CatchBZ2Error(bzerror);
1620                         goto error;
1621                 }
1622                 if (bzs->avail_out == 0) {
1623                         bufsize = Util_NewBufferSize(bufsize);
1624                         if (_PyString_Resize(&ret, bufsize) < 0)
1625                                 goto error;
1626                         bzs->next_out = BUF(ret);
1627                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1628                                                     - totalout);
1629                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1630                 }
1631         }
1632
1633         if (bzs->avail_out != 0)
1634                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1635
1636         RELEASE_LOCK(self);
1637         return ret;
1638
1639 error:
1640         RELEASE_LOCK(self);
1641         Py_XDECREF(ret);
1642         return NULL;
1643 }
1644
1645 static PyMethodDef BZ2Comp_methods[] = {
1646         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1647          BZ2Comp_compress__doc__},
1648         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1649          BZ2Comp_flush__doc__},
1650         {NULL,          NULL}           /* sentinel */
1651 };
1652
1653
1654 /* ===================================================================== */
1655 /* Slot definitions for BZ2Comp_Type. */
1656
1657 static int
1658 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1659 {
1660         int compresslevel = 9;
1661         int bzerror;
1662         static char *kwlist[] = {"compresslevel", 0};
1663
1664         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1665                                          kwlist, &compresslevel))
1666                 return -1;
1667
1668         if (compresslevel < 1 || compresslevel > 9) {
1669                 PyErr_SetString(PyExc_ValueError,
1670                                 "compresslevel must be between 1 and 9");
1671                 goto error;
1672         }
1673
1674 #ifdef WITH_THREAD
1675         self->lock = PyThread_allocate_lock();
1676         if (!self->lock)
1677                 goto error;
1678 #endif
1679
1680         memset(&self->bzs, 0, sizeof(bz_stream));
1681         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1682         if (bzerror != BZ_OK) {
1683                 Util_CatchBZ2Error(bzerror);
1684                 goto error;
1685         }
1686
1687         self->running = 1;
1688
1689         return 0;
1690 error:
1691 #ifdef WITH_THREAD
1692         if (self->lock)
1693                 PyThread_free_lock(self->lock);
1694 #endif
1695         return -1;
1696 }
1697
1698 static void
1699 BZ2Comp_dealloc(BZ2CompObject *self)
1700 {
1701 #ifdef WITH_THREAD
1702         if (self->lock)
1703                 PyThread_free_lock(self->lock);
1704 #endif
1705         BZ2_bzCompressEnd(&self->bzs);
1706         self->ob_type->tp_free((PyObject *)self);
1707 }
1708
1709
1710 /* ===================================================================== */
1711 /* BZ2Comp_Type definition. */
1712
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for bz2.BZ2Compressor.
 *
 * Instances have the size of BZ2CompObject, expose the methods listed in
 * BZ2Comp_methods, are set up by BZ2Comp_init and torn down by
 * BZ2Comp_dealloc.  Py_TPFLAGS_BASETYPE is set, so the type can be
 * subclassed.  The ob_type field is left NULL by PyObject_HEAD_INIT(NULL)
 * and is filled in by initbz2().
 */
static PyTypeObject BZ2Comp_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2Compressor",    /*tp_name*/
        sizeof(BZ2CompObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Comp__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Comp_methods,        /*tp_methods*/
        0,                      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Comp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1765
1766
1767 /* ===================================================================== */
1768 /* Members of BZ2Decomp. */
1769
1770 #undef OFF
1771 #define OFF(x) offsetof(BZ2DecompObject, x)
1772
1773 static PyMemberDef BZ2Decomp_members[] = {
1774         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1775         {NULL}  /* Sentinel */
1776 };
1777
1778
1779 /* ===================================================================== */
1780 /* Methods of BZ2Decomp. */
1781
1782 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1783 "decompress(data) -> string\n\
1784 \n\
1785 Provide more data to the decompressor object. It will return chunks\n\
1786 of decompressed data whenever possible. If you try to decompress data\n\
1787 after the end of stream is found, EOFError will be raised. If any data\n\
1788 was found after the end of stream, it'll be ignored and saved in\n\
1789 unused_data attribute.\n\
1790 ");
1791
1792 static PyObject *
1793 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1794 {
1795         char *data;
1796         int datasize;
1797         int bufsize = SMALLCHUNK;
1798         PY_LONG_LONG totalout;
1799         PyObject *ret = NULL;
1800         bz_stream *bzs = &self->bzs;
1801         int bzerror;
1802
1803         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1804                 return NULL;
1805
1806         ACQUIRE_LOCK(self);
1807         if (!self->running) {
1808                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1809                                                 "already found");
1810                 goto error;
1811         }
1812
1813         ret = PyString_FromStringAndSize(NULL, bufsize);
1814         if (!ret)
1815                 goto error;
1816
1817         bzs->next_in = data;
1818         bzs->avail_in = datasize;
1819         bzs->next_out = BUF(ret);
1820         bzs->avail_out = bufsize;
1821
1822         totalout = BZS_TOTAL_OUT(bzs);
1823
1824         for (;;) {
1825                 Py_BEGIN_ALLOW_THREADS
1826                 bzerror = BZ2_bzDecompress(bzs);
1827                 Py_END_ALLOW_THREADS
1828                 if (bzerror == BZ_STREAM_END) {
1829                         if (bzs->avail_in != 0) {
1830                                 Py_DECREF(self->unused_data);
1831                                 self->unused_data =
1832                                     PyString_FromStringAndSize(bzs->next_in,
1833                                                                bzs->avail_in);
1834                         }
1835                         self->running = 0;
1836                         break;
1837                 }
1838                 if (bzerror != BZ_OK) {
1839                         Util_CatchBZ2Error(bzerror);
1840                         goto error;
1841                 }
1842                 if (bzs->avail_out == 0) {
1843                         bufsize = Util_NewBufferSize(bufsize);
1844                         if (_PyString_Resize(&ret, bufsize) < 0) {
1845                                 BZ2_bzDecompressEnd(bzs);
1846                                 goto error;
1847                         }
1848                         bzs->next_out = BUF(ret);
1849                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1850                                                     - totalout);
1851                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1852                 } else if (bzs->avail_in == 0) {
1853                         break;
1854                 }
1855         }
1856
1857         if (bzs->avail_out != 0)
1858                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1859
1860         RELEASE_LOCK(self);
1861         return ret;
1862
1863 error:
1864         RELEASE_LOCK(self);
1865         Py_XDECREF(ret);
1866         return NULL;
1867 }
1868
1869 static PyMethodDef BZ2Decomp_methods[] = {
1870         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1871         {NULL,          NULL}           /* sentinel */
1872 };
1873
1874
1875 /* ===================================================================== */
1876 /* Slot definitions for BZ2Decomp_Type. */
1877
1878 static int
1879 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1880 {
1881         int bzerror;
1882
1883         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1884                 return -1;
1885
1886 #ifdef WITH_THREAD
1887         self->lock = PyThread_allocate_lock();
1888         if (!self->lock)
1889                 goto error;
1890 #endif
1891
1892         self->unused_data = PyString_FromString("");
1893         if (!self->unused_data)
1894                 goto error;
1895
1896         memset(&self->bzs, 0, sizeof(bz_stream));
1897         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1898         if (bzerror != BZ_OK) {
1899                 Util_CatchBZ2Error(bzerror);
1900                 goto error;
1901         }
1902
1903         self->running = 1;
1904
1905         return 0;
1906
1907 error:
1908 #ifdef WITH_THREAD
1909         if (self->lock)
1910                 PyThread_free_lock(self->lock);
1911 #endif
1912         Py_XDECREF(self->unused_data);
1913         return -1;
1914 }
1915
1916 static void
1917 BZ2Decomp_dealloc(BZ2DecompObject *self)
1918 {
1919 #ifdef WITH_THREAD
1920         if (self->lock)
1921                 PyThread_free_lock(self->lock);
1922 #endif
1923         Py_XDECREF(self->unused_data);
1924         BZ2_bzDecompressEnd(&self->bzs);
1925         self->ob_type->tp_free((PyObject *)self);
1926 }
1927
1928
1929 /* ===================================================================== */
1930 /* BZ2Decomp_Type definition. */
1931
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.
 *
 * Instances have the size of BZ2DecompObject, expose the methods listed in
 * BZ2Decomp_methods and the members listed in BZ2Decomp_members, are set up
 * by BZ2Decomp_init and torn down by BZ2Decomp_dealloc.
 * Py_TPFLAGS_BASETYPE is set, so the type can be subclassed.  The ob_type
 * field is left NULL by PyObject_HEAD_INIT(NULL) and is filled in by
 * initbz2().
 */
static PyTypeObject BZ2Decomp_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2Decompressor",  /*tp_name*/
        sizeof(BZ2DecompObject), /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Decomp__doc__,       /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Decomp_methods,      /*tp_methods*/
        BZ2Decomp_members,      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Decomp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1983
1984
1985 /* ===================================================================== */
1986 /* Module functions. */
1987
1988 PyDoc_STRVAR(bz2_compress__doc__,
1989 "compress(data [, compresslevel=9]) -> string\n\
1990 \n\
1991 Compress data in one shot. If you want to compress data sequentially,\n\
1992 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1993 given, must be a number between 1 and 9.\n\
1994 ");
1995
1996 static PyObject *
1997 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1998 {
1999         int compresslevel=9;
2000         char *data;
2001         int datasize;
2002         int bufsize;
2003         PyObject *ret = NULL;
2004         bz_stream _bzs;
2005         bz_stream *bzs = &_bzs;
2006         int bzerror;
2007         static char *kwlist[] = {"data", "compresslevel", 0};
2008
2009         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2010                                          kwlist, &data, &datasize,
2011                                          &compresslevel))
2012                 return NULL;
2013
2014         if (compresslevel < 1 || compresslevel > 9) {
2015                 PyErr_SetString(PyExc_ValueError,
2016                                 "compresslevel must be between 1 and 9");
2017                 return NULL;
2018         }
2019
2020         /* Conforming to bz2 manual, this is large enough to fit compressed
2021          * data in one shot. We will check it later anyway. */
2022         bufsize = datasize + (datasize/100+1) + 600;
2023
2024         ret = PyString_FromStringAndSize(NULL, bufsize);
2025         if (!ret)
2026                 return NULL;
2027
2028         memset(bzs, 0, sizeof(bz_stream));
2029
2030         bzs->next_in = data;
2031         bzs->avail_in = datasize;
2032         bzs->next_out = BUF(ret);
2033         bzs->avail_out = bufsize;
2034
2035         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2036         if (bzerror != BZ_OK) {
2037                 Util_CatchBZ2Error(bzerror);
2038                 Py_DECREF(ret);
2039                 return NULL;
2040         }
2041
2042         for (;;) {
2043                 Py_BEGIN_ALLOW_THREADS
2044                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2045                 Py_END_ALLOW_THREADS
2046                 if (bzerror == BZ_STREAM_END) {
2047                         break;
2048                 } else if (bzerror != BZ_FINISH_OK) {
2049                         BZ2_bzCompressEnd(bzs);
2050                         Util_CatchBZ2Error(bzerror);
2051                         Py_DECREF(ret);
2052                         return NULL;
2053                 }
2054                 if (bzs->avail_out == 0) {
2055                         bufsize = Util_NewBufferSize(bufsize);
2056                         if (_PyString_Resize(&ret, bufsize) < 0) {
2057                                 BZ2_bzCompressEnd(bzs);
2058                                 Py_DECREF(ret);
2059                                 return NULL;
2060                         }
2061                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2062                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2063                 }
2064         }
2065
2066         if (bzs->avail_out != 0)
2067                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2068         BZ2_bzCompressEnd(bzs);
2069
2070         return ret;
2071 }
2072
2073 PyDoc_STRVAR(bz2_decompress__doc__,
2074 "decompress(data) -> decompressed data\n\
2075 \n\
2076 Decompress data in one shot. If you want to decompress data sequentially,\n\
2077 use an instance of BZ2Decompressor instead.\n\
2078 ");
2079
2080 static PyObject *
2081 bz2_decompress(PyObject *self, PyObject *args)
2082 {
2083         char *data;
2084         int datasize;
2085         int bufsize = SMALLCHUNK;
2086         PyObject *ret;
2087         bz_stream _bzs;
2088         bz_stream *bzs = &_bzs;
2089         int bzerror;
2090
2091         if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2092                 return NULL;
2093
2094         if (datasize == 0)
2095                 return PyString_FromString("");
2096
2097         ret = PyString_FromStringAndSize(NULL, bufsize);
2098         if (!ret)
2099                 return NULL;
2100
2101         memset(bzs, 0, sizeof(bz_stream));
2102
2103         bzs->next_in = data;
2104         bzs->avail_in = datasize;
2105         bzs->next_out = BUF(ret);
2106         bzs->avail_out = bufsize;
2107
2108         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2109         if (bzerror != BZ_OK) {
2110                 Util_CatchBZ2Error(bzerror);
2111                 Py_DECREF(ret);
2112                 return NULL;
2113         }
2114
2115         for (;;) {
2116                 Py_BEGIN_ALLOW_THREADS
2117                 bzerror = BZ2_bzDecompress(bzs);
2118                 Py_END_ALLOW_THREADS
2119                 if (bzerror == BZ_STREAM_END) {
2120                         break;
2121                 } else if (bzerror != BZ_OK) {
2122                         BZ2_bzDecompressEnd(bzs);
2123                         Util_CatchBZ2Error(bzerror);
2124                         Py_DECREF(ret);
2125                         return NULL;
2126                 }
2127                 if (bzs->avail_out == 0) {
2128                         bufsize = Util_NewBufferSize(bufsize);
2129                         if (_PyString_Resize(&ret, bufsize) < 0) {
2130                                 BZ2_bzDecompressEnd(bzs);
2131                                 Py_DECREF(ret);
2132                                 return NULL;
2133                         }
2134                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2135                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2136                 } else if (bzs->avail_in == 0) {
2137                         BZ2_bzDecompressEnd(bzs);
2138                         PyErr_SetString(PyExc_ValueError,
2139                                         "couldn't find end of stream");
2140                         Py_DECREF(ret);
2141                         return NULL;
2142                 }
2143         }
2144
2145         if (bzs->avail_out != 0)
2146                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2147         BZ2_bzDecompressEnd(bzs);
2148
2149         return ret;
2150 }
2151
2152 static PyMethodDef bz2_methods[] = {
2153         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2154                 bz2_compress__doc__},
2155         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2156                 bz2_decompress__doc__},
2157         {NULL,          NULL}           /* sentinel */
2158 };
2159
2160 /* ===================================================================== */
2161 /* Initialization function. */
2162
2163 PyDoc_STRVAR(bz2__doc__,
2164 "The python bz2 module provides a comprehensive interface for\n\
2165 the bz2 compression library. It implements a complete file\n\
2166 interface, one shot (de)compression functions, and types for\n\
2167 sequential (de)compression.\n\
2168 ");
2169
2170 DL_EXPORT(void)
2171 initbz2(void)
2172 {
2173         PyObject *m;
2174
2175         BZ2File_Type.ob_type = &PyType_Type;
2176         BZ2Comp_Type.ob_type = &PyType_Type;
2177         BZ2Decomp_Type.ob_type = &PyType_Type;
2178
2179         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2180
2181         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2182
2183         Py_INCREF(&BZ2File_Type);
2184         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2185
2186         Py_INCREF(&BZ2Comp_Type);
2187         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2188
2189         Py_INCREF(&BZ2Decomp_Type);
2190         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2191 }