Doc/lib/libzlib.tex

   1 \section{\module{zlib} ---
   2          Compression compatible with \program{gzip}}
   3
   4 \declaremodule{builtin}{zlib}
   5 \modulesynopsis{Low-level interface to compression and decompression
   6                 routines compatible with \program{gzip}.}
   7
   8
   9 For applications that require data compression, the functions in this
  10 module allow compression and decompression, using the zlib library.
  11 The zlib library has its own home page at
  12 \url{http://www.info-zip.org/pub/infozip/zlib/}.  Version 1.1.3 is the
  13 most recent version as of September 2000; use a later version if one
  14 is available.  There are known incompatibilities between the Python
  15 module and earlier versions of the zlib library.
  16
  17 The available exception and functions in this module are:
  18
  19 \begin{excdesc}{error}
  20   Exception raised on compression and decompression errors.
  21 \end{excdesc}
  22
  23
  24 \begin{funcdesc}{adler32}{string\optional{, value}}
  25    Computes an Adler-32 checksum of \var{string}.  (An Adler-32
  26    checksum is almost as reliable as a CRC32 but can be computed much
  27    more quickly.)  If \var{value} is present, it is used as the
  28    starting value of the checksum; otherwise, a fixed default value is
  29    used.  This allows computing a running checksum over the
  30    concatenation of several input strings.  The algorithm is not
  31    cryptographically strong, and should not be used for
  32    authentication or digital signatures.
  33 \end{funcdesc}
  34
  35 \begin{funcdesc}{compress}{string\optional{, level}}
  36   Compresses the data in \var{string}, returning a string containing
  37   compressed data.  \var{level} is an integer from \code{1} to
  38   \code{9} controlling the level of compression; \code{1} is fastest
  39   and produces the least compression, \code{9} is slowest and produces
  40   the most.  The default value is \code{6}.  Raises the
  41   \exception{error} exception if any error occurs.
  42 \end{funcdesc}
  43
  44 \begin{funcdesc}{compressobj}{\optional{level}}
  45   Returns a compression object, to be used for compressing data streams
  46   that won't fit into memory at once.  \var{level} is an integer from
  47   \code{1} to \code{9} controlling the level of compression; \code{1} is
  48   fastest and produces the least compression, \code{9} is slowest and
  49   produces the most.  The default value is \code{6}.
  50 \end{funcdesc}
  51
  52 \begin{funcdesc}{crc32}{string\optional{, value}}
  53   Computes a CRC (Cyclic Redundancy Check)%
  54   \index{Cyclic Redundancy Check}
  55   \index{checksum!Cyclic Redundancy Check}
  56   checksum of \var{string}. If
  57   \var{value} is present, it is used as the starting value of the
  58   checksum; otherwise, a fixed default value is used.  This allows
  59   computing a running checksum over the concatenation of several
  60   input strings.  The algorithm is not cryptographically strong, and
  61   should not be used for authentication or digital signatures.
  62 \end{funcdesc}
  63
  64 \begin{funcdesc}{decompress}{string\optional{, wbits\optional{, bufsize}}}
  65   Decompresses the data in \var{string}, returning a string containing
  66   the uncompressed data.  The \var{wbits} parameter controls the size of
  67   the window buffer.  If \var{bufsize} is given, it is used as the
  68   initial size of the output buffer.  Raises the \exception{error}
  69   exception if any error occurs.
  70
  71 The absolute value of \var{wbits} is the base two logarithm of the
  72 size of the history buffer (the ``window size'') used when compressing
  73 data.  Its absolute value should be between 8 and 15 for the most
  74 recent versions of the zlib library, larger values resulting in better
  75 compression at the expense of greater memory usage.  The default value
  76 is 15.  When \var{wbits} is negative, the standard
  77 \program{gzip} header is suppressed; this is an undocumented feature
  78 of the zlib library, used for compatibility with \program{unzip}'s
  79 compression file format.
  80
  81 \var{bufsize} is the initial size of the buffer used to hold
  82 decompressed data.  If more space is required, the buffer size will be
  83 increased as needed, so you don't have to get this value exactly
  84 right; tuning it will only save a few calls to \cfunction{malloc()}.  The
  85 default size is 16384.
  86
  87 \end{funcdesc}
  88
  89 \begin{funcdesc}{decompressobj}{\optional{wbits}}
  90   Returns a decompression object, to be used for decompressing data
  91   streams that won't fit into memory at once.  The \var{wbits}
  92   parameter controls the size of the window buffer.
  93 \end{funcdesc}
  94
  95 Compression objects support the following methods:
  96
  97 \begin{methoddesc}[Compress]{compress}{string}
  98 Compress \var{string}, returning a string containing compressed data
  99 for at least part of the data in \var{string}.  This data should be
 100 concatenated to the output produced by any preceding calls to the
 101 \method{compress()} method.  Some input may be kept in internal buffers
 102 for later processing.
 103 \end{methoddesc}
 104
 105 \begin{methoddesc}[Compress]{flush}{\optional{mode}}
 106 All pending input is processed, and a string containing the remaining
 107 compressed output is returned.  \var{mode} can be selected from the
 108 constants \constant{Z_SYNC_FLUSH},  \constant{Z_FULL_FLUSH},  or
 109 \constant{Z_FINISH}, defaulting to \constant{Z_FINISH}.  \constant{Z_SYNC_FLUSH} and
 110 \constant{Z_FULL_FLUSH} allow compressing further strings of data and
 111 are used to allow partial error recovery on decompression, while
 112 \constant{Z_FINISH} finishes the compressed stream and
 113 prevents compressing any more data.  After calling
 114 \method{flush()} with \var{mode} set to \constant{Z_FINISH}, the
 115 \method{compress()} method cannot be called again; the only realistic
 116 action is to delete the object.
 117 \end{methoddesc}
 118
 119 Decompression objects support the following methods, and a single attribute:
 120
 121 \begin{memberdesc}{unused_data}
 122 A string which contains any unused data from the last string fed to
 123 this decompression object.  If the whole string turned out to contain
 124 compressed data, this is \code{""}, the empty string.
 125
 126 The only way to determine where a string of compressed data ends is by
 127 actually decompressing it.  This means that when compressed data is
 128 contained as part of a larger file, you can only find the end of it by
 129 reading data and feeding it into a decompression object's
 130 \method{decompress()} method until the \member{unused_data} attribute is
 131 no longer the empty string.
 132 \end{memberdesc}
 133
 134 \begin{methoddesc}[Decompress]{decompress}{string}
 135 Decompress \var{string}, returning a string containing the
 136 uncompressed data corresponding to at least part of the data in
 137 \var{string}.  This data should be concatenated to the output produced
 138 by any preceding calls to the
 139 \method{decompress()} method.  Some of the input data may be preserved
 140 in internal buffers for later processing.
 141 \end{methoddesc}
 142
 143 \begin{methoddesc}[Decompress]{flush}{}
 144 All pending input is processed, and a string containing the remaining
 145 uncompressed output is returned.  After calling \method{flush()}, the
 146 \method{decompress()} method cannot be called again; the only realistic
 147 action is to delete the object.
 148 \end{methoddesc}
 149
 150 \begin{seealso}
 151   \seemodule{gzip}{Reading and writing \program{gzip}-format files.}
 152   \seeurl{http://www.info-zip.org/pub/infozip/zlib/}{The
 153           zlib library home page.}
 154 \end{seealso}