This commit was manufactured by cvs2svn to create tag 'r234c1'.
[python/dscho.git] / Doc / lib / libtarfile.tex
blob10ecc7777032b2b8a2c43ab130064e0fc53ba4cf
1 \section{\module{tarfile} --- Read and write tar archive files}
3 \declaremodule{standard}{tarfile}
4 \modulesynopsis{Read and write tar-format archive files.}
5 \versionadded{2.3}
7 \moduleauthor{Lars Gust\"abel}{lars@gustaebel.de}
8 \sectionauthor{Lars Gust\"abel}{lars@gustaebel.de}
10 The \module{tarfile} module makes it possible to read and create tar archives.
11 Some facts and figures:
13 \begin{itemize}
14 \item reads and writes \module{gzip} and \module{bzip2} compressed archives.
15 \item creates POSIX 1003.1-1990 compliant or GNU tar compatible archives.
16 \item reads GNU tar extensions \emph{longname}, \emph{longlink} and
17 \emph{sparse}.
18 \item stores pathnames of unlimited length using GNU tar extensions.
19 \item handles directories, regular files, hardlinks, symbolic links, fifos,
20 character devices and block devices and is able to acquire and
21 restore file information like timestamp, access permissions and owner.
22 \item can handle tape devices.
23 \end{itemize}
25 \begin{funcdesc}{open}{\optional{name\optional{, mode
26 \optional{, fileobj\optional{, bufsize}}}}}
27 Return a \class{TarFile} object for the pathname \var{name}.
28 For detailed information on \class{TarFile} objects,
29 see \citetitle{TarFile Objects} (section \ref{tarfile-objects}).
31 \var{mode} has to be a string of the form \code{'filemode[:compression]'},
32 it defaults to \code{'r'}. Here is a full list of mode combinations:
34 \begin{tableii}{c|l}{code}{mode}{action}
35 \lineii{'r'}{Open for reading with transparent compression (recommended).}
36 \lineii{'r:'}{Open for reading exclusively without compression.}
37 \lineii{'r:gz'}{Open for reading with gzip compression.}
38 \lineii{'r:bz2'}{Open for reading with bzip2 compression.}
39 \lineii{'a' or 'a:'}{Open for appending with no compression.}
40 \lineii{'w' or 'w:'}{Open for uncompressed writing.}
41 \lineii{'w:gz'}{Open for gzip compressed writing.}
42 \lineii{'w:bz2'}{Open for bzip2 compressed writing.}
43 \end{tableii}
45 Note that \code{'a:gz'} or \code{'a:bz2'} is not possible.
46 If \var{mode} is not suitable to open a certain (compressed) file for
47 reading, \exception{ReadError} is raised. Use \var{mode} \code{'r'} to
48 avoid this. If a compression method is not supported,
49 \exception{CompressionError} is raised.
51 If \var{fileobj} is specified, it is used as an alternative to
52 a file object opened for \var{name}.
54 For special purposes, there is a second format for \var{mode}:
55 \code{'filemode|[compression]'}. \code{open} will return a \class{TarFile}
56 object that processes its data as a stream of blocks. No random
57 seeking will be done on the file. If given, \var{fileobj} may be any
58 object that has a \code{read()} or \code{write()} method (depending on the \var{mode}).
59 \var{bufsize} specifies the blocksize and defaults to \code{20 * 512}
60 bytes. Use this variant in combination with e.g.\ \code{sys.stdin}, a socket
61 file object or a tape device.
62 However, such a \class{TarFile} object is limited in that it does not allow
63 random access; see \citetitle{Examples} (section
64 \ref{tar-examples}).
65 The currently possible modes:
67 \begin{tableii}{c|l}{code}{mode}{action}
68 \lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
69 \lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
70 \lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
71 \lineii{'w|'}{Open an uncompressed \emph{stream} for writing.}
72 \lineii{'w|gz'}{Open a gzip compressed \emph{stream} for writing.}
73 \lineii{'w|bz2'}{Open a bzip2 compressed \emph{stream} for writing.}
74 \end{tableii}
75 \end{funcdesc}
77 \begin{classdesc*}{TarFile}
78 Class for reading and writing tar archives. Do not use this
79 class directly, better use \function{open()} instead.
80 See \citetitle{TarFile Objects} (section \ref{tarfile-objects}).
81 \end{classdesc*}
83 \begin{funcdesc}{is_tarfile}{name}
84 Return \code{True} if \var{name} is a tar archive file that the
85 \module{tarfile} module can read.
86 \end{funcdesc}
88 \begin{classdesc}{TarFileCompat}{filename\optional{, mode\optional{,
89 compression}}}
91 Class for limited access to tar archives with a \code{zipfile}-like
92 interface. Please consult the documentation of \code{zipfile} for more
93 details.
94 \code{compression} must be one of the following constants:
95 \begin{datadesc}{TAR_PLAIN}
96 Constant for an uncompressed tar archive.
97 \end{datadesc}
98 \begin{datadesc}{TAR_GZIPPED}
99 Constant for a \code{gzip} compressed tar archive.
100 \end{datadesc}
101 \end{classdesc}
103 \begin{excdesc}{TarError}
104 Base class for all \module{tarfile} exceptions.
105 \end{excdesc}
107 \begin{excdesc}{ReadError}
108 Is raised when a tar archive is opened that either cannot be handled by
109 the \module{tarfile} module or is somehow invalid.
110 \end{excdesc}
112 \begin{excdesc}{CompressionError}
113 Is raised when a compression method is not supported or when the data
114 cannot be decoded properly.
115 \end{excdesc}
117 \begin{excdesc}{StreamError}
118 Is raised for the limitations that are typical for stream-like
119 \class{TarFile} objects.
120 \end{excdesc}
122 \begin{excdesc}{ExtractError}
123 Is raised for \emph{non-fatal} errors when using \method{extract()}, but
124 only if \member{TarFile.errorlevel}\code{ == 2}.
125 \end{excdesc}
127 \begin{seealso}
128 \seemodule{zipfile}{Documentation of the \code{zipfile}
129 standard module.}
131 \seetitle[http://www.gnu.org/manual/tar/html_chapter/tar_8.html\#SEC118]
132 {GNU tar manual, Standard Section}{Documentation for tar archive files,
133 including GNU tar extensions.}
134 \end{seealso}
136 %-----------------
137 % TarFile Objects
138 %-----------------
140 \subsection{TarFile Objects \label{tarfile-objects}}
142 The \class{TarFile} object provides an interface to a tar archive. A tar
143 archive is a sequence of blocks. An archive member (a stored file) is made up
144 of a header block followed by data blocks. It is possible to store a file in a
145 tar archive several times. Each archive member is represented by a
146 \class{TarInfo} object, see \citetitle{TarInfo Objects} (section
147 \ref{tarinfo-objects}) for details.
149 \begin{classdesc}{TarFile}{\optional{name
150 \optional{, mode\optional{, fileobj}}}}
151 Open an \emph{(uncompressed)} tar archive \var{name}.
152 \var{mode} is either \code{'r'} to read from an existing archive,
153 \code{'a'} to append data to an existing file or \code{'w'} to create a new
154 file overwriting an existing one. \var{mode} defaults to \code{'r'}.
156 If \var{fileobj} is given, it is used for reading or writing data.
157 If it can be determined, \var{mode} is overridden by \var{fileobj}'s mode.
158 \begin{notice}
159 \var{fileobj} is not closed when \class{TarFile} is closed.
160 \end{notice}
161 \end{classdesc}
163 \begin{methoddesc}{open}{...}
164 Alternative constructor. The \function{open()} function on module level is
165 actually a shortcut to this classmethod. See section \ref{module-tarfile}
166 for details.
167 \end{methoddesc}
169 \begin{methoddesc}{getmember}{name}
170 Return a \class{TarInfo} object for member \var{name}. If \var{name} cannot
171 be found in the archive, \exception{KeyError} is raised.
172 \begin{notice}
173 If a member occurs more than once in the archive, its last
174 occurrence is assumed to be the most up-to-date version.
175 \end{notice}
176 \end{methoddesc}
178 \begin{methoddesc}{getmembers}{}
179 Return the members of the archive as a list of \class{TarInfo} objects.
180 The list has the same order as the members in the archive.
181 \end{methoddesc}
183 \begin{methoddesc}{getnames}{}
184 Return the members as a list of their names. It has the same order as
185 the list returned by \method{getmembers()}.
186 \end{methoddesc}
188 \begin{methoddesc}{list}{verbose=True}
189 Print a table of contents to \code{sys.stdout}. If \var{verbose} is
190 \code{False}, only the names of the members are printed. If it is
191 \code{True}, an \code{"ls -l"}-like output is produced.
192 \end{methoddesc}
194 \begin{methoddesc}{next}{}
195 Return the next member of the archive as a \class{TarInfo} object, when
196 \class{TarFile} is opened for reading. Return \code{None} if there is no
197 more available.
198 \end{methoddesc}
200 \begin{methoddesc}{extract}{member\optional{, path}}
201 Extract a member from the archive to the current working directory,
202 using its full name. Its file information is extracted as accurately as
203 possible.
204 \var{member} may be a filename or a \class{TarInfo} object.
205 You can specify a different directory using \var{path}.
206 \end{methoddesc}
208 \begin{methoddesc}{extractfile}{member}
209 Extract a member from the archive as a file object.
210 \var{member} may be a filename or a \class{TarInfo} object.
211 If \var{member} is a regular file, a file-like object is returned.
212 If \var{member} is a link, a file-like object is constructed from the
213 link's target.
214 If \var{member} is none of the above, \code{None} is returned.
215 \begin{notice}
216 The file-like object is read-only and provides the following methods:
217 \method{read()}, \method{readline()}, \method{readlines()},
218 \method{seek()}, \method{tell()}.
219 \end{notice}
220 \end{methoddesc}
222 \begin{methoddesc}{add}{name\optional{, arcname\optional{, recursive=True}}}
223 Add the file \var{name} to the archive. \var{name} may be any type
224 of file (directory, fifo, symbolic link, etc.).
225 If given, \var{arcname} specifies an alternative name for the file in the
226 archive. Directories are added recursively by default.
227 This can be avoided by setting \var{recursive} to \code{False}.
228 \end{methoddesc}
230 \begin{methoddesc}{addfile}{tarinfo\optional{, fileobj}}
231 Add the \class{TarInfo} object \var{tarinfo} to the archive.
232 If \var{fileobj} is given, \code{tarinfo.size} bytes are read
233 from it and added to the archive. You can create \class{TarInfo} objects
234 using \method{gettarinfo()}.
235 \begin{notice}
236 On Windows platforms, \var{fileobj} should always be opened with mode
237 \code{'rb'} to avoid confusion about the file size.
238 \end{notice}
239 \end{methoddesc}
241 \begin{methoddesc}{gettarinfo}{\optional{name\optional{, arcname
242 \optional{, fileobj}}}}
243 Create a \class{TarInfo} object for either the file \var{name} or the
244 file object \var{fileobj} (using \code{os.fstat()} on its file descriptor).
245 You can modify some of the \class{TarInfo}'s attributes before you add it
246 using \method{addfile()}.
247 If given, \var{arcname} specifies an alternative name for the file in the
248 archive.
249 \end{methoddesc}
251 \begin{methoddesc}{close}{}
252 Close the \class{TarFile}. In write-mode, two finishing zero blocks are
253 appended to the archive.
254 \end{methoddesc}
256 \begin{memberdesc}{posix=True}
257 If \code{True}, create a POSIX 1003.1-1990 compliant archive. GNU
258 extensions are not used, because they are not part of the POSIX standard.
259 This limits the length of filenames to at most 256 and linknames to 100
260 characters. A \exception{ValueError} is raised if a pathname exceeds this
261 limit.
262 If \code{False}, create a GNU tar compatible archive. It will not be POSIX
263 compliant, but can store pathnames of unlimited length.
264 \end{memberdesc}
266 \begin{memberdesc}{dereference=False}
267 If \code{False}, add symbolic and hard links to archive. If \code{True},
268 add the content of the target files to the archive. This has no effect on
269 systems that do not support links.
270 \end{memberdesc}
272 \begin{memberdesc}{ignore_zeros=False}
273 If \code{False}, treat an empty block as the end of the archive. If
274 \code{True}, skip empty (and invalid) blocks and try to get as many
275 members as possible. This is only useful for concatenated or damaged
276 archives.
277 \end{memberdesc}
279 \begin{memberdesc}{debug=0}
280 To be set from \code{0} (no debug messages) up to \code{3} (all debug
281 messages). The messages are written to \code{sys.stdout}.
282 \end{memberdesc}
284 \begin{memberdesc}{errorlevel=0}
285 If \code{0}, all errors are ignored when using \method{extract()}.
286 Nevertheless, they appear as error messages in the debug output when
287 debugging is enabled.
288 If \code{1}, all \emph{fatal} errors are raised as \exception{OSError}
289 or \exception{IOError} exceptions.
290 If \code{2}, all \emph{non-fatal} errors are raised as \exception{TarError}
291 exceptions as well.
292 \end{memberdesc}
294 %-----------------
295 % TarInfo Objects
296 %-----------------
298 \subsection{TarInfo Objects \label{tarinfo-objects}}
300 A \class{TarInfo} object represents one member in a \class{TarFile}. Aside from
301 storing all required attributes of a file (like file type, size, time,
302 permissions, owner etc.), it provides some useful methods to determine its
303 type. It does \emph{not} contain the file's data itself.
305 \class{TarInfo} objects are returned by \code{TarFile}'s methods
306 \code{getmember()}, \code{getmembers()} and \code{gettarinfo()}.
308 \begin{classdesc}{TarInfo}{\optional{name}}
309 Create a \class{TarInfo} object.
310 \end{classdesc}
312 \begin{methoddesc}{frombuf}{}
313 Create and return a \class{TarInfo} object from a string buffer.
314 \end{methoddesc}
316 \begin{methoddesc}{tobuf}{}
317 Create a string buffer from a \class{TarInfo} object.
318 \end{methoddesc}
320 A \code{TarInfo} object has the following public data attributes:
321 \begin{memberdesc}{name}
322 Name of the archive member.
323 \end{memberdesc}
325 \begin{memberdesc}{size}
326 Size in bytes.
327 \end{memberdesc}
329 \begin{memberdesc}{mtime}
330 Time of last modification.
331 \end{memberdesc}
333 \begin{memberdesc}{mode}
334 Permission bits.
335 \end{memberdesc}
337 \begin{memberdesc}{type}
338 File type.
339 \var{type} is usually one of these constants:
340 \code{REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, CONTTYPE,
341 CHRTYPE, BLKTYPE, GNUTYPE_SPARSE}.
342 To determine the type of a \class{TarInfo} object more conveniently, use
343 the \code{is_*()} methods below.
344 \end{memberdesc}
346 \begin{memberdesc}{linkname}
347 Name of the target file, which is only present in \class{TarInfo}
348 objects of type \code{LNKTYPE} and \code{SYMTYPE}.
349 \end{memberdesc}
351 \begin{memberdesc}{uid, gid}
352 User and group ID of who originally stored this member.
353 \end{memberdesc}
355 \begin{memberdesc}{uname, gname}
356 User and group name.
357 \end{memberdesc}
359 A \class{TarInfo} object also provides some convenient query methods:
360 \begin{methoddesc}{isfile}{}
361 Return \code{True} if the \class{TarInfo} object is a regular file.
362 \end{methoddesc}
364 \begin{methoddesc}{isreg}{}
365 Same as \method{isfile()}.
366 \end{methoddesc}
368 \begin{methoddesc}{isdir}{}
369 Return \code{True} if it is a directory.
370 \end{methoddesc}
372 \begin{methoddesc}{issym}{}
373 Return \code{True} if it is a symbolic link.
374 \end{methoddesc}
376 \begin{methoddesc}{islnk}{}
377 Return \code{True} if it is a hard link.
378 \end{methoddesc}
380 \begin{methoddesc}{ischr}{}
381 Return \code{True} if it is a character device.
382 \end{methoddesc}
384 \begin{methoddesc}{isblk}{}
385 Return \code{True} if it is a block device.
386 \end{methoddesc}
388 \begin{methoddesc}{isfifo}{}
389 Return \code{True} if it is a FIFO.
390 \end{methoddesc}
392 \begin{methoddesc}{isdev}{}
393 Return \code{True} if it is one of character device, block device or FIFO.
394 \end{methoddesc}
396 %------------------------
397 % Examples
398 %------------------------
400 \subsection{Examples \label{tar-examples}}
402 How to create an uncompressed tar archive from a list of filenames:
403 \begin{verbatim}
404 import tarfile
405 tar = tarfile.open("sample.tar", "w")
406 for name in ["foo", "bar", "quux"]:
407     tar.add(name)
408 tar.close()
409 \end{verbatim}
411 How to read a gzip compressed tar archive and display some member information:
412 \begin{verbatim}
413 import tarfile
414 tar = tarfile.open("sample.tar.gz", "r:gz")
415 for tarinfo in tar:
416     print tarinfo.name, "is", tarinfo.size, "bytes in size and is",
417     if tarinfo.isreg():
418         print "a regular file."
419     elif tarinfo.isdir():
420         print "a directory."
421     else:
422         print "something else."
423 tar.close()
424 \end{verbatim}
426 How to create a tar archive with faked information:
427 \begin{verbatim}
428 import tarfile
429 tar = tarfile.open("sample.tar.gz", "w:gz")
430 for name in namelist:
431     tarinfo = tar.gettarinfo(name, "fakeproj-1.0/" + name)
432     tarinfo.uid = 123
433     tarinfo.gid = 456
434     tarinfo.uname = "johndoe"
435     tarinfo.gname = "fake"
436     tar.addfile(tarinfo, file(name))
437 tar.close()
438 \end{verbatim}
440 The \emph{only} way to extract an uncompressed tar stream from
441 \code{sys.stdin}:
442 \begin{verbatim}
443 import sys
444 import tarfile
445 tar = tarfile.open(mode="r|", fileobj=sys.stdin)
446 for tarinfo in tar:
447     tar.extract(tarinfo)
448 tar.close()
449 \end{verbatim}