2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    #
    # Refuse the import outright instead of misbehaving later.  The
    # exception is raised in call form, which is equivalent to the old
    # "raise ImportError, msg" statement but parses on every Python version.
    raise ImportError("tarfile does not work for platform==mac")
65 # from tarfile import *
66 __all__
= ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68 #---------------------------------------------------------
70 #---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Maximum size of a file: the largest number that can be written with
# 11 octal digits (octal 77777777777).  It is spelled as an expression
# because the "0...L" long-octal literal only parses on Python 2.
MAXSIZE_MEMBER = 8 ** 11 - 1

# Values of the one-character type field of a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file
96 #---------------------------------------------------------
98 #---------------------------------------------------------
99 SUPPORTED_TYPES
= (REGTYPE
, AREGTYPE
, LNKTYPE
, # file types that tarfile
100 SYMTYPE
, DIRTYPE
, FIFOTYPE
, # can cope with.
101 CONTTYPE
, CHRTYPE
, BLKTYPE
,
102 GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
,
# File types that somehow represent regular files.  The tuple is used
# for membership tests on a member's type field
# (self.type in REGULAR_TYPES).
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)
108 #---------------------------------------------------------
109 # Bits used in the mode field, values in octal.
110 #---------------------------------------------------------
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID = 04000            # set UID on execution
TSGID = 02000            # set GID on execution
TSVTX = 01000            # reserved

# Permission bits for the owner.
TUREAD = 0400            # read by owner
TUWRITE = 0200           # write by owner
TUEXEC = 0100            # execute/search by owner

# Permission bits for the group.
TGREAD = 0040            # read by group
TGWRITE = 0020           # write by group
TGEXEC = 0010            # execute/search by group

# Permission bits for everybody else.
TOREAD = 0004            # read by other
TOWRITE = 0002           # write by other
TOEXEC = 0001            # execute/search by other
132 #---------------------------------------------------------
133 # Some useful functions
134 #---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    s is a string taken from a header field.  Everything from the first
    NUL character onwards is dropped; a buffer that contains no NUL at
    all is returned unchanged.
    """
    # maxsplit=1: only the text before the first NUL is of interest.
    return s.split(NUL, 1)[0]
def calc_chksum(buf):
    """Calculate the checksum for a member's header. It's a simple addition
       of all bytes, treating the chksum field as if filled with spaces.
       buf is a 512 byte long string buffer which holds the header.

       Returns the checksum as an integer.
    """
    # The chksum field occupies buf[148:156] and is treated as blanks,
    # so it contributes 8 * ord(" ") == 256 regardless of its content.
    before = sum(map(ord, buf[:148]))     # all bytes before chksum
    after = sum(map(ord, buf[156:]))      # all bytes after chksum
    return 256 + before + after
151 def copyfileobj(src
, dst
, length
=None):
152 """Copy length bytes from fileobj src to fileobj dst.
153 If length is None, copy the entire content.
158 shutil
.copyfileobj(src
, dst
)
162 blocks
, remainder
= divmod(length
, BUFSIZE
)
163 for b
in xrange(blocks
):
164 buf
= src
.read(BUFSIZE
)
165 if len(buf
) < BUFSIZE
:
166 raise IOError, "end of file reached"
170 buf
= src
.read(remainder
)
171 if len(buf
) < remainder
:
172 raise IOError, "end of file reached"
185 (TUEXEC
, "x", TSUID
, "S", TUEXEC|TSUID
, "s"),
188 (TGEXEC
, "x", TSGID
, "S", TGEXEC|TSGID
, "s"),
191 (TOEXEC
, "x", TSVTX
, "T", TOEXEC|TSVTX
, "t"))
194 """Convert a file's mode to a string of the form
196 Used by TarFile.list()
199 for t
in filemode_table
:
201 if mode
& t
[0] == t
[0]:
212 normpath
= lambda path
: os
.path
.normpath(path
).replace(os
.sep
, "/")
214 normpath
= os
.path
.normpath
class TarError(Exception):
    """Base exception.

    Common base class of ExtractError, ReadError, CompressionError and
    StreamError.
    """
class ExtractError(TarError):
    """General exception for extract errors.

    TarFile.extract() catches this exception; how it is handled there
    depends on the TarFile's errorlevel attribute.
    """
class ReadError(TarError):
    """Exception for unreadable tar archives.

    Raised, for example, when the data is not a gzip or bzip2 file or
    when no open method could open the file successfully.
    """
class CompressionError(TarError):
    """Exception for unavailable compression methods.

    Raised when a required compression module (zlib, gzip, bz2) is not
    available, or when the compression type or method is unknown or
    unsupported.
    """
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles.

    Raised, for example, on an attempt to seek backwards in a stream or
    to extract a (sym)link as a file object from a stream.
    """
232 #---------------------------
233 # internal stream interface
234 #---------------------------
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
241 def __init__(self
, name
, mode
):
244 "w": os
.O_WRONLY | os
.O_CREAT | os
.O_TRUNC
,
246 if hasattr(os
, "O_BINARY"):
248 self
.fd
= os
.open(name
, mode
)
def read(self, size):
    """Read from the low-level file.

    Passes the file descriptor self.fd and size straight to os.read()
    and returns its result.
    """
    return os.read(self.fd, size)
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
267 _Stream is intended to be used only internally.
270 def __init__(self
, name
, mode
, type, fileobj
, bufsize
):
271 """Construct a _Stream object.
273 self
._extfileobj
= True
275 fileobj
= _LowLevelFile(name
, mode
)
276 self
._extfileobj
= False
278 self
.name
= name
or ""
281 self
.fileobj
= fileobj
282 self
.bufsize
= bufsize
291 raise CompressionError
, "zlib module is not available"
293 self
.crc
= zlib
.crc32("")
297 self
._init
_write
_gz
()
303 raise CompressionError
, "bz2 module is not available"
306 self
.cmp = bz2
.BZ2Decompressor()
308 self
.cmp = bz2
.BZ2Compressor()
314 def _init_write_gz(self
):
315 """Initialize for writing with gzip compression.
317 self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
318 -self
.zlib
.MAX_WBITS
,
319 self
.zlib
.DEF_MEM_LEVEL
,
321 timestamp
= struct
.pack("<L", long(time
.time()))
322 self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
323 if self
.name
.endswith(".gz"):
324 self
.name
= self
.name
[:-3]
325 self
.__write
(self
.name
+ NUL
)
328 """Write string s to the stream.
330 if self
.type == "gz":
331 self
.crc
= self
.zlib
.crc32(s
, self
.crc
)
333 if self
.type != "tar":
334 s
= self
.cmp.compress(s
)
337 def __write(self
, s
):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
342 while len(self
.buf
) > self
.bufsize
:
343 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
344 self
.buf
= self
.buf
[self
.bufsize
:]
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
353 if self
.mode
== "w" and self
.buf
:
354 if self
.type != "tar":
355 self
.buf
+= self
.cmp.flush()
356 self
.__write
("") # Write remaining blocks to output
357 self
.fileobj
.write(self
.buf
)
359 if self
.type == "gz":
360 self
.fileobj
.write(struct
.pack("<l", self
.crc
))
361 self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
363 if not self
._extfileobj
:
368 def _init_read_gz(self
):
369 """Initialize for reading a gzip compressed fileobj.
371 self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
374 # taken from gzip.GzipFile with some alterations
375 if self
.__read
(2) != "\037\213":
376 raise ReadError
, "not a gzip file"
377 if self
.__read
(1) != "\010":
378 raise CompressionError
, "unsupported compression method"
380 flag
= ord(self
.__read
(1))
384 xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
389 if not s
or s
== NUL
:
394 if not s
or s
== NUL
:
400 """Return the stream's file pointer position.
404 def seek(self
, pos
=0):
405 """Set the stream's file pointer to pos. Negative seeking
408 if pos
- self
.pos
>= 0:
409 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
410 for i
in xrange(blocks
):
411 self
.read(self
.bufsize
)
414 raise StreamError
, "seeking backwards is not allowed"
417 def read(self
, size
=None):
418 """Return the next size number of bytes from the stream.
419 If size is not defined, return all bytes of the stream
425 buf
= self
._read
(self
.bufsize
)
431 buf
= self
._read
(size
)
435 def _read(self
, size
):
436 """Return size bytes from the stream.
438 if self
.type == "tar":
439 return self
.__read
(size
)
444 buf
= self
.__read
(self
.bufsize
)
447 buf
= self
.cmp.decompress(buf
)
454 def __read(self
, size
):
455 """Return size bytes from stream. If internal buffer is empty,
456 read another block from the stream.
461 buf
= self
.fileobj
.read(self
.bufsize
)
471 #------------------------
472 # Extraction file object
473 #------------------------
474 class ExFileObject(object):
475 """File-like object for reading an archive member.
476 Is returned by TarFile.extractfile(). Support for
477 sparse files included.
480 def __init__(self
, tarfile
, tarinfo
):
481 self
.fileobj
= tarfile
.fileobj
482 self
.name
= tarinfo
.name
485 self
.offset
= tarinfo
.offset_data
486 self
.size
= tarinfo
.size
489 if tarinfo
.issparse():
490 self
.sparse
= tarinfo
.sparse
491 self
.read
= self
._readsparse
493 self
.read
= self
._readnormal
def __read(self, size):
    """Overloadable read method.

    Returns the result of self.fileobj.read(size).
    """
    # NOTE(review): inside the class body the double underscore name is
    # mangled to _ExFileObject__read, so a subclass has to define that
    # mangled name to actually override this method -- confirm that this
    # is the intended way of overloading.
    return self.fileobj.read(size)
500 def readline(self
, size
=-1):
501 """Read a line with approx. size. If size is negative,
502 read a whole line. readline() and read() must not
508 nl
= self
.linebuffer
.find("\n")
512 size
-= len(self
.linebuffer
)
514 buf
= self
.read(min(size
, 100))
517 self
.linebuffer
+= buf
521 nl
= self
.linebuffer
.find("\n")
526 buf
= self
.linebuffer
[:nl
]
527 self
.linebuffer
= self
.linebuffer
[nl
+ 1:]
528 while buf
[-1:] == "\r":
533 """Return a list with all (following) lines.
537 line
= self
.readline()
542 def _readnormal(self
, size
=None):
543 """Read operation for regular files.
546 raise ValueError, "file is closed"
547 self
.fileobj
.seek(self
.offset
+ self
.pos
)
548 bytesleft
= self
.size
- self
.pos
550 bytestoread
= bytesleft
552 bytestoread
= min(size
, bytesleft
)
553 self
.pos
+= bytestoread
554 return self
.__read
(bytestoread
)
556 def _readsparse(self
, size
=None):
557 """Read operation for sparse files.
560 raise ValueError, "file is closed"
563 size
= self
.size
- self
.pos
567 buf
= self
._readsparsesection
(size
)
574 def _readsparsesection(self
, size
):
575 """Read a single section of a sparse file.
577 section
= self
.sparse
.find(self
.pos
)
582 toread
= min(size
, section
.offset
+ section
.size
- self
.pos
)
583 if isinstance(section
, _data
):
584 realpos
= section
.realpos
+ self
.pos
- section
.offset
586 self
.fileobj
.seek(self
.offset
+ realpos
)
587 return self
.__read
(toread
)
593 """Return the current file position.
597 def seek(self
, pos
, whence
=0):
598 """Seek to a position in the file.
602 self
.pos
= min(max(pos
, 0), self
.size
)
605 self
.pos
= max(self
.pos
+ pos
, 0)
607 self
.pos
= min(self
.pos
+ pos
, self
.size
)
609 self
.pos
= max(min(self
.size
+ pos
, self
.size
), 0)
612 """Close the file object.
620 class TarInfo(object):
621 """Informational class which holds the details about an
622 archive member given by a tar header block.
623 TarInfo objects are returned by TarFile.getmember(),
624 TarFile.getmembers() and TarFile.gettarinfo() and are
625 usually created internally.
628 def __init__(self
, name
=""):
629 """Construct a TarInfo object. name is the optional name
633 self
.name
= name
# member name (dirnames must end with '/')
634 self
.mode
= 0666 # file permissions
635 self
.uid
= 0 # user id
636 self
.gid
= 0 # group id
637 self
.size
= 0 # file size
638 self
.mtime
= 0 # modification time
639 self
.chksum
= 0 # header checksum
640 self
.type = REGTYPE
# member type
641 self
.linkname
= "" # link name
642 self
.uname
= "user" # user name
643 self
.gname
= "group" # group name
645 self
.devminor
= 0 #-for use with CHRTYPE and BLKTYPE
646 self
.prefix
= "" # prefix to filename or holding information
649 self
.offset
= 0 # the tar header starts here
650 self
.offset_data
= 0 # the file's data starts here
653 return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
655 def frombuf(cls
, buf
):
656 """Construct a TarInfo object from a 512 byte string buffer.
659 tarinfo
.name
= nts(buf
[0:100])
660 tarinfo
.mode
= int(buf
[100:108], 8)
661 tarinfo
.uid
= int(buf
[108:116],8)
662 tarinfo
.gid
= int(buf
[116:124],8)
663 tarinfo
.size
= long(buf
[124:136], 8)
664 tarinfo
.mtime
= long(buf
[136:148], 8)
665 tarinfo
.chksum
= int(buf
[148:156], 8)
666 tarinfo
.type = buf
[156:157]
667 tarinfo
.linkname
= nts(buf
[157:257])
668 tarinfo
.uname
= nts(buf
[265:297])
669 tarinfo
.gname
= nts(buf
[297:329])
671 tarinfo
.devmajor
= int(buf
[329:337], 8)
672 tarinfo
.devminor
= int(buf
[337:345], 8)
674 tarinfo
.devmajor
= tarinfo
.devmajor
= 0
675 tarinfo
.prefix
= buf
[345:500]
677 # The prefix field is used for filenames > 100 in
678 # the POSIX standard.
679 # name = prefix + '/' + name
680 if tarinfo
.type != GNUTYPE_SPARSE
:
681 tarinfo
.name
= normpath(os
.path
.join(nts(tarinfo
.prefix
), tarinfo
.name
))
683 # Directory names should have a '/' at the end.
684 if tarinfo
.isdir() and tarinfo
.name
[-1:] != "/":
688 frombuf
= classmethod(frombuf
)
691 """Return a tar header block as a 512 byte string.
695 # The following code was contributed by Detlef Lannert.
697 for value
, fieldsize
in (
699 ("%07o" % (self
.mode
& 07777), 8),
700 ("%07o" % self
.uid
, 8),
701 ("%07o" % self
.gid
, 8),
702 ("%011o" % self
.size
, 12),
703 ("%011o" % self
.mtime
, 12),
706 (self
.linkname
, 100),
711 ("%07o" % self
.devmajor
, 8),
712 ("%07o" % self
.devminor
, 8),
716 parts
.append(value
+ (fieldsize
- l
) * NUL
)
719 chksum
= calc_chksum(buf
)
720 buf
= buf
[:148] + "%06o\0" % chksum
+ buf
[155:]
721 buf
+= (BLOCKSIZE
- len(buf
)) * NUL
726 return self
.type in REGULAR_TYPES
730 return self
.type == DIRTYPE
732 return self
.type == SYMTYPE
734 return self
.type == LNKTYPE
736 return self
.type == CHRTYPE
738 return self
.type == BLKTYPE
740 return self
.type == FIFOTYPE
742 return self
.type == GNUTYPE_SPARSE
744 return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
747 class TarFile(object):
748 """The TarFile Class provides an interface to tar archives.
# Default values of the per-archive options.

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 0              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

posix = True                # If True, generates POSIX.1-1990-compliant
                            # archives (no GNU extensions!)

fileobject = ExFileObject   # Class that extractfile() instantiates to
                            # build the file-like object it returns.
768 def __init__(self
, name
=None, mode
="r", fileobj
=None):
769 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
770 read from an existing archive, 'a' to append data to an existing
771 file or 'w' to create a new file overwriting an existing one. `mode'
773 If `fileobj' is given, it is used for reading or writing data. If it
774 can be determined, `mode' is overridden by `fileobj's mode.
775 `fileobj' is not closed, when TarFile is closed.
779 if len(mode
) > 1 or mode
not in "raw":
780 raise ValueError, "mode must be 'r', 'a' or 'w'"
782 self
.mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
785 fileobj
= file(self
.name
, self
.mode
)
786 self
._extfileobj
= False
788 if self
.name
is None and hasattr(fileobj
, "name"):
789 self
.name
= fileobj
.name
790 if hasattr(fileobj
, "mode"):
791 self
.mode
= fileobj
.mode
792 self
._extfileobj
= True
793 self
.fileobj
= fileobj
795 # Init datastructures
797 self
.members
= [] # list of members as TarInfo objects
798 self
.membernames
= [] # names of members
799 self
.chunks
= [0] # chunk cache
800 self
._loaded
= False # flag if all members have been read
801 self
.offset
= 0L # current position in the archive file
802 self
.inodes
= {} # dictionary caching the inodes of
803 # archive members already added
805 if self
._mode
== "r":
806 self
.firstmember
= None
807 self
.firstmember
= self
.next()
809 if self
._mode
== "a":
810 # Move to the end of the archive,
811 # before the first empty block.
812 self
.firstmember
= None
815 tarinfo
= self
.next()
820 self
.fileobj
.seek(- BLOCKSIZE
, 1)
823 if self
._mode
in "aw":
826 #--------------------------------------------------------------------------
827 # Below are the classmethods which act as alternate constructors to the
828 # TarFile class. The open() method is the only one that is needed for
829 # public use; it is the "super"-constructor and is able to select an
830 # adequate "sub"-constructor for a particular compression using the mapping
833 # This concept allows one to subclass TarFile without losing the comfort of
834 # the super-constructor. A sub-constructor is registered and made available
835 # by adding it to the mapping in OPEN_METH.
837 def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=20*512):
838 """Open a tar archive for reading, writing or appending. Return
839 an appropriate TarFile class.
842 'r' open for reading with transparent compression
843 'r:' open for reading exclusively uncompressed
844 'r:gz' open for reading with gzip compression
845 'r:bz2' open for reading with bzip2 compression
846 'a' or 'a:' open for appending
847 'w' or 'w:' open for writing without compression
848 'w:gz' open for writing with gzip compression
849 'w:bz2' open for writing with bzip2 compression
850 'r|' open an uncompressed stream of tar blocks for reading
851 'r|gz' open a gzip compressed stream of tar blocks
852 'r|bz2' open a bzip2 compressed stream of tar blocks
853 'w|' open an uncompressed stream for writing
854 'w|gz' open a gzip compressed stream for writing
855 'w|bz2' open a bzip2 compressed stream for writing
858 if not name
and not fileobj
:
859 raise ValueError, "nothing to open"
862 filemode
, comptype
= mode
.split(":", 1)
863 filemode
= filemode
or "r"
864 comptype
= comptype
or "tar"
866 # Select the *open() function according to
868 if comptype
in cls
.OPEN_METH
:
869 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
871 raise CompressionError
, "unknown compression type %r" % comptype
872 return func(name
, filemode
, fileobj
)
875 filemode
, comptype
= mode
.split("|", 1)
876 filemode
= filemode
or "r"
877 comptype
= comptype
or "tar"
879 if filemode
not in "rw":
880 raise ValueError, "mode must be 'r' or 'w'"
882 t
= cls(name
, filemode
,
883 _Stream(name
, filemode
, comptype
, fileobj
, bufsize
))
884 t
._extfileobj
= False
888 # Find out which *open() is appropriate for opening the file.
889 for comptype
in cls
.OPEN_METH
:
890 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
892 return func(name
, "r", fileobj
)
893 except (ReadError
, CompressionError
):
895 raise ReadError
, "file could not be opened successfully"
898 return cls
.taropen(name
, mode
, fileobj
)
900 raise ValueError, "undiscernible mode"
902 open = classmethod(open)
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

    mode must be 'r', 'a' or 'w'; anything else raises ValueError.
    Returns the result of cls(name, mode, fileobj).
    """
    # A substring test against "raw" would let the empty string through
    # (it is contained in every string), so compare against the three
    # legal modes explicitly.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)

taropen = classmethod(taropen)
913 def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
914 """Open gzip compressed tar archive name for reading or writing.
915 Appending is not allowed.
917 if len(mode
) > 1 or mode
not in "rw":
918 raise ValueError, "mode must be 'r' or 'w'"
923 except (ImportError, AttributeError):
924 raise CompressionError
, "gzip module is not available"
926 pre
, ext
= os
.path
.splitext(name
)
927 pre
= os
.path
.basename(pre
)
935 fileobj
= file(name
, mode
+ "b")
941 t
= cls
.taropen(tarname
, mode
,
942 gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
)
945 raise ReadError
, "not a gzip file"
946 t
._extfileobj
= False
949 gzopen
= classmethod(gzopen
)
951 def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
952 """Open bzip2 compressed tar archive name for reading or writing.
953 Appending is not allowed.
955 if len(mode
) > 1 or mode
not in "rw":
956 raise ValueError, "mode must be 'r' or 'w'."
961 raise CompressionError
, "bz2 module is not available"
963 pre
, ext
= os
.path
.splitext(name
)
964 pre
= os
.path
.basename(pre
)
971 if fileobj
is not None:
972 raise ValueError, "no support for external file objects"
975 t
= cls
.taropen(tarname
, mode
, bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
))
977 raise ReadError
, "not a bzip2 file"
978 t
._extfileobj
= False
981 bz2open
= classmethod(bz2open
)
983 # All *open() methods are registered here.
985 "tar": "taropen", # uncompressed tar
986 "gz": "gzopen", # gzip compressed tar
987 "bz2": "bz2open" # bzip2 compressed tar
990 #--------------------------------------------------------------------------
991 # The public methods which TarFile provides:
994 """Close the TarFile. In write-mode, two finishing zero blocks are
995 appended to the archive.
1000 if self
._mode
in "aw":
1001 self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
1002 self
.offset
+= (BLOCKSIZE
* 2)
1003 # fill up the end with zero-blocks
1004 # (like option -b20 for tar does)
1005 blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
1007 self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
1009 if not self
._extfileobj
:
1010 self
.fileobj
.close()
1013 def getmember(self
, name
):
1014 """Return a TarInfo object for member `name'. If `name' can not be
1015 found in the archive, KeyError is raised. If a member occurs more
1016 than once in the archive, its last occurrence is assumed to be the
1017 most up-to-date version.
1020 if name
not in self
.membernames
and not self
._loaded
:
1022 if name
not in self
.membernames
:
1023 raise KeyError, "filename %r not found" % name
1024 return self
._getmember
(name
)
1026 def getmembers(self
):
1027 """Return the members of the archive as a list of TarInfo objects. The
1028 list has the same order as the members in the archive.
1031 if not self
._loaded
: # if we want to obtain a list of
1032 self
._load
() # all members, we first have to
1033 # scan the whole archive.
1037 """Return the members of the archive as a list of their names. It has
1038 the same order as the list returned by getmembers().
1041 if not self
._loaded
:
1043 return self
.membernames
1045 def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
1046 """Create a TarInfo object for either the file `name' or the file
1047 object `fileobj' (using os.fstat on its file descriptor). You can
1048 modify some of the TarInfo's attributes before you add it using
1049 addfile(). If given, `arcname' specifies an alternative name for the
1050 file in the archive.
1054 # When fileobj is given, replace name by
1055 # fileobj's real name.
1056 if fileobj
is not None:
1059 # Building the name of the member in the archive.
1060 # Backward slashes are converted to forward slashes,
1061 # Absolute paths are turned to relative paths.
1064 arcname
= normpath(arcname
)
1065 drv
, arcname
= os
.path
.splitdrive(arcname
)
1066 while arcname
[0:1] == "/":
1067 arcname
= arcname
[1:]
1069 # Now, fill the TarInfo object with
1070 # information specific for the file.
1073 # Use os.stat or os.lstat, depending on platform
1074 # and if symlinks shall be resolved.
1076 if hasattr(os
, "lstat") and not self
.dereference
:
1077 statres
= os
.lstat(name
)
1079 statres
= os
.stat(name
)
1081 statres
= os
.fstat(fileobj
.fileno())
1084 stmd
= statres
.st_mode
1085 if stat
.S_ISREG(stmd
):
1086 inode
= (statres
.st_ino
, statres
.st_dev
)
1087 if inode
in self
.inodes
and not self
.dereference
:
1088 # Is it a hardlink to an already
1091 linkname
= self
.inodes
[inode
]
1093 # The inode is added only if it's valid.
1094 # For win32 it is always 0.
1097 self
.inodes
[inode
] = arcname
1098 elif stat
.S_ISDIR(stmd
):
1100 if arcname
[-1:] != "/":
1102 elif stat
.S_ISFIFO(stmd
):
1104 elif stat
.S_ISLNK(stmd
):
1106 linkname
= os
.readlink(name
)
1107 elif stat
.S_ISCHR(stmd
):
1109 elif stat
.S_ISBLK(stmd
):
1114 # Fill the TarInfo object with all
1115 # information we can get.
1116 tarinfo
.name
= arcname
1118 tarinfo
.uid
= statres
.st_uid
1119 tarinfo
.gid
= statres
.st_gid
1120 tarinfo
.size
= statres
.st_size
1121 tarinfo
.mtime
= statres
.st_mtime
1123 tarinfo
.linkname
= linkname
1126 tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
1131 tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
1135 if type in (CHRTYPE
, BLKTYPE
):
1136 if hasattr(os
, "major") and hasattr(os
, "minor"):
1137 tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
1138 tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
1141 def list(self
, verbose
=True):
1142 """Print a table of contents to sys.stdout. If `verbose' is False, only
1143 the names of the members are printed. If it is True, an `ls -l'-like
1148 for tarinfo
in self
:
1150 print filemode(tarinfo
.mode
),
1151 print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
1152 tarinfo
.gname
or tarinfo
.gid
),
1153 if tarinfo
.ischr() or tarinfo
.isblk():
1154 print "%10s" % ("%d,%d" \
1155 % (tarinfo
.devmajor
, tarinfo
.devminor
)),
1157 print "%10d" % tarinfo
.size
,
1158 print "%d-%02d-%02d %02d:%02d:%02d" \
1159 % time
.localtime(tarinfo
.mtime
)[:6],
1165 print "->", tarinfo
.linkname
,
1167 print "link to", tarinfo
.linkname
,
1170 def add(self
, name
, arcname
=None, recursive
=True):
1171 """Add the file `name' to the archive. `name' may be any type of file
1172 (directory, fifo, symbolic link, etc.). If given, `arcname'
1173 specifies an alternative name for the file in the archive.
1174 Directories are added recursively by default. This can be avoided by
1175 setting `recursive' to False.
1182 # Skip if somebody tries to archive the archive...
1183 if self
.name
is not None \
1184 and os
.path
.abspath(name
) == os
.path
.abspath(self
.name
):
1185 self
._dbg
(2, "tarfile: Skipped %r" % name
)
1188 # Special case: The user wants to add the current
1189 # working directory.
1194 for f
in os
.listdir("."):
1195 self
.add(f
, os
.path
.join(arcname
, f
))
1200 # Create a TarInfo object from the file.
1201 tarinfo
= self
.gettarinfo(name
, arcname
)
1204 self
._dbg
(1, "tarfile: Unsupported type %r" % name
)
1207 # Append the tar header and data to the archive.
1209 f
= file(name
, "rb")
1210 self
.addfile(tarinfo
, f
)
1213 if tarinfo
.type in (LNKTYPE
, SYMTYPE
, FIFOTYPE
, CHRTYPE
, BLKTYPE
):
1215 self
.addfile(tarinfo
)
1218 self
.addfile(tarinfo
)
1220 for f
in os
.listdir(name
):
1221 self
.add(os
.path
.join(name
, f
), os
.path
.join(arcname
, f
))
1223 def addfile(self
, tarinfo
, fileobj
=None):
1224 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1225 given, tarinfo.size bytes are read from it and added to the archive.
1226 You can create TarInfo objects using gettarinfo().
1227 On Windows platforms, `fileobj' should always be opened with mode
1228 'rb' to avoid irritation about the file size.
1232 tarinfo
.name
= normpath(tarinfo
.name
)
1234 # directories should end with '/'
1237 if tarinfo
.linkname
:
1238 tarinfo
.linkname
= normpath(tarinfo
.linkname
)
1240 if tarinfo
.size
> MAXSIZE_MEMBER
:
1241 raise ValueError, "file is too large (>8GB)"
1243 if len(tarinfo
.linkname
) > LENGTH_LINK
:
1245 raise ValueError, "linkname is too long (>%d)" \
1248 self
._create
_gnulong
(tarinfo
.linkname
, GNUTYPE_LONGLINK
)
1249 tarinfo
.linkname
= tarinfo
.linkname
[:LENGTH_LINK
-1]
1250 self
._dbg
(2, "tarfile: Created GNU tar extension LONGLINK")
1252 if len(tarinfo
.name
) > LENGTH_NAME
:
1254 prefix
= tarinfo
.name
[:LENGTH_PREFIX
+ 1]
1255 while prefix
and prefix
[-1] != "/":
1256 prefix
= prefix
[:-1]
1258 name
= tarinfo
.name
[len(prefix
):]
1259 prefix
= prefix
[:-1]
1261 if not prefix
or len(name
) > LENGTH_NAME
:
1262 raise ValueError, "name is too long (>%d)" \
1266 tarinfo
.prefix
= prefix
1268 self
._create
_gnulong
(tarinfo
.name
, GNUTYPE_LONGNAME
)
1269 tarinfo
.name
= tarinfo
.name
[:LENGTH_NAME
- 1]
1270 self
._dbg
(2, "tarfile: Created GNU tar extension LONGNAME")
1272 self
.fileobj
.write(tarinfo
.tobuf())
1273 self
.offset
+= BLOCKSIZE
1275 # If there's data to follow, append it.
1276 if fileobj
is not None:
1277 copyfileobj(fileobj
, self
.fileobj
, tarinfo
.size
)
1278 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
1280 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
1282 self
.offset
+= blocks
* BLOCKSIZE
1284 self
.members
.append(tarinfo
)
1285 self
.membernames
.append(tarinfo
.name
)
1286 self
.chunks
.append(self
.offset
)
1288 def extract(self
, member
, path
=""):
1289 """Extract a member from the archive to the current working directory,
1290 using its full name. Its file information is extracted as accurately
1291 as possible. `member' may be a filename or a TarInfo object. You can
1292 specify a different directory using `path'.
1296 if isinstance(member
, TarInfo
):
1299 tarinfo
= self
.getmember(member
)
1302 self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
1303 except EnvironmentError, e
:
1304 if self
.errorlevel
> 0:
1307 if e
.filename
is None:
1308 self
._dbg
(1, "tarfile: %s" % e
.strerror
)
1310 self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
1311 except ExtractError
, e
:
1312 if self
.errorlevel
> 1:
1315 self
._dbg
(1, "tarfile: %s" % e
)
1317 def extractfile(self
, member
):
1318 """Extract a member from the archive as a file object. `member' may be
1319 a filename or a TarInfo object. If `member' is a regular file, a
1320 file-like object is returned. If `member' is a link, a file-like
1321 object is constructed from the link's target. If `member' is none of
1322 the above, None is returned.
1323 The file-like object is read-only and provides the following
1324 methods: read(), readline(), readlines(), seek() and tell()
1328 if isinstance(member
, TarInfo
):
1331 tarinfo
= self
.getmember(member
)
1334 return self
.fileobject(self
, tarinfo
)
1336 elif tarinfo
.type not in SUPPORTED_TYPES
:
1337 # If a member's type is unknown, it is treated as a
1339 return self
.fileobject(self
, tarinfo
)
1341 elif tarinfo
.islnk() or tarinfo
.issym():
1342 if isinstance(self
.fileobj
, _Stream
):
1343 # A small but ugly workaround for the case that someone tries
1344 # to extract a (sym)link as a file-object from a non-seekable
1345 # stream of tar blocks.
1346 raise StreamError
, "cannot extract (sym)link as file object"
1348 # A (sym)link's file object is its target's file object.
1349 return self
.extractfile(self
._getmember
(tarinfo
.linkname
,
1352 # If there's no data associated with the member (directory, chrdev,
1353 # blkdev, etc.), return None instead of a file object.
1356 def _extract_member(self
, tarinfo
, targetpath
):
1357 """Extract the TarInfo object tarinfo to a physical
1358 file called targetpath.
1360 # Fetch the TarInfo object for the given name
1361 # and build the destination pathname, replacing
1362 # forward slashes to platform specific separators.
1363 if targetpath
[-1:] == "/":
1364 targetpath
= targetpath
[:-1]
1365 targetpath
= os
.path
.normpath(targetpath
)
1367 # Create all upper directories.
1368 upperdirs
= os
.path
.dirname(targetpath
)
1369 if upperdirs
and not os
.path
.exists(upperdirs
):
1374 ti
.mtime
= tarinfo
.mtime
1375 ti
.uid
= tarinfo
.uid
1376 ti
.gid
= tarinfo
.gid
1377 ti
.uname
= tarinfo
.uname
1378 ti
.gname
= tarinfo
.gname
1380 self
._extract
_member
(ti
, ti
.name
)
1384 if tarinfo
.islnk() or tarinfo
.issym():
1385 self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
1387 self
._dbg
(1, tarinfo
.name
)
1390 self
.makefile(tarinfo
, targetpath
)
1391 elif tarinfo
.isdir():
1392 self
.makedir(tarinfo
, targetpath
)
1393 elif tarinfo
.isfifo():
1394 self
.makefifo(tarinfo
, targetpath
)
1395 elif tarinfo
.ischr() or tarinfo
.isblk():
1396 self
.makedev(tarinfo
, targetpath
)
1397 elif tarinfo
.islnk() or tarinfo
.issym():
1398 self
.makelink(tarinfo
, targetpath
)
1399 elif tarinfo
.type not in SUPPORTED_TYPES
:
1400 self
.makeunknown(tarinfo
, targetpath
)
1402 self
.makefile(tarinfo
, targetpath
)
1404 self
.chown(tarinfo
, targetpath
)
1405 if not tarinfo
.issym():
1406 self
.chmod(tarinfo
, targetpath
)
1407 self
.utime(tarinfo
, targetpath
)
1409 #--------------------------------------------------------------------------
1410 # Below are the different file methods. They are called via
1411 # _extract_member() when extract() is called. They can be replaced in a
1412 # subclass to implement other functionality.
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that is already
       there is accepted silently; every other error is passed on.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only "already exists" is tolerated.
        if sys.exc_info()[1].errno == errno.EEXIST:
            return
        raise
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to a new file called
       `targetpath'. The member's file object and the target file are
       both closed again, even if opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        # NOTE(review): relies on the object returned by extractfile()
        # offering close() -- confirm against the fileobject class.
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Extract a member whose type is not known by writing it out
       with makefile(), then report that on debug level 1.
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, notice % tarinfo.type)
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. ExtractError is raised when the
       os module does not provide mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
def makedev(self, tarinfo, targetpath):
    """Create the character or block device node `targetpath' from the
       member's mode, devmajor and devminor. ExtractError is raised when
       the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Pick the file type bit that is or'ed into the permission bits.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1463 def makelink(self
, tarinfo
, targetpath
):
1464 """Make a (symbolic) link called targetpath. If it cannot be created
1465 (platform limitation), we try to make a copy of the referenced file
1468 linkpath
= tarinfo
.linkname
1471 os
.symlink(linkpath
, targetpath
)
1473 os
.link(linkpath
, targetpath
)
1474 except AttributeError:
1476 linkpath
= os
.path
.join(os
.path
.dirname(tarinfo
.name
),
1478 linkpath
= normpath(linkpath
)
1481 self
._extract
_member
(self
.getmember(linkpath
), targetpath
)
1482 except (EnvironmentError, KeyError), e
:
1483 linkpath
= os
.path
.normpath(linkpath
)
1485 shutil
.copy2(linkpath
, targetpath
)
1486 except EnvironmentError, e
:
1487 raise IOError, "link could not be created"
1489 def chown(self
, tarinfo
, targetpath
):
1490 """Set owner of targetpath according to tarinfo.
1492 if pwd
and hasattr(os
, "geteuid") and os
.geteuid() == 0:
1493 # We have to be root to do so.
1495 g
= grp
.getgrnam(tarinfo
.gname
)[2]
1498 g
= grp
.getgrgid(tarinfo
.gid
)[2]
1502 u
= pwd
.getpwnam(tarinfo
.uname
)[2]
1505 u
= pwd
.getpwuid(tarinfo
.uid
)[2]
1509 if tarinfo
.issym() and hasattr(os
, "lchown"):
1510 os
.lchown(targetpath
, u
, g
)
1512 if sys
.platform
!= "os2emx":
1513 os
.chown(targetpath
, u
, g
)
1514 except EnvironmentError, e
:
1515 raise ExtractError
, "could not change owner"
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to `targetpath'.
       Does nothing if the os module has no chmod(); a failing
       os.chmod() is reported as ExtractError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
       tarinfo.mtime. Nothing is done if the os module has no utime(),
       or for directories on win32; a failing os.utime() is reported
       as ExtractError.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if not hasattr(os, 'utime') or \
       (sys.platform == "win32" and tarinfo.isdir()):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1540 #--------------------------------------------------------------------------
1543 """Return the next member of the archive as a TarInfo object, when
1544 TarFile is opened for reading. Return None if there is no more
1548 if self
.firstmember
is not None:
1549 m
= self
.firstmember
1550 self
.firstmember
= None
1553 # Read the next block.
1554 self
.fileobj
.seek(self
.chunks
[-1])
1556 buf
= self
.fileobj
.read(BLOCKSIZE
)
1560 tarinfo
= TarInfo
.frombuf(buf
)
1562 if self
.ignore_zeros
:
1563 if buf
.count(NUL
) == BLOCKSIZE
:
1567 self
._dbg
(2, "0x%X: %s block" % (self
.offset
, adj
))
1568 self
.offset
+= BLOCKSIZE
1571 # Block is empty or unreadable.
1572 if self
.chunks
[-1] == 0:
1573 # If the first block is invalid, this does not
1574 # look like a tar archive we can handle.
1575 raise ReadError
,"empty, unreadable or compressed file"
1579 # We shouldn't rely on this checksum, because some tar programs
1580 # calculate it differently and it is merely validating the
1581 # header block. We could just as well skip this part, which would
1582 # have a slight effect on performance...
1583 if tarinfo
.chksum
!= calc_chksum(buf
):
1584 self
._dbg
(1, "tarfile: Bad Checksum %r" % tarinfo
.name
)
1586 # Set the TarInfo object's offset to the current position of the
1587 # TarFile and set self.offset to the position where the data blocks
1589 tarinfo
.offset
= self
.offset
1590 self
.offset
+= BLOCKSIZE
1592 # Check if the TarInfo object has a typeflag for which a callback
1593 # method is registered in the TYPE_METH. If so, then call it.
1594 if tarinfo
.type in self
.TYPE_METH
:
1595 tarinfo
= self
.TYPE_METH
[tarinfo
.type](self
, tarinfo
)
1597 tarinfo
.offset_data
= self
.offset
1598 if tarinfo
.isreg() or tarinfo
.type not in SUPPORTED_TYPES
:
1599 # Skip the following data blocks.
1600 self
.offset
+= self
._block
(tarinfo
.size
)
1602 if tarinfo
.isreg() and tarinfo
.name
[:-1] == "/":
1603 # some old tar programs don't know DIRTYPE
1604 tarinfo
.type = DIRTYPE
1606 self
.members
.append(tarinfo
)
1607 self
.membernames
.append(tarinfo
.name
)
1608 self
.chunks
.append(self
.offset
)
1611 #--------------------------------------------------------------------------
1612 # Below are some methods which are called for special typeflags in the
1613 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1614 # are registered in TYPE_METH below. You can register your own methods
1615 # with this mapping.
1616 # A registered method is called with a TarInfo object as its only argument.
1618 # During its execution the method MUST perform the following tasks:
1619 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1620 # if there is data to follow.
1621 # 2. set self.offset to the position where the next member's header will
1623 # 3. return a valid TarInfo object.
1625 def proc_gnulong(self
, tarinfo
):
1626 """Evaluate the blocks that hold a GNU longname
1632 count
= tarinfo
.size
1634 block
= self
.fileobj
.read(BLOCKSIZE
)
1636 self
.offset
+= BLOCKSIZE
1639 if tarinfo
.type == GNUTYPE_LONGNAME
:
1641 if tarinfo
.type == GNUTYPE_LONGLINK
:
1644 buf
= self
.fileobj
.read(BLOCKSIZE
)
1646 tarinfo
= TarInfo
.frombuf(buf
)
1647 tarinfo
.offset
= self
.offset
1648 self
.offset
+= BLOCKSIZE
1649 tarinfo
.offset_data
= self
.offset
1650 tarinfo
.name
= name
or tarinfo
.name
1651 tarinfo
.linkname
= linkname
or tarinfo
.linkname
1653 if tarinfo
.isreg() or tarinfo
.type not in SUPPORTED_TYPES
:
1654 # Skip the following data blocks.
1655 self
.offset
+= self
._block
(tarinfo
.size
)
1658 def proc_sparse(self
, tarinfo
):
1659 """Analyze a GNU sparse header plus extra headers.
1661 buf
= tarinfo
.tobuf()
1666 # There are 4 possible sparse structs in the
1670 offset
= int(buf
[pos
:pos
+ 12], 8)
1671 numbytes
= int(buf
[pos
+ 12:pos
+ 24], 8)
1674 if offset
> lastpos
:
1675 sp
.append(_hole(lastpos
, offset
- lastpos
))
1676 sp
.append(_data(offset
, numbytes
, realpos
))
1678 lastpos
= offset
+ numbytes
1681 isextended
= ord(buf
[482])
1682 origsize
= int(buf
[483:495], 8)
1684 # If the isextended flag is given,
1685 # there are extra headers to process.
1686 while isextended
== 1:
1687 buf
= self
.fileobj
.read(BLOCKSIZE
)
1688 self
.offset
+= BLOCKSIZE
1690 for i
in xrange(21):
1692 offset
= int(buf
[pos
:pos
+ 12], 8)
1693 numbytes
= int(buf
[pos
+ 12:pos
+ 24], 8)
1696 if offset
> lastpos
:
1697 sp
.append(_hole(lastpos
, offset
- lastpos
))
1698 sp
.append(_data(offset
, numbytes
, realpos
))
1700 lastpos
= offset
+ numbytes
1702 isextended
= ord(buf
[504])
1704 if lastpos
< origsize
:
1705 sp
.append(_hole(lastpos
, origsize
- lastpos
))
1709 tarinfo
.offset_data
= self
.offset
1710 self
.offset
+= self
._block
(tarinfo
.size
)
1711 tarinfo
.size
= origsize
1714 # The type mapping for the next() method. The keys are single character
1715 # strings, the typeflag. The values are methods which are called when
1716 # next() encounters such a typeflag.
1718 GNUTYPE_LONGNAME
: proc_gnulong
,
1719 GNUTYPE_LONGLINK
: proc_gnulong
,
1720 GNUTYPE_SPARSE
: proc_sparse
1723 #--------------------------------------------------------------------------
1724 # Little helper methods:
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    full_blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partly filled block still occupies a whole one.
        full_blocks += 1
    return full_blocks * BLOCKSIZE
1735 def _getmember(self
, name
, tarinfo
=None):
1736 """Find an archive member by name from bottom to top.
1737 If tarinfo is given, it is used as the starting point.
1740 end
= len(self
.members
)
1742 end
= self
.members
.index(tarinfo
)
1744 for i
in xrange(end
- 1, -1, -1):
1745 if name
== self
.membernames
[i
]:
1746 return self
.members
[i
]
1749 """Read through the entire archive file and look for readable
1753 tarinfo
= self
.next()
1758 def _check(self
, mode
=None):
1759 """Check if TarFile is still open, and if the operation's mode
1760 corresponds to TarFile's mode.
1763 raise IOError, "%s is closed" % self
.__class
__.__name
__
1764 if mode
is not None and self
._mode
not in mode
:
1765 raise IOError, "bad operation for mode %r" % self
._mode
1768 """Provide an iterator object.
1771 return iter(self
.members
)
1773 return TarIter(self
)
1775 def _create_gnulong(self
, name
, type):
1776 """Write a GNU longname/longlink member to the TarFile.
1777 It consists of an extended tar header, with the length
1778 of the longname as size, followed by data blocks,
1779 which contain the longname as a null terminated string.
1782 tarinfo
.name
= "././@LongLink"
1785 tarinfo
.size
= len(name
)
1787 # write extended header
1788 self
.fileobj
.write(tarinfo
.tobuf())
1790 self
.fileobj
.write(name
)
1791 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
1793 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
1795 self
.offset
+= blocks
* BLOCKSIZE
def _dbg(self, level, msg):
    """Print `msg' on sys.stderr, unless `level' is higher than the
       debug level stored in self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1807 for tarinfo in TarFile(...):
1811 def __init__(self
, tarfile
):
1812 """Construct a TarIter object.
1814 self
.tarfile
= tarfile
1816 """Return iterator object.
1820 """Return the next item using TarFile's next() method.
1821 When all members have been read, set TarFile as _loaded.
1823 tarinfo
= self
.tarfile
.next()
1825 self
.tarfile
._loaded
= True
1829 # Helper classes for sparse file support
1831 """Base class for _data and _hole.
1833 def __init__(self
, offset
, size
):
1834 self
.offset
= offset
1836 def __contains__(self
, offset
):
1837 return self
.offset
<= offset
< self
.offset
+ self
.size
class _data(_section):
    """A section of a sparse file that holds data. In addition to the
       offset and size handed on to _section, it stores `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       what it inherits from _section.
    """
1851 class _ringbuffer(list):
1852 """Ringbuffer class which increases performance
1853 over a regular list.
1857 def find(self
, offset
):
1864 if idx
== len(self
):
1872 #---------------------------------------------
1873 # zipfile compatible TarFile class
1874 #---------------------------------------------
1875 TAR_PLAIN
= 0 # zipfile.ZIP_STORED
1876 TAR_GZIPPED
= 8 # zipfile.ZIP_DEFLATED
1877 class TarFileCompat
:
1878 """TarFile class compatible with standard module zipfile's
1881 def __init__(self
, file, mode
="r", compression
=TAR_PLAIN
):
1882 if compression
== TAR_PLAIN
:
1883 self
.tarfile
= TarFile
.taropen(file, mode
)
1884 elif compression
== TAR_GZIPPED
:
1885 self
.tarfile
= TarFile
.gzopen(file, mode
)
1887 raise ValueError, "unknown compression constant"
1888 if mode
[0:1] == "r":
1889 members
= self
.tarfile
.getmembers()
1890 for i
in xrange(len(members
)):
1893 m
.file_size
= m
.size
1894 m
.date_time
= time
.gmtime(m
.mtime
)[:6]
1896 return map(lambda m
: m
.name
, self
.infolist())
1898 return filter(lambda m
: m
.type in REGULAR_TYPES
,
1899 self
.tarfile
.getmembers())
def getinfo(self, name):
    """Return what the wrapped TarFile's getmember() delivers for
       the member called `name'.
    """
    archive = self.tarfile
    return archive.getmember(name)
def read(self, name):
    """Return everything read() yields from the file object that the
       wrapped TarFile's extractfile() gives for the member `name'.
    """
    archive = self.tarfile
    member = archive.getmember(name)
    return archive.extractfile(member).read()
def write(self, filename, arcname=None, compress_type=None):
    """Hand `filename' and `arcname' to the wrapped TarFile's add().
       `compress_type' is accepted but not used.
    """
    archive = self.tarfile
    archive.add(filename, arcname)
1910 def writestr(self
, zinfo
, bytes
):
1913 zinfo
.name
= zinfo
.filename
1914 zinfo
.size
= zinfo
.file_size
1915 zinfo
.mtime
= calendar
.timegm(zinfo
.date_time
)
1916 self
.tarfile
.addfile(zinfo
, StringIO
.StringIO(bytes
))
1918 self
.tarfile
.close()
1919 #class TarFileCompat
1921 #--------------------
1922 # exported functions
1923 #--------------------
1924 def is_tarfile(name
):
1925 """Return True if name points to a tar archive that we
1926 are able to handle, else return False.