2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
# Refuse to load on classic MacOS.
if sys.platform == 'mac':
    # Pathname handling in this module is not ready for MacOS9: in many
    # places "/" is simply swapped for the local os.path.sep, and that does
    # not map onto the mac rooted:path:name versus :nonrooted:path:name
    # syntax.
    raise ImportError("tarfile does not work for platform==mac")
65 # from tarfile import *
66 __all__
= ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68 #---------------------------------------------------------
70 #---------------------------------------------------------
# Basic building blocks of the archive format.
NUL           = "\0"            # the null character
BLOCKSIZE     = 512             # length of processing blocks
RECORDSIZE    = BLOCKSIZE * 20  # length of records (20 blocks, 10240 bytes)
MAGIC         = "ustar"         # magic tar string
VERSION       = "00"            # version number

# Limits imposed by the fixed-width header fields.
LENGTH_NAME    = 100            # maximum length of a filename
LENGTH_LINK    = 100            # maximum length of a linkname
LENGTH_PREFIX  = 155            # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# One-character member type flags (stored in TarInfo.type).
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file (type flag is a NUL byte)
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# Type flags introduced by GNU tar.
GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
96 #---------------------------------------------------------
98 #---------------------------------------------------------
99 SUPPORTED_TYPES
= (REGTYPE
, AREGTYPE
, LNKTYPE
, # file types that tarfile
100 SYMTYPE
, DIRTYPE
, FIFOTYPE
, # can cope with.
101 CONTTYPE
, CHRTYPE
, BLKTYPE
,
102 GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
,
# Member types whose payload is file data; this is the tuple that the
# "self.type in REGULAR_TYPES" test of TarInfo checks against.
REGULAR_TYPES = (REGTYPE, AREGTYPE,         # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)  # represent regular files
108 #---------------------------------------------------------
109 # Bits used in the mode field, values in octal.
110 #---------------------------------------------------------
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (paired with "T"/"t" in the mode-string
                         # table further down in this file)

# Owner permission bits.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner

# Group permission bits.
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group

# Permission bits for everybody else.
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
132 #---------------------------------------------------------
133 # Some useful functions
134 #---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

       Everything from the first NUL character onwards is dropped. A
       buffer without any NUL is returned unchanged.
    """
    # maxsplit=1: only the first NUL matters, the rest is padding.
    return s.split(NUL, 1)[0]
def calc_chksum(buf):
    """Calculate the checksum for a member's header. It's a simple addition
       of all bytes, treating the chksum field as if filled with spaces.
       buf is a 512 byte long string buffer which holds the header.

       Returns the checksum as an integer.
    """
    # The chksum field itself (bytes 148..155) is treated as eight blanks,
    # so the sum starts at 8 * ord(" ") == 256 and that slice is skipped.
    chk = 256
    chk += sum(map(ord, buf[:148]))     # all bytes before the chksum field
    chk += sum(map(ord, buf[156:]))     # all bytes after the chksum field
    return chk
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Data is moved in chunks of at most BUFSIZE bytes. IOError is raised
       as soon as src delivers fewer bytes than requested; chunks copied
       before that point stay in dst.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    # NOTE(review): the chunk size is not visible in this excerpt; 16 KiB
    # is assumed here. Confirm against the full file.
    BUFSIZE = 16 * 1024

    remaining = length
    while remaining > 0:
        want = min(BUFSIZE, remaining)
        buf = src.read(want)
        if len(buf) < want:
            # Short read: src ended before `length' bytes were available.
            raise IOError("end of file reached")
        dst.write(buf)
        remaining -= want
185 (TUEXEC
, "x", TSUID
, "S", TUEXEC|TSUID
, "s"),
188 (TGEXEC
, "x", TSGID
, "S", TGEXEC|TSGID
, "s"),
191 (TOEXEC
, "x", TSVTX
, "T", TOEXEC|TSVTX
, "t"))
194 """Convert a file's mode to a string of the form
196 Used by TarFile.list()
199 for t
in filemode_table
:
201 if mode
& t
[0] == t
[0]:
def normpath(path):
    """Normalize `path' and return it with "/" as the only separator.

       The path is first passed through os.path.normpath(); afterwards
       every occurrence of the local os.sep is replaced by a forward
       slash. Where os.sep already is "/", the replacement changes nothing
       and the result is exactly that of os.path.normpath().
    """
    # One definition covers both platform variants, so no platform test is
    # needed to choose between a converting and a plain normpath.
    return os.path.normpath(path).replace(os.sep, "/")
class TarError(Exception):
    """Root of this module's exception hierarchy."""
    pass
class ExtractError(TarError):
    """Generic failure while extracting a member."""
    pass
class ReadError(TarError):
    """Raised for tar archives that are unreadable."""
    pass
class CompressionError(TarError):
    """Raised when a compression method is unavailable."""
    pass
class StreamError(TarError):
    """Raised for operations a stream-like TarFile does not support."""
    pass
232 #---------------------------
233 # internal stream interface
234 #---------------------------
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
241 def __init__(self
, name
, mode
):
244 "w": os
.O_WRONLY | os
.O_CREAT | os
.O_TRUNC
,
246 if hasattr(os
, "O_BINARY"):
248 self
.fd
= os
.open(name
, mode
)
253 def read(self
, size
):
254 return os
.read(self
.fd
, size
)
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
267 _Stream is intended to be used only internally.
270 def __init__(self
, name
, mode
, type, fileobj
, bufsize
):
271 """Construct a _Stream object.
273 self
._extfileobj
= True
275 fileobj
= _LowLevelFile(name
, mode
)
276 self
._extfileobj
= False
278 self
.name
= name
or ""
281 self
.fileobj
= fileobj
282 self
.bufsize
= bufsize
291 raise CompressionError
, "zlib module is not available"
293 self
.crc
= zlib
.crc32("")
297 self
._init
_write
_gz
()
303 raise CompressionError
, "bz2 module is not available"
306 self
.cmp = bz2
.BZ2Decompressor()
308 self
.cmp = bz2
.BZ2Compressor()
314 def _init_write_gz(self
):
315 """Initialize for writing with gzip compression.
317 self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
318 -self
.zlib
.MAX_WBITS
,
319 self
.zlib
.DEF_MEM_LEVEL
,
321 timestamp
= struct
.pack("<L", long(time
.time()))
322 self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
323 if self
.name
.endswith(".gz"):
324 self
.name
= self
.name
[:-3]
325 self
.__write
(self
.name
+ NUL
)
328 """Write string s to the stream.
330 if self
.type == "gz":
331 self
.crc
= self
.zlib
.crc32(s
, self
.crc
)
333 if self
.type != "tar":
334 s
= self
.cmp.compress(s
)
337 def __write(self
, s
):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
342 while len(self
.buf
) > self
.bufsize
:
343 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
344 self
.buf
= self
.buf
[self
.bufsize
:]
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
353 if self
.mode
== "w" and self
.buf
:
354 if self
.type != "tar":
355 self
.buf
+= self
.cmp.flush()
356 self
.fileobj
.write(self
.buf
)
358 if self
.type == "gz":
359 self
.fileobj
.write(struct
.pack("<l", self
.crc
))
360 self
.fileobj
.write(struct
.pack("<L", self
.pos
))
362 if not self
._extfileobj
:
367 def _init_read_gz(self
):
368 """Initialize for reading a gzip compressed fileobj.
370 self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
373 # taken from gzip.GzipFile with some alterations
374 if self
.__read
(2) != "\037\213":
375 raise ReadError
, "not a gzip file"
376 if self
.__read
(1) != "\010":
377 raise CompressionError
, "unsupported compression method"
379 flag
= ord(self
.__read
(1))
383 xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
388 if not s
or s
== NUL
:
393 if not s
or s
== NUL
:
399 """Return the stream's file pointer position.
403 def seek(self
, pos
=0):
404 """Set the stream's file pointer to pos. Negative seeking
407 if pos
- self
.pos
>= 0:
408 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
409 for i
in xrange(blocks
):
410 self
.read(self
.bufsize
)
413 raise StreamError
, "seeking backwards is not allowed"
416 def read(self
, size
=None):
417 """Return the next size number of bytes from the stream.
418 If size is not defined, return all bytes of the stream
424 buf
= self
._read
(self
.bufsize
)
430 buf
= self
._read
(size
)
434 def _read(self
, size
):
435 """Return size bytes from the stream.
437 if self
.type == "tar":
438 return self
.__read
(size
)
443 buf
= self
.__read
(self
.bufsize
)
446 buf
= self
.cmp.decompress(buf
)
453 def __read(self
, size
):
454 """Return size bytes from stream. If internal buffer is empty,
455 read another block from the stream.
460 buf
= self
.fileobj
.read(self
.bufsize
)
470 #------------------------
471 # Extraction file object
472 #------------------------
473 class ExFileObject(object):
474 """File-like object for reading an archive member.
475 Is returned by TarFile.extractfile(). Support for
476 sparse files included.
479 def __init__(self
, tarfile
, tarinfo
):
480 self
.fileobj
= tarfile
.fileobj
481 self
.name
= tarinfo
.name
484 self
.offset
= tarinfo
.offset_data
485 self
.size
= tarinfo
.size
488 if tarinfo
.issparse():
489 self
.sparse
= tarinfo
.sparse
490 self
.read
= self
._readsparse
492 self
.read
= self
._readnormal
494 def __read(self
, size
):
495 """Overloadable read method.
497 return self
.fileobj
.read(size
)
499 def readline(self
, size
=-1):
500 """Read a line with approx. size. If size is negative,
501 read a whole line. readline() and read() must not
507 nl
= self
.linebuffer
.find("\n")
511 size
-= len(self
.linebuffer
)
513 buf
= self
.read(min(size
, 100))
516 self
.linebuffer
+= buf
520 nl
= self
.linebuffer
.find("\n")
525 buf
= self
.linebuffer
[:nl
]
526 self
.linebuffer
= self
.linebuffer
[nl
+ 1:]
527 while buf
[-1:] == "\r":
532 """Return a list with all (following) lines.
536 line
= self
.readline()
541 def _readnormal(self
, size
=None):
542 """Read operation for regular files.
545 raise ValueError, "file is closed"
546 self
.fileobj
.seek(self
.offset
+ self
.pos
)
547 bytesleft
= self
.size
- self
.pos
549 bytestoread
= bytesleft
551 bytestoread
= min(size
, bytesleft
)
552 self
.pos
+= bytestoread
553 return self
.__read
(bytestoread
)
555 def _readsparse(self
, size
=None):
556 """Read operation for sparse files.
559 raise ValueError, "file is closed"
562 size
= self
.size
- self
.pos
566 buf
= self
._readsparsesection
(size
)
573 def _readsparsesection(self
, size
):
574 """Read a single section of a sparse file.
576 section
= self
.sparse
.find(self
.pos
)
581 toread
= min(size
, section
.offset
+ section
.size
- self
.pos
)
582 if isinstance(section
, _data
):
583 realpos
= section
.realpos
+ self
.pos
- section
.offset
585 self
.fileobj
.seek(self
.offset
+ realpos
)
586 return self
.__read
(toread
)
592 """Return the current file position.
596 def seek(self
, pos
, whence
=0):
597 """Seek to a position in the file.
601 self
.pos
= min(max(pos
, 0), self
.size
)
604 self
.pos
= max(self
.pos
+ pos
, 0)
606 self
.pos
= min(self
.pos
+ pos
, self
.size
)
608 self
.pos
= max(min(self
.size
+ pos
, self
.size
), 0)
611 """Close the file object.
619 class TarInfo(object):
620 """Informational class which holds the details about an
621 archive member given by a tar header block.
622 TarInfo objects are returned by TarFile.getmember(),
623 TarFile.getmembers() and TarFile.gettarinfo() and are
624 usually created internally.
627 def __init__(self
, name
=""):
628 """Construct a TarInfo object. name is the optional name
632 self
.name
= name
# member name (dirnames must end with '/')
633 self
.mode
= 0666 # file permissions
634 self
.uid
= 0 # user id
635 self
.gid
= 0 # group id
636 self
.size
= 0 # file size
637 self
.mtime
= 0 # modification time
638 self
.chksum
= 0 # header checksum
639 self
.type = REGTYPE
# member type
640 self
.linkname
= "" # link name
641 self
.uname
= "user" # user name
642 self
.gname
= "group" # group name
644 self
.devminor
= 0 #-for use with CHRTYPE and BLKTYPE
645 self
.prefix
= "" # prefix to filename or holding information
648 self
.offset
= 0 # the tar header starts here
649 self
.offset_data
= 0 # the file's data starts here
652 return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
def frombuf(cls, buf):
    """Construct a TarInfo object from a 512 byte string buffer.

       The header fields are cut out of `buf' at fixed offsets. Text
       fields are truncated at the first NUL (see nts()), numeric fields
       are parsed as octal. Returns the new TarInfo object.
    """
    # NOTE(review): this line is reconstructed; the excerpt does not show
    # how the object is created.
    tarinfo = cls()

    tarinfo.name = nts(buf[0:100])
    tarinfo.mode = int(buf[100:108], 8)
    tarinfo.uid = int(buf[108:116], 8)
    tarinfo.gid = int(buf[116:124], 8)
    tarinfo.size = long(buf[124:136], 8)
    tarinfo.mtime = long(buf[136:148], 8)
    tarinfo.chksum = int(buf[148:156], 8)
    tarinfo.type = buf[156:157]
    tarinfo.linkname = nts(buf[157:257])
    tarinfo.uname = nts(buf[265:297])
    tarinfo.gname = nts(buf[297:329])

    # NOTE(review): the try/except frame is reconstructed. ValueError is
    # what int() raises for a field that is not valid octal text.
    try:
        tarinfo.devmajor = int(buf[329:337], 8)
        tarinfo.devminor = int(buf[337:345], 8)
    except ValueError:
        # Reset both device numbers. The fallback used to assign devmajor
        # twice and never touched devminor.
        tarinfo.devmajor = tarinfo.devminor = 0
    tarinfo.prefix = buf[345:500]

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    if tarinfo.type != GNUTYPE_SPARSE:
        tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

    # Directory names should have a '/' at the end.
    # NOTE(review): the statement appending the slash and the final return
    # are reconstructed from the comment above and the docstring.
    if tarinfo.isdir() and tarinfo.name[-1:] != "/":
        tarinfo.name += "/"
    return tarinfo

frombuf = classmethod(frombuf)
690 """Return a tar header block as a 512 byte string.
694 # The following code was contributed by Detlef Lannert.
696 for value
, fieldsize
in (
698 ("%07o" % (self
.mode
& 07777), 8),
699 ("%07o" % self
.uid
, 8),
700 ("%07o" % self
.gid
, 8),
701 ("%011o" % self
.size
, 12),
702 ("%011o" % self
.mtime
, 12),
705 (self
.linkname
, 100),
710 ("%07o" % self
.devmajor
, 8),
711 ("%07o" % self
.devminor
, 8),
715 parts
.append(value
+ (fieldsize
- l
) * NUL
)
718 chksum
= calc_chksum(buf
)
719 buf
= buf
[:148] + "%06o\0" % chksum
+ buf
[155:]
720 buf
+= (BLOCKSIZE
- len(buf
)) * NUL
725 return self
.type in REGULAR_TYPES
729 return self
.type == DIRTYPE
731 return self
.type == SYMTYPE
733 return self
.type == LNKTYPE
735 return self
.type == CHRTYPE
737 return self
.type == BLKTYPE
739 return self
.type == FIFOTYPE
741 return self
.type == GNUTYPE_SPARSE
743 return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
746 class TarFile(object):
747 """The TarFile Class provides an interface to tar archives.
750 debug
= 0 # May be set from 0 (no msgs) to 3 (all msgs)
752 dereference
= False # If true, add content of linked file to the
753 # tar file, else the link.
755 ignore_zeros
= False # If true, skips empty or invalid blocks and
756 # continues processing.
758 errorlevel
= 0 # If 0, fatal errors only appear in debug
759 # messages (if debug >= 0). If > 0, errors
760 # are passed to the caller as exceptions.
762 posix
= True # If True, generates POSIX.1-1990-compliant
763 # archives (no GNU extensions!)
765 fileobject
= ExFileObject
767 def __init__(self
, name
=None, mode
="r", fileobj
=None):
768 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
769 read from an existing archive, 'a' to append data to an existing
770 file or 'w' to create a new file overwriting an existing one. `mode'
772 If `fileobj' is given, it is used for reading or writing data. If it
773 can be determined, `mode' is overridden by `fileobj's mode.
774 `fileobj' is not closed, when TarFile is closed.
778 if len(mode
) > 1 or mode
not in "raw":
779 raise ValueError, "mode must be 'r', 'a' or 'w'"
781 self
.mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
784 fileobj
= file(self
.name
, self
.mode
)
785 self
._extfileobj
= False
787 if self
.name
is None and hasattr(fileobj
, "name"):
788 self
.name
= fileobj
.name
789 if hasattr(fileobj
, "mode"):
790 self
.mode
= fileobj
.mode
791 self
._extfileobj
= True
792 self
.fileobj
= fileobj
794 # Init datastructures
796 self
.members
= [] # list of members as TarInfo objects
797 self
.membernames
= [] # names of members
798 self
.chunks
= [0] # chunk cache
799 self
._loaded
= False # flag if all members have been read
800 self
.offset
= 0L # current position in the archive file
801 self
.inodes
= {} # dictionary caching the inodes of
802 # archive members already added
804 if self
._mode
== "r":
805 self
.firstmember
= None
806 self
.firstmember
= self
.next()
808 if self
._mode
== "a":
809 # Move to the end of the archive,
810 # before the first empty block.
811 self
.firstmember
= None
814 tarinfo
= self
.next()
819 self
.fileobj
.seek(- BLOCKSIZE
, 1)
822 if self
._mode
in "aw":
825 #--------------------------------------------------------------------------
826 # Below are the classmethods which act as alternate constructors to the
827 # TarFile class. The open() method is the only one that is needed for
828 # public use; it is the "super"-constructor and is able to select an
829 # adequate "sub"-constructor for a particular compression using the mapping
832 # This concept allows one to subclass TarFile without losing the comfort of
833 # the super-constructor. A sub-constructor is registered and made available
834 # by adding it to the mapping in OPEN_METH.
836 def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=20*512):
837 """Open a tar archive for reading, writing or appending. Return
838 an appropriate TarFile class.
841 'r' open for reading with transparent compression
842 'r:' open for reading exclusively uncompressed
843 'r:gz' open for reading with gzip compression
844 'r:bz2' open for reading with bzip2 compression
845 'a' or 'a:' open for appending
846 'w' or 'w:' open for writing without compression
847 'w:gz' open for writing with gzip compression
848 'w:bz2' open for writing with bzip2 compression
849 'r|' open an uncompressed stream of tar blocks for reading
850 'r|gz' open a gzip compressed stream of tar blocks
851 'r|bz2' open a bzip2 compressed stream of tar blocks
852 'w|' open an uncompressed stream for writing
853 'w|gz' open a gzip compressed stream for writing
854 'w|bz2' open a bzip2 compressed stream for writing
857 if not name
and not fileobj
:
858 raise ValueError, "nothing to open"
861 filemode
, comptype
= mode
.split(":", 1)
862 filemode
= filemode
or "r"
863 comptype
= comptype
or "tar"
865 # Select the *open() function according to
867 if comptype
in cls
.OPEN_METH
:
868 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
870 raise CompressionError
, "unknown compression type %r" % comptype
871 return func(name
, filemode
, fileobj
)
874 filemode
, comptype
= mode
.split("|", 1)
875 filemode
= filemode
or "r"
876 comptype
= comptype
or "tar"
878 if filemode
not in "rw":
879 raise ValueError, "mode must be 'r' or 'w'"
881 t
= cls(name
, filemode
,
882 _Stream(name
, filemode
, comptype
, fileobj
, bufsize
))
883 t
._extfileobj
= False
887 # Find out which *open() is appropriate for opening the file.
888 for comptype
in cls
.OPEN_METH
:
889 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
891 return func(name
, "r", fileobj
)
892 except (ReadError
, CompressionError
):
894 raise ReadError
, "file could not be opened successfully"
897 return cls
.taropen(name
, mode
, fileobj
)
899 raise ValueError, "undiscernible mode"
901 open = classmethod(open)
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading, appending or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else raises
       ValueError. Returns cls(name, mode, fileobj).
    """
    # Compare against the allowed values themselves. A substring test such
    # as `mode not in "raw"' accepts the empty string, which would slip
    # through this check.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)

taropen = classmethod(taropen)
912 def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
913 """Open gzip compressed tar archive name for reading or writing.
914 Appending is not allowed.
916 if len(mode
) > 1 or mode
not in "rw":
917 raise ValueError, "mode must be 'r' or 'w'"
922 except (ImportError, AttributeError):
923 raise CompressionError
, "gzip module is not available"
925 pre
, ext
= os
.path
.splitext(name
)
926 pre
= os
.path
.basename(pre
)
934 fileobj
= file(name
, mode
+ "b")
940 t
= cls
.taropen(tarname
, mode
,
941 gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
)
944 raise ReadError
, "not a gzip file"
945 t
._extfileobj
= False
948 gzopen
= classmethod(gzopen
)
950 def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
951 """Open bzip2 compressed tar archive name for reading or writing.
952 Appending is not allowed.
954 if len(mode
) > 1 or mode
not in "rw":
955 raise ValueError, "mode must be 'r' or 'w'."
960 raise CompressionError
, "bz2 module is not available"
962 pre
, ext
= os
.path
.splitext(name
)
963 pre
= os
.path
.basename(pre
)
970 if fileobj
is not None:
971 raise ValueError, "no support for external file objects"
974 t
= cls
.taropen(tarname
, mode
, bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
))
976 raise ReadError
, "not a bzip2 file"
977 t
._extfileobj
= False
980 bz2open
= classmethod(bz2open
)
982 # All *open() methods are registered here.
984 "tar": "taropen", # uncompressed tar
985 "gz": "gzopen", # gzip compressed tar
986 "bz2": "bz2open" # bzip2 compressed tar
989 #--------------------------------------------------------------------------
990 # The public methods which TarFile provides:
993 """Close the TarFile. In write-mode, two finishing zero blocks are
994 appended to the archive.
999 if self
._mode
in "aw":
1000 self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
1001 self
.offset
+= (BLOCKSIZE
* 2)
1002 # fill up the end with zero-blocks
1003 # (like option -b20 for tar does)
1004 blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
1006 self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
1008 if not self
._extfileobj
:
1009 self
.fileobj
.close()
1012 def getmember(self
, name
):
1013 """Return a TarInfo object for member `name'. If `name' can not be
1014 found in the archive, KeyError is raised. If a member occurs more
than once in the archive, its last occurrence is assumed to be the
1016 most up-to-date version.
1019 if name
not in self
.membernames
and not self
._loaded
:
1021 if name
not in self
.membernames
:
1022 raise KeyError, "filename %r not found" % name
1023 return self
._getmember
(name
)
1025 def getmembers(self
):
1026 """Return the members of the archive as a list of TarInfo objects. The
1027 list has the same order as the members in the archive.
1030 if not self
._loaded
: # if we want to obtain a list of
1031 self
._load
() # all members, we first have to
1032 # scan the whole archive.
1036 """Return the members of the archive as a list of their names. It has
1037 the same order as the list returned by getmembers().
1040 if not self
._loaded
:
1042 return self
.membernames
1044 def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
1045 """Create a TarInfo object for either the file `name' or the file
1046 object `fileobj' (using os.fstat on its file descriptor). You can
1047 modify some of the TarInfo's attributes before you add it using
1048 addfile(). If given, `arcname' specifies an alternative name for the
1049 file in the archive.
1053 # When fileobj is given, replace name by
1054 # fileobj's real name.
1055 if fileobj
is not None:
1058 # Building the name of the member in the archive.
1059 # Backward slashes are converted to forward slashes,
1060 # Absolute paths are turned to relative paths.
1063 arcname
= normpath(arcname
)
1064 drv
, arcname
= os
.path
.splitdrive(arcname
)
1065 while arcname
[0:1] == "/":
1066 arcname
= arcname
[1:]
1068 # Now, fill the TarInfo object with
1069 # information specific for the file.
1072 # Use os.stat or os.lstat, depending on platform
1073 # and if symlinks shall be resolved.
1075 if hasattr(os
, "lstat") and not self
.dereference
:
1076 statres
= os
.lstat(name
)
1078 statres
= os
.stat(name
)
1080 statres
= os
.fstat(fileobj
.fileno())
1083 stmd
= statres
.st_mode
1084 if stat
.S_ISREG(stmd
):
1085 inode
= (statres
.st_ino
, statres
.st_dev
)
1086 if inode
in self
.inodes
and not self
.dereference
:
1087 # Is it a hardlink to an already
1090 linkname
= self
.inodes
[inode
]
# The inode is added only if it's valid.
1093 # For win32 it is always 0.
1096 self
.inodes
[inode
] = arcname
1097 elif stat
.S_ISDIR(stmd
):
1099 if arcname
[-1:] != "/":
1101 elif stat
.S_ISFIFO(stmd
):
1103 elif stat
.S_ISLNK(stmd
):
1105 linkname
= os
.readlink(name
)
1106 elif stat
.S_ISCHR(stmd
):
1108 elif stat
.S_ISBLK(stmd
):
1113 # Fill the TarInfo object with all
1114 # information we can get.
1115 tarinfo
.name
= arcname
1117 tarinfo
.uid
= statres
.st_uid
1118 tarinfo
.gid
= statres
.st_gid
1119 tarinfo
.size
= statres
.st_size
1120 tarinfo
.mtime
= statres
.st_mtime
1122 tarinfo
.linkname
= linkname
1125 tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
1130 tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
1134 if type in (CHRTYPE
, BLKTYPE
):
1135 if hasattr(os
, "major") and hasattr(os
, "minor"):
1136 tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
1137 tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
1140 def list(self
, verbose
=True):
1141 """Print a table of contents to sys.stdout. If `verbose' is False, only
1142 the names of the members are printed. If it is True, an `ls -l'-like
1147 for tarinfo
in self
:
1149 print filemode(tarinfo
.mode
),
1150 print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
1151 tarinfo
.gname
or tarinfo
.gid
),
1152 if tarinfo
.ischr() or tarinfo
.isblk():
1153 print "%10s" % ("%d,%d" \
1154 % (tarinfo
.devmajor
, tarinfo
.devminor
)),
1156 print "%10d" % tarinfo
.size
,
1157 print "%d-%02d-%02d %02d:%02d:%02d" \
1158 % time
.localtime(tarinfo
.mtime
)[:6],
1164 print "->", tarinfo
.linkname
,
1166 print "link to", tarinfo
.linkname
,
1169 def add(self
, name
, arcname
=None, recursive
=True):
1170 """Add the file `name' to the archive. `name' may be any type of file
1171 (directory, fifo, symbolic link, etc.). If given, `arcname'
1172 specifies an alternative name for the file in the archive.
1173 Directories are added recursively by default. This can be avoided by
1174 setting `recursive' to False.
1181 # Skip if somebody tries to archive the archive...
1182 if self
.name
is not None \
1183 and os
.path
.abspath(name
) == os
.path
.abspath(self
.name
):
1184 self
._dbg
(2, "tarfile: Skipped %r" % name
)
1187 # Special case: The user wants to add the current
1188 # working directory.
1193 for f
in os
.listdir("."):
1194 self
.add(f
, os
.path
.join(arcname
, f
))
1199 # Create a TarInfo object from the file.
1200 tarinfo
= self
.gettarinfo(name
, arcname
)
1203 self
._dbg
(1, "tarfile: Unsupported type %r" % name
)
1206 # Append the tar header and data to the archive.
1208 f
= file(name
, "rb")
1209 self
.addfile(tarinfo
, f
)
1212 if tarinfo
.type in (LNKTYPE
, SYMTYPE
, FIFOTYPE
, CHRTYPE
, BLKTYPE
):
1214 self
.addfile(tarinfo
)
1217 self
.addfile(tarinfo
)
1219 for f
in os
.listdir(name
):
1220 self
.add(os
.path
.join(name
, f
), os
.path
.join(arcname
, f
))
1222 def addfile(self
, tarinfo
, fileobj
=None):
1223 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1224 given, tarinfo.size bytes are read from it and added to the archive.
1225 You can create TarInfo objects using gettarinfo().
1226 On Windows platforms, `fileobj' should always be opened with mode
1227 'rb' to avoid irritation about the file size.
1231 tarinfo
.name
= normpath(tarinfo
.name
)
1233 # directories should end with '/'
1236 if tarinfo
.linkname
:
1237 tarinfo
.linkname
= normpath(tarinfo
.linkname
)
1239 if tarinfo
.size
> MAXSIZE_MEMBER
:
1240 raise ValueError, "file is too large (>8GB)"
1242 if len(tarinfo
.linkname
) > LENGTH_LINK
:
1244 raise ValueError, "linkname is too long (>%d)" \
1247 self
._create
_gnulong
(tarinfo
.linkname
, GNUTYPE_LONGLINK
)
1248 tarinfo
.linkname
= tarinfo
.linkname
[:LENGTH_LINK
-1]
1249 self
._dbg
(2, "tarfile: Created GNU tar extension LONGLINK")
1251 if len(tarinfo
.name
) > LENGTH_NAME
:
1253 prefix
= tarinfo
.name
[:LENGTH_PREFIX
+ 1]
1254 while prefix
and prefix
[-1] != "/":
1255 prefix
= prefix
[:-1]
1257 name
= tarinfo
.name
[len(prefix
):]
1258 prefix
= prefix
[:-1]
1260 if not prefix
or len(name
) > LENGTH_NAME
:
1261 raise ValueError, "name is too long (>%d)" \
1265 tarinfo
.prefix
= prefix
1267 self
._create
_gnulong
(tarinfo
.name
, GNUTYPE_LONGNAME
)
1268 tarinfo
.name
= tarinfo
.name
[:LENGTH_NAME
- 1]
1269 self
._dbg
(2, "tarfile: Created GNU tar extension LONGNAME")
1271 self
.fileobj
.write(tarinfo
.tobuf())
1272 self
.offset
+= BLOCKSIZE
1274 # If there's data to follow, append it.
1275 if fileobj
is not None:
1276 copyfileobj(fileobj
, self
.fileobj
, tarinfo
.size
)
1277 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
1279 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
1281 self
.offset
+= blocks
* BLOCKSIZE
1283 self
.members
.append(tarinfo
)
1284 self
.membernames
.append(tarinfo
.name
)
1285 self
.chunks
.append(self
.offset
)
1287 def extract(self
, member
, path
=""):
1288 """Extract a member from the archive to the current working directory,
1289 using its full name. Its file information is extracted as accurately
1290 as possible. `member' may be a filename or a TarInfo object. You can
1291 specify a different directory using `path'.
1295 if isinstance(member
, TarInfo
):
1298 tarinfo
= self
.getmember(member
)
1301 self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
1302 except EnvironmentError, e
:
1303 if self
.errorlevel
> 0:
1306 if e
.filename
is None:
1307 self
._dbg
(1, "tarfile: %s" % e
.strerror
)
1309 self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
1310 except ExtractError
, e
:
1311 if self
.errorlevel
> 1:
1314 self
._dbg
(1, "tarfile: %s" % e
)
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. A regular file (or a member of
       unknown type, which is treated like one) yields a file-like object;
       a link yields the file-like object of the link's target; anything
       else yields None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files and members of an unknown type share one code path.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # There is no data associated with the member (directory, chrdev,
        # blkdev, etc.), so there is nothing to wrap in a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A (sym)link cannot be resolved on a non-seekable stream of
        # tar blocks, so refuse instead of returning wrong data.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object; the search
    # for the target starts at the link member itself.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1355 def _extract_member(self
, tarinfo
, targetpath
):
1356 """Extract the TarInfo object tarinfo to a physical
1357 file called targetpath.
1359 # Fetch the TarInfo object for the given name
1360 # and build the destination pathname, replacing
1361 # forward slashes to platform specific separators.
1362 if targetpath
[-1:] == "/":
1363 targetpath
= targetpath
[:-1]
1364 targetpath
= os
.path
.normpath(targetpath
)
1366 # Create all upper directories.
1367 upperdirs
= os
.path
.dirname(targetpath
)
1368 if upperdirs
and not os
.path
.exists(upperdirs
):
1373 ti
.mtime
= tarinfo
.mtime
1374 ti
.uid
= tarinfo
.uid
1375 ti
.gid
= tarinfo
.gid
1376 ti
.uname
= tarinfo
.uname
1377 ti
.gname
= tarinfo
.gname
1379 self
._extract
_member
(ti
, ti
.name
)
1383 if tarinfo
.islnk() or tarinfo
.issym():
1384 self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
1386 self
._dbg
(1, tarinfo
.name
)
1389 self
.makefile(tarinfo
, targetpath
)
1390 elif tarinfo
.isdir():
1391 self
.makedir(tarinfo
, targetpath
)
1392 elif tarinfo
.isfifo():
1393 self
.makefifo(tarinfo
, targetpath
)
1394 elif tarinfo
.ischr() or tarinfo
.isblk():
1395 self
.makedev(tarinfo
, targetpath
)
1396 elif tarinfo
.islnk() or tarinfo
.issym():
1397 self
.makelink(tarinfo
, targetpath
)
1398 elif tarinfo
.type not in SUPPORTED_TYPES
:
1399 self
.makeunknown(tarinfo
, targetpath
)
1401 self
.makefile(tarinfo
, targetpath
)
1403 self
.chown(tarinfo
, targetpath
)
1404 if not tarinfo
.issym():
1405 self
.chmod(tarinfo
, targetpath
)
1406 self
.utime(tarinfo
, targetpath
)
1408 #--------------------------------------------------------------------------
1409 # Below are the different file methods. They are called via
1410 # _extract_member() when extract() is called. They can be replaced in a
1411 # subclass to implement other functionality.
1413 def makedir(self
, tarinfo
, targetpath
):
1414 """Make a directory called targetpath.
1417 os
.mkdir(targetpath
)
1418 except EnvironmentError, e
:
1419 if e
.errno
!= errno
.EEXIST
:
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through extractfile() and copied
       into a file opened in "wb" mode. Both file objects are closed
       even when opening the target or copying raises, so a failed
       extraction no longer leaks open files.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is extracted like a regular file
       and a level 1 debug message records its type.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. Raise ExtractError when the
       os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath. Raise
       ExtractError when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's permission bits with the device type bit.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            make_link = os.symlink
        else:
            make_link = os.link
        make_link(linkpath, targetpath)
        return
    except AttributeError:
        # Link creation is not available here; fall back to copying.
        pass

    if tarinfo.issym():
        # A symlink target is taken relative to the member's directory.
        linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                         linkpath))

    try:
        # First choice: extract the referenced member a second time.
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        # Second choice: copy the referenced file from the filesystem.
        try:
            shutil.copy2(os.path.normpath(linkpath), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing happens unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by numeric id, then fall back to the ids of the
       current process. Raise ExtractError when the ownership change
       itself fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to do so.
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
       Do nothing when the os module has no chmod(); raise
       ExtractError when the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.
       Access and modification time are both set to tarinfo.mtime.
       Raise ExtractError when the call fails.
    """
    if not hasattr(os, 'utime'):
        return
    if sys.platform == "win32" and tarinfo.isdir():
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1539 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raise ReadError when the very first block cannot be parsed. With
       self.ignore_zeros set, unparsable blocks are skipped instead of
       ending the archive.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.chunks[-1])
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.chunks[-1] == 0:
                    # If the first block is invalid, this does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
    else:
        tarinfo.offset_data = self.offset
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and mark a directory as a
    # regular file whose name ends in a slash. Test the LAST character:
    # name[-1:] is the final character, whereas name[:-1] is everything
    # but the final character and never equals "/" for such a name.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
    return tarinfo
1610 #--------------------------------------------------------------------------
1611 # Below are some methods which are called for special typeflags in the
1612 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1613 # are registered in TYPE_METH below. You can register your own methods
1614 # with this mapping.
1615 # A registered method is called with a TarInfo object as only argument.
1617 # During its execution the method MUST perform the following tasks:
1618 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1619 # if there is data to follow.
1620 # 2. set self.offset to the position where the next member's header will
1622 # 3. return a valid TarInfo object.
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member.

       The data blocks following `tarinfo' carry the long string. The
       header block after them is parsed into a new TarInfo object,
       which receives the long name or link name and is returned.
       self.offset is advanced past everything that was consumed.
    """
    name = None
    linkname = None

    # Collect the data blocks that hold the long string.
    pieces = []
    remaining = tarinfo.size
    while remaining > 0:
        pieces.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    buf = "".join(pieces)

    if tarinfo.type == GNUTYPE_LONGNAME:
        name = nts(buf)
    if tarinfo.type == GNUTYPE_LONGLINK:
        linkname = nts(buf)

    # The next block is the header of the member the long string
    # belongs to.
    header = self.fileobj.read(BLOCKSIZE)

    tarinfo = TarInfo.frombuf(header)
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE
    tarinfo.offset_data = self.offset
    tarinfo.name = name or tarinfo.name
    tarinfo.linkname = linkname or tarinfo.linkname

    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
1657 def proc_sparse(self
, tarinfo
):
1658 """Analyze a GNU sparse header plus extra headers.
1660 buf
= tarinfo
.tobuf()
1665 # There are 4 possible sparse structs in the
1669 offset
= int(buf
[pos
:pos
+ 12], 8)
1670 numbytes
= int(buf
[pos
+ 12:pos
+ 24], 8)
1673 if offset
> lastpos
:
1674 sp
.append(_hole(lastpos
, offset
- lastpos
))
1675 sp
.append(_data(offset
, numbytes
, realpos
))
1677 lastpos
= offset
+ numbytes
1680 isextended
= ord(buf
[482])
1681 origsize
= int(buf
[483:495], 8)
1683 # If the isextended flag is given,
1684 # there are extra headers to process.
1685 while isextended
== 1:
1686 buf
= self
.fileobj
.read(BLOCKSIZE
)
1687 self
.offset
+= BLOCKSIZE
1689 for i
in xrange(21):
1691 offset
= int(buf
[pos
:pos
+ 12], 8)
1692 numbytes
= int(buf
[pos
+ 12:pos
+ 24], 8)
1695 if offset
> lastpos
:
1696 sp
.append(_hole(lastpos
, offset
- lastpos
))
1697 sp
.append(_data(offset
, numbytes
, realpos
))
1699 lastpos
= offset
+ numbytes
1701 isextended
= ord(buf
[504])
1703 if lastpos
< origsize
:
1704 sp
.append(_hole(lastpos
, origsize
- lastpos
))
1708 tarinfo
.offset_data
= self
.offset
1709 self
.offset
+= self
._block
(tarinfo
.size
)
1710 tarinfo
.size
= origsize
1713 # The type mapping for the next() method. The keys are single character
1714 # strings, the typeflag. The values are methods which are called when
1715 # next() encounters such a typeflag.
1717 GNUTYPE_LONGNAME
: proc_gnulong
,
1718 GNUTYPE_LONGLINK
: proc_gnulong
,
1719 GNUTYPE_SPARSE
: proc_sparse
1722 #--------------------------------------------------------------------------
1723 # Little helper methods:
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
       e.g. _block(834) => 1024.
    """
    whole, partial = divmod(count, BLOCKSIZE)
    if not partial:
        return whole * BLOCKSIZE
    # A partially filled block still occupies a full one.
    return (whole + 1) * BLOCKSIZE
1734 def _getmember(self
, name
, tarinfo
=None):
1735 """Find an archive member by name from bottom to top.
1736 If tarinfo is given, it is used as the starting point.
1739 end
= len(self
.members
)
1741 end
= self
.members
.index(tarinfo
)
1743 for i
in xrange(end
- 1, -1, -1):
1744 if name
== self
.membernames
[i
]:
1745 return self
.members
[i
]
def _load(self):
    """Read through the entire archive file and look for readable
       members. Afterwards the TarFile is flagged as _loaded.
    """
    # next() records every member it finds; only exhaustion matters here.
    while self.next() is not None:
        pass
    self._loaded = True
1757 def _check(self
, mode
=None):
1758 """Check if TarFile is still open, and if the operation's mode
1759 corresponds to TarFile's mode.
1762 raise IOError, "%s is closed" % self
.__class
__.__name
__
1763 if mode
is not None and self
._mode
not in mode
:
1764 raise IOError, "bad operation for mode %r" % self
._mode
def __iter__(self):
    """Provide an iterator object: a TarIter that keeps reading the
       archive while it is not fully loaded, otherwise a plain iterator
       over the member list.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1774 def _create_gnulong(self
, name
, type):
1775 """Write a GNU longname/longlink member to the TarFile.
1776 It consists of an extended tar header, with the length
1777 of the longname as size, followed by data blocks,
1778 which contain the longname as a null terminated string.
1781 tarinfo
.name
= "././@LongLink"
1784 tarinfo
.size
= len(name
)
1786 # write extended header
1787 self
.fileobj
.write(tarinfo
.tobuf())
1789 self
.fileobj
.write(name
)
1790 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
1792 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
1794 self
.offset
+= blocks
* BLOCKSIZE
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr. Messages whose level
       exceeds self.debug are dropped.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object that reads from `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def next(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded
           and stop the iteration.
        """
        tarinfo = self.tarfile.next()
        if tarinfo:
            return tarinfo
        self.tarfile._loaded = True
        raise StopIteration
1828 # Helper classes for sparse file support
1830 """Base class for _data and _hole.
1832 def __init__(self
, offset
, size
):
1833 self
.offset
= offset
1835 def __contains__(self
, offset
):
1836 return self
.offset
<= offset
< self
.offset
+ self
.size
class _data(_section):
    """Represent a data section in a sparse file. In addition to the
       range kept by _section it records `realpos'.
    """

    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
class _hole(_section):
    """Represent a hole section in a sparse file. It adds nothing
       to _section; the class itself tells holes from data.
    """
    pass
1850 class _ringbuffer(list):
1851 """Ringbuffer class which increases performance
1852 over a regular list.
1856 def find(self
, offset
):
1863 if idx
== len(self
):
1871 #---------------------------------------------
1872 # zipfile compatible TarFile class
1873 #---------------------------------------------
1874 TAR_PLAIN
= 0 # zipfile.ZIP_STORED
1875 TAR_GZIPPED
= 8 # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """

    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' as a plain (TAR_PLAIN) or gzipped (TAR_GZIPPED)
           archive. In read mode every member additionally gets the
           zipfile style attributes filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of all members listed by infolist().
        """
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing of the archive.
        """
        self.tarfile.list()

    def testzip(self):
        """Present for zipfile compatibility only; always return None.
        """
        return

    def getinfo(self, name):
        """Return the member called `name'.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete data of the member called `name'.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive, stored as `arcname'.
           `compress_type' is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member described by `zinfo',
           after copying its zipfile style attributes to the names
           TarFile expects.
        """
        import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
1918 #class TarFileCompat
1920 #--------------------
1921 # exported functions
1922 #--------------------
1923 def is_tarfile(name
):
1924 """Return True if name points to a tar archive that we
1925 are able to handle, else return False.