Lib/tarfile.py

   1 #!/usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3 #-------------------------------------------------------------------
   4 # tarfile.py
   5 #-------------------------------------------------------------------
   6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
   7 # All rights reserved.
   8 #
   9 # Permission  is  hereby granted,  free  of charge,  to  any person
  10 # obtaining a  copy of  this software  and associated documentation
  11 # files  (the  "Software"),  to   deal  in  the  Software   without
  12 # restriction,  including  without limitation  the  rights to  use,
  13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 # copies  of  the  Software,  and to  permit  persons  to  whom the
  15 # Software  is  furnished  to  do  so,  subject  to  the  following
  16 # conditions:
  17 #
  18 # The above copyright  notice and this  permission notice shall  be
  19 # included in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  22 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  23 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  24 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  25 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  26 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28 # OTHER DEALINGS IN THE SOFTWARE.
  29 #
  30 """Read from and write to tar format archives.
  31 """
  32
  33 __version__ = "$Revision$"
  34 # $Source$
  35
  36 version     = "0.6.4"
  37 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
  38 __date__    = "$Date$"
  39 __cvsid__   = "$Id$"
  40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
  41
  42 #---------
  43 # Imports
  44 #---------
  45 import sys
  46 import os
  47 import shutil
  48 import stat
  49 import errno
  50 import time
  51 import struct
  52
  53 if sys.platform == 'mac':
  54     # This module needs work for MacOS9, especially in the area of pathname
  55     # handling. In many places it is assumed a simple substitution of / by the
  56     # local os.path.sep is good enough to convert pathnames, but this does not
  57     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
  58     raise ImportError, "tarfile does not work for platform==mac"
  59
  60 try:
  61     import grp, pwd
  62 except ImportError:
  63     grp = pwd = None
  64
  65 # from tarfile import *
  66 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
  67
  68 #---------------------------------------------------------
  69 # tar constants
  70 #---------------------------------------------------------
NUL        = "\0"               # the null character, pads header fields
BLOCKSIZE  = 512                # length of processing blocks in bytes
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
MAGIC      = "ustar"            # magic tar string written into headers
VERSION    = "00"               # version number written after the magic

LENGTH_NAME    = 100            # maximum length of a filename
LENGTH_LINK    = 100            # maximum length of a linkname
LENGTH_PREFIX  = 155            # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file (type field left as NUL)
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
                                                # (see TarInfo.isreg())

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits, only used by filemode() for the first output character.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved; filemode() renders it as "t"/"T"

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
 131
 132 #---------------------------------------------------------
 133 # Some useful functions
 134 #---------------------------------------------------------
 135 def nts(s):
 136     """Convert a null-terminated string buffer to a python string.
 137     """
 138     return s.split(NUL, 1)[0]
 139
 140 def calc_chksum(buf):
 141     """Calculate the checksum for a member's header. It's a simple addition
 142        of all bytes, treating the chksum field as if filled with spaces.
 143        buf is a 512 byte long string buffer which holds the header.
 144     """
 145     chk = 256                           # chksum field is treated as blanks,
 146                                         # so the initial value is 8 * ord(" ")
 147     for c in buf[:148]: chk += ord(c)   # sum up all bytes before chksum
 148     for c in buf[156:]: chk += ord(c)   # sum up all bytes after chksum
 149     return chk
 150
 151 def copyfileobj(src, dst, length=None):
 152     """Copy length bytes from fileobj src to fileobj dst.
 153        If length is None, copy the entire content.
 154     """
 155     if length == 0:
 156         return
 157     if length is None:
 158         shutil.copyfileobj(src, dst)
 159         return
 160
 161     BUFSIZE = 16 * 1024
 162     blocks, remainder = divmod(length, BUFSIZE)
 163     for b in xrange(blocks):
 164         buf = src.read(BUFSIZE)
 165         if len(buf) < BUFSIZE:
 166             raise IOError, "end of file reached"
 167         dst.write(buf)
 168
 169     if remainder != 0:
 170         buf = src.read(remainder)
 171         if len(buf) < remainder:
 172             raise IOError, "end of file reached"
 173         dst.write(buf)
 174     return
 175
 176 filemode_table = (
 177     (S_IFLNK, "l",
 178      S_IFREG, "-",
 179      S_IFBLK, "b",
 180      S_IFDIR, "d",
 181      S_IFCHR, "c",
 182      S_IFIFO, "p"),
 183     (TUREAD,  "r"),
 184     (TUWRITE, "w"),
 185     (TUEXEC,  "x", TSUID, "S", TUEXEC|TSUID, "s"),
 186     (TGREAD,  "r"),
 187     (TGWRITE, "w"),
 188     (TGEXEC,  "x", TSGID, "S", TGEXEC|TSGID, "s"),
 189     (TOREAD,  "r"),
 190     (TOWRITE, "w"),
 191     (TOEXEC,  "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
 192
 193 def filemode(mode):
 194     """Convert a file's mode to a string of the form
 195        -rwxrwxrwx.
 196        Used by TarFile.list()
 197     """
 198     s = ""
 199     for t in filemode_table:
 200         while True:
 201             if mode & t[0] == t[0]:
 202                 s += t[1]
 203             elif len(t) > 2:
 204                 t = t[2:]
 205                 continue
 206             else:
 207                 s += "-"
 208             break
 209     return s
 210
 211 if os.sep != "/":
 212     normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
 213 else:
 214     normpath = os.path.normpath
 215
 216 class TarError(Exception):
 217     """Base exception."""
 218     pass
 219 class ExtractError(TarError):
 220     """General exception for extract errors."""
 221     pass
 222 class ReadError(TarError):
 223     """Exception for unreadble tar archives."""
 224     pass
 225 class CompressionError(TarError):
 226     """Exception for unavailable compression methods."""
 227     pass
 228 class StreamError(TarError):
 229     """Exception for unsupported operations on stream-like TarFiles."""
 230     pass
 231
 232 #---------------------------
 233 # internal stream interface
 234 #---------------------------
 235 class _LowLevelFile:
 236     """Low-level file object. Supports reading and writing.
 237        It is used instead of a regular file object for streaming
 238        access.
 239     """
 240
 241     def __init__(self, name, mode):
 242         mode = {
 243             "r": os.O_RDONLY,
 244             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
 245         }[mode]
 246         if hasattr(os, "O_BINARY"):
 247             mode |= os.O_BINARY
 248         self.fd = os.open(name, mode)
 249
 250     def close(self):
 251         os.close(self.fd)
 252
 253     def read(self, size):
 254         return os.read(self.fd, size)
 255
 256     def write(self, s):
 257         os.write(self.fd, s)
 258
 259 class _Stream:
 260     """Class that serves as an adapter between TarFile and
 261        a stream-like object.  The stream-like object only
 262        needs to have a read() or write() method and is accessed
 263        blockwise.  Use of gzip or bzip2 compression is possible.
 264        A stream-like object could be for example: sys.stdin,
 265        sys.stdout, a socket, a tape device etc.
 266
 267        _Stream is intended to be used only internally.
 268     """
 269
 270     def __init__(self, name, mode, type, fileobj, bufsize):
 271         """Construct a _Stream object.
 272         """
 273         self._extfileobj = True
 274         if fileobj is None:
 275             fileobj = _LowLevelFile(name, mode)
 276             self._extfileobj = False
 277
 278         self.name    = name or ""
 279         self.mode    = mode
 280         self.type    = type
 281         self.fileobj = fileobj
 282         self.bufsize = bufsize
 283         self.buf     = ""
 284         self.pos     = 0L
 285         self.closed  = False
 286
 287         if type == "gz":
 288             try:
 289                 import zlib
 290             except ImportError:
 291                 raise CompressionError, "zlib module is not available"
 292             self.zlib = zlib
 293             self.crc = zlib.crc32("")
 294             if mode == "r":
 295                 self._init_read_gz()
 296             else:
 297                 self._init_write_gz()
 298
 299         if type == "bz2":
 300             try:
 301                 import bz2
 302             except ImportError:
 303                 raise CompressionError, "bz2 module is not available"
 304             if mode == "r":
 305                 self.dbuf = ""
 306                 self.cmp = bz2.BZ2Decompressor()
 307             else:
 308                 self.cmp = bz2.BZ2Compressor()
 309
 310     def __del__(self):
 311         if not self.closed:
 312             self.close()
 313
 314     def _init_write_gz(self):
 315         """Initialize for writing with gzip compression.
 316         """
 317         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
 318                                             -self.zlib.MAX_WBITS,
 319                                             self.zlib.DEF_MEM_LEVEL,
 320                                             0)
 321         timestamp = struct.pack("<L", long(time.time()))
 322         self.__write("\037\213\010\010%s\002\377" % timestamp)
 323         if self.name.endswith(".gz"):
 324             self.name = self.name[:-3]
 325         self.__write(self.name + NUL)
 326
 327     def write(self, s):
 328         """Write string s to the stream.
 329         """
 330         if self.type == "gz":
 331             self.crc = self.zlib.crc32(s, self.crc)
 332         self.pos += len(s)
 333         if self.type != "tar":
 334             s = self.cmp.compress(s)
 335         self.__write(s)
 336
 337     def __write(self, s):
 338         """Write string s to the stream if a whole new block
 339            is ready to be written.
 340         """
 341         self.buf += s
 342         while len(self.buf) > self.bufsize:
 343             self.fileobj.write(self.buf[:self.bufsize])
 344             self.buf = self.buf[self.bufsize:]
 345
 346     def close(self):
 347         """Close the _Stream object. No operation should be
 348            done on it afterwards.
 349         """
 350         if self.closed:
 351             return
 352
 353         if self.mode == "w" and self.buf:
 354             if self.type != "tar":
 355                 self.buf += self.cmp.flush()
 356             self.__write("")            # Write remaining blocks to output
 357             self.fileobj.write(self.buf)
 358             self.buf = ""
 359             if self.type == "gz":
 360                 self.fileobj.write(struct.pack("<l", self.crc))
 361                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
 362
 363         if not self._extfileobj:
 364             self.fileobj.close()
 365
 366         self.closed = True
 367
 368     def _init_read_gz(self):
 369         """Initialize for reading a gzip compressed fileobj.
 370         """
 371         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
 372         self.dbuf = ""
 373
 374         # taken from gzip.GzipFile with some alterations
 375         if self.__read(2) != "\037\213":
 376             raise ReadError, "not a gzip file"
 377         if self.__read(1) != "\010":
 378             raise CompressionError, "unsupported compression method"
 379
 380         flag = ord(self.__read(1))
 381         self.__read(6)
 382
 383         if flag & 4:
 384             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
 385             self.read(xlen)
 386         if flag & 8:
 387             while True:
 388                 s = self.__read(1)
 389                 if not s or s == NUL:
 390                     break
 391         if flag & 16:
 392             while True:
 393                 s = self.__read(1)
 394                 if not s or s == NUL:
 395                     break
 396         if flag & 2:
 397             self.__read(2)
 398
 399     def tell(self):
 400         """Return the stream's file pointer position.
 401         """
 402         return self.pos
 403
 404     def seek(self, pos=0):
 405         """Set the stream's file pointer to pos. Negative seeking
 406            is forbidden.
 407         """
 408         if pos - self.pos >= 0:
 409             blocks, remainder = divmod(pos - self.pos, self.bufsize)
 410             for i in xrange(blocks):
 411                 self.read(self.bufsize)
 412             self.read(remainder)
 413         else:
 414             raise StreamError, "seeking backwards is not allowed"
 415         return self.pos
 416
 417     def read(self, size=None):
 418         """Return the next size number of bytes from the stream.
 419            If size is not defined, return all bytes of the stream
 420            up to EOF.
 421         """
 422         if size is None:
 423             t = []
 424             while True:
 425                 buf = self._read(self.bufsize)
 426                 if not buf:
 427                     break
 428                 t.append(buf)
 429             buf = "".join(t)
 430         else:
 431             buf = self._read(size)
 432         self.pos += len(buf)
 433         return buf
 434
 435     def _read(self, size):
 436         """Return size bytes from the stream.
 437         """
 438         if self.type == "tar":
 439             return self.__read(size)
 440
 441         c = len(self.dbuf)
 442         t = [self.dbuf]
 443         while c < size:
 444             buf = self.__read(self.bufsize)
 445             if not buf:
 446                 break
 447             buf = self.cmp.decompress(buf)
 448             t.append(buf)
 449             c += len(buf)
 450         t = "".join(t)
 451         self.dbuf = t[size:]
 452         return t[:size]
 453
 454     def __read(self, size):
 455         """Return size bytes from stream. If internal buffer is empty,
 456            read another block from the stream.
 457         """
 458         c = len(self.buf)
 459         t = [self.buf]
 460         while c < size:
 461             buf = self.fileobj.read(self.bufsize)
 462             if not buf:
 463                 break
 464             t.append(buf)
 465             c += len(buf)
 466         t = "".join(t)
 467         self.buf = t[size:]
 468         return t[:size]
 469 # class _Stream
 470
 471 #------------------------
 472 # Extraction file object
 473 #------------------------
 474 class ExFileObject(object):
 475     """File-like object for reading an archive member.
 476        Is returned by TarFile.extractfile(). Support for
 477        sparse files included.
 478     """
 479
 480     def __init__(self, tarfile, tarinfo):
 481         self.fileobj = tarfile.fileobj
 482         self.name    = tarinfo.name
 483         self.mode    = "r"
 484         self.closed  = False
 485         self.offset  = tarinfo.offset_data
 486         self.size    = tarinfo.size
 487         self.pos     = 0L
 488         self.linebuffer = ""
 489         if tarinfo.issparse():
 490             self.sparse = tarinfo.sparse
 491             self.read = self._readsparse
 492         else:
 493             self.read = self._readnormal
 494
 495     def __read(self, size):
 496         """Overloadable read method.
 497         """
 498         return self.fileobj.read(size)
 499
 500     def readline(self, size=-1):
 501         """Read a line with approx. size. If size is negative,
 502            read a whole line. readline() and read() must not
 503            be mixed up (!).
 504         """
 505         if size < 0:
 506             size = sys.maxint
 507
 508         nl = self.linebuffer.find("\n")
 509         if nl >= 0:
 510             nl = min(nl, size)
 511         else:
 512             size -= len(self.linebuffer)
 513             while nl < 0:
 514                 buf = self.read(min(size, 100))
 515                 if not buf:
 516                     break
 517                 self.linebuffer += buf
 518                 size -= len(buf)
 519                 if size <= 0:
 520                     break
 521                 nl = self.linebuffer.find("\n")
 522             if nl == -1:
 523                 s = self.linebuffer
 524                 self.linebuffer = ""
 525                 return s
 526         buf = self.linebuffer[:nl]
 527         self.linebuffer = self.linebuffer[nl + 1:]
 528         while buf[-1:] == "\r":
 529             buf = buf[:-1]
 530         return buf + "\n"
 531
 532     def readlines(self):
 533         """Return a list with all (following) lines.
 534         """
 535         result = []
 536         while True:
 537             line = self.readline()
 538             if not line: break
 539             result.append(line)
 540         return result
 541
 542     def _readnormal(self, size=None):
 543         """Read operation for regular files.
 544         """
 545         if self.closed:
 546             raise ValueError, "file is closed"
 547         self.fileobj.seek(self.offset + self.pos)
 548         bytesleft = self.size - self.pos
 549         if size is None:
 550             bytestoread = bytesleft
 551         else:
 552             bytestoread = min(size, bytesleft)
 553         self.pos += bytestoread
 554         return self.__read(bytestoread)
 555
 556     def _readsparse(self, size=None):
 557         """Read operation for sparse files.
 558         """
 559         if self.closed:
 560             raise ValueError, "file is closed"
 561
 562         if size is None:
 563             size = self.size - self.pos
 564
 565         data = []
 566         while size > 0:
 567             buf = self._readsparsesection(size)
 568             if not buf:
 569                 break
 570             size -= len(buf)
 571             data.append(buf)
 572         return "".join(data)
 573
 574     def _readsparsesection(self, size):
 575         """Read a single section of a sparse file.
 576         """
 577         section = self.sparse.find(self.pos)
 578
 579         if section is None:
 580             return ""
 581
 582         toread = min(size, section.offset + section.size - self.pos)
 583         if isinstance(section, _data):
 584             realpos = section.realpos + self.pos - section.offset
 585             self.pos += toread
 586             self.fileobj.seek(self.offset + realpos)
 587             return self.__read(toread)
 588         else:
 589             self.pos += toread
 590             return NUL * toread
 591
 592     def tell(self):
 593         """Return the current file position.
 594         """
 595         return self.pos
 596
 597     def seek(self, pos, whence=0):
 598         """Seek to a position in the file.
 599         """
 600         self.linebuffer = ""
 601         if whence == 0:
 602             self.pos = min(max(pos, 0), self.size)
 603         if whence == 1:
 604             if pos < 0:
 605                 self.pos = max(self.pos + pos, 0)
 606             else:
 607                 self.pos = min(self.pos + pos, self.size)
 608         if whence == 2:
 609             self.pos = max(min(self.size + pos, self.size), 0)
 610
 611     def close(self):
 612         """Close the file object.
 613         """
 614         self.closed = True
 615 #class ExFileObject
 616
 617 #------------------
 618 # Exported Classes
 619 #------------------
 620 class TarInfo(object):
 621     """Informational class which holds the details about an
 622        archive member given by a tar header block.
 623        TarInfo objects are returned by TarFile.getmember(),
 624        TarFile.getmembers() and TarFile.gettarinfo() and are
 625        usually created internally.
 626     """
 627
 628     def __init__(self, name=""):
 629         """Construct a TarInfo object. name is the optional name
 630            of the member.
 631         """
 632
 633         self.name     = name       # member name (dirnames must end with '/')
 634         self.mode     = 0666       # file permissions
 635         self.uid      = 0          # user id
 636         self.gid      = 0          # group id
 637         self.size     = 0          # file size
 638         self.mtime    = 0          # modification time
 639         self.chksum   = 0          # header checksum
 640         self.type     = REGTYPE    # member type
 641         self.linkname = ""         # link name
 642         self.uname    = "user"     # user name
 643         self.gname    = "group"    # group name
 644         self.devmajor = 0          #-
 645         self.devminor = 0          #-for use with CHRTYPE and BLKTYPE
 646         self.prefix   = ""         # prefix to filename or holding information
 647                                    # about sparse files
 648
 649         self.offset   = 0          # the tar header starts here
 650         self.offset_data = 0       # the file's data starts here
 651
 652     def __repr__(self):
 653         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 654
 655     def frombuf(cls, buf):
 656         """Construct a TarInfo object from a 512 byte string buffer.
 657         """
 658         tarinfo = cls()
 659         tarinfo.name   =  nts(buf[0:100])
 660         tarinfo.mode   = int(buf[100:108], 8)
 661         tarinfo.uid    = int(buf[108:116],8)
 662         tarinfo.gid    = int(buf[116:124],8)
 663         tarinfo.size   = long(buf[124:136], 8)
 664         tarinfo.mtime  = long(buf[136:148], 8)
 665         tarinfo.chksum = int(buf[148:156], 8)
 666         tarinfo.type   = buf[156:157]
 667         tarinfo.linkname = nts(buf[157:257])
 668         tarinfo.uname  = nts(buf[265:297])
 669         tarinfo.gname  = nts(buf[297:329])
 670         try:
 671             tarinfo.devmajor = int(buf[329:337], 8)
 672             tarinfo.devminor = int(buf[337:345], 8)
 673         except ValueError:
 674             tarinfo.devmajor = tarinfo.devmajor = 0
 675         tarinfo.prefix = buf[345:500]
 676
 677         # The prefix field is used for filenames > 100 in
 678         # the POSIX standard.
 679         # name = prefix + '/' + name
 680         if tarinfo.type != GNUTYPE_SPARSE:
 681             tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
 682
 683         # Directory names should have a '/' at the end.
 684         if tarinfo.isdir() and tarinfo.name[-1:] != "/":
 685             tarinfo.name += "/"
 686         return tarinfo
 687
 688     frombuf = classmethod(frombuf)
 689
 690     def tobuf(self):
 691         """Return a tar header block as a 512 byte string.
 692         """
 693         name = self.name
 694
 695         # The following code was contributed by Detlef Lannert.
 696         parts = []
 697         for value, fieldsize in (
 698                 (name, 100),
 699                 ("%07o" % (self.mode & 07777), 8),
 700                 ("%07o" % self.uid, 8),
 701                 ("%07o" % self.gid, 8),
 702                 ("%011o" % self.size, 12),
 703                 ("%011o" % self.mtime, 12),
 704                 ("        ", 8),
 705                 (self.type, 1),
 706                 (self.linkname, 100),
 707                 (MAGIC, 6),
 708                 (VERSION, 2),
 709                 (self.uname, 32),
 710                 (self.gname, 32),
 711                 ("%07o" % self.devmajor, 8),
 712                 ("%07o" % self.devminor, 8),
 713                 (self.prefix, 155)
 714             ):
 715             l = len(value)
 716             parts.append(value + (fieldsize - l) * NUL)
 717
 718         buf = "".join(parts)
 719         chksum = calc_chksum(buf)
 720         buf = buf[:148] + "%06o\0" % chksum + buf[155:]
 721         buf += (BLOCKSIZE - len(buf)) * NUL
 722         self.buf = buf
 723         return buf
 724
 725     def isreg(self):
 726         return self.type in REGULAR_TYPES
 727     def isfile(self):
 728         return self.isreg()
 729     def isdir(self):
 730         return self.type == DIRTYPE
 731     def issym(self):
 732         return self.type == SYMTYPE
 733     def islnk(self):
 734         return self.type == LNKTYPE
 735     def ischr(self):
 736         return self.type == CHRTYPE
 737     def isblk(self):
 738         return self.type == BLKTYPE
 739     def isfifo(self):
 740         return self.type == FIFOTYPE
 741     def issparse(self):
 742         return self.type == GNUTYPE_SPARSE
 743     def isdev(self):
 744         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
 745 # class TarInfo
 746
 747 class TarFile(object):
 748     """The TarFile Class provides an interface to tar archives.
 749     """
 750
 751     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
 752
 753     dereference = False         # If true, add content of linked file to the
 754                                 # tar file, else the link.
 755
 756     ignore_zeros = False        # If true, skips empty or invalid blocks and
 757                                 # continues processing.
 758
 759     errorlevel = 0              # If 0, fatal errors only appear in debug
 760                                 # messages (if debug >= 0). If > 0, errors
 761                                 # are passed to the caller as exceptions.
 762
 763     posix = True                # If True, generates POSIX.1-1990-compliant
 764                                 # archives (no GNU extensions!)
 765
 766     fileobject = ExFileObject
 767
 768     def __init__(self, name=None, mode="r", fileobj=None):
 769         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
 770            read from an existing archive, 'a' to append data to an existing
 771            file or 'w' to create a new file overwriting an existing one. `mode'
 772            defaults to 'r'.
 773            If `fileobj' is given, it is used for reading or writing data. If it
 774            can be determined, `mode' is overridden by `fileobj's mode.
 775            `fileobj' is not closed, when TarFile is closed.
 776         """
 777         self.name = name
 778
 779         if len(mode) > 1 or mode not in "raw":
 780             raise ValueError, "mode must be 'r', 'a' or 'w'"
 781         self._mode = mode
 782         self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
 783
 784         if not fileobj:
 785             fileobj = file(self.name, self.mode)
 786             self._extfileobj = False
 787         else:
 788             if self.name is None and hasattr(fileobj, "name"):
 789                 self.name = fileobj.name
 790             if hasattr(fileobj, "mode"):
 791                 self.mode = fileobj.mode
 792             self._extfileobj = True
 793         self.fileobj = fileobj
 794
 795         # Init datastructures
 796         self.closed      = False
 797         self.members     = []       # list of members as TarInfo objects
 798         self.membernames = []       # names of members
 799         self.chunks      = [0]      # chunk cache
 800         self._loaded     = False    # flag if all members have been read
 801         self.offset      = 0L       # current position in the archive file
 802         self.inodes      = {}       # dictionary caching the inodes of
 803                                     # archive members already added
 804
 805         if self._mode == "r":
 806             self.firstmember = None
 807             self.firstmember = self.next()
 808
 809         if self._mode == "a":
 810             # Move to the end of the archive,
 811             # before the first empty block.
 812             self.firstmember = None
 813             while True:
 814                 try:
 815                     tarinfo = self.next()
 816                 except ReadError:
 817                     self.fileobj.seek(0)
 818                     break
 819                 if tarinfo is None:
 820                     self.fileobj.seek(- BLOCKSIZE, 1)
 821                     break
 822
 823         if self._mode in "aw":
 824             self._loaded = True
 825
 826     #--------------------------------------------------------------------------
 827     # Below are the classmethods which act as alternate constructors to the
 828     # TarFile class. The open() method is the only one that is needed for
 829     # public use; it is the "super"-constructor and is able to select an
 830     # adequate "sub"-constructor for a particular compression using the mapping
 831     # from OPEN_METH.
 832     #
 833     # This concept allows one to subclass TarFile without losing the comfort of
 834     # the super-constructor. A sub-constructor is registered and made available
 835     # by adding it to the mapping in OPEN_METH.
 836
 837     def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
 838         """Open a tar archive for reading, writing or appending. Return
 839            an appropriate TarFile class.
 840
 841            mode:
 842            'r'          open for reading with transparent compression
 843            'r:'         open for reading exclusively uncompressed
 844            'r:gz'       open for reading with gzip compression
 845            'r:bz2'      open for reading with bzip2 compression
 846            'a' or 'a:'  open for appending
 847            'w' or 'w:'  open for writing without compression
 848            'w:gz'       open for writing with gzip compression
 849            'w:bz2'      open for writing with bzip2 compression
 850            'r|'         open an uncompressed stream of tar blocks for reading
 851            'r|gz'       open a gzip compressed stream of tar blocks
 852            'r|bz2'      open a bzip2 compressed stream of tar blocks
 853            'w|'         open an uncompressed stream for writing
 854            'w|gz'       open a gzip compressed stream for writing
 855            'w|bz2'      open a bzip2 compressed stream for writing
 856         """
 857
 858         if not name and not fileobj:
 859             raise ValueError, "nothing to open"
 860
 861         if ":" in mode:
 862             filemode, comptype = mode.split(":", 1)
 863             filemode = filemode or "r"
 864             comptype = comptype or "tar"
 865
 866             # Select the *open() function according to
 867             # given compression.
 868             if comptype in cls.OPEN_METH:
 869                 func = getattr(cls, cls.OPEN_METH[comptype])
 870             else:
 871                 raise CompressionError, "unknown compression type %r" % comptype
 872             return func(name, filemode, fileobj)
 873
 874         elif "|" in mode:
 875             filemode, comptype = mode.split("|", 1)
 876             filemode = filemode or "r"
 877             comptype = comptype or "tar"
 878
 879             if filemode not in "rw":
 880                 raise ValueError, "mode must be 'r' or 'w'"
 881
 882             t = cls(name, filemode,
 883                     _Stream(name, filemode, comptype, fileobj, bufsize))
 884             t._extfileobj = False
 885             return t
 886
 887         elif mode == "r":
 888             # Find out which *open() is appropriate for opening the file.
 889             for comptype in cls.OPEN_METH:
 890                 func = getattr(cls, cls.OPEN_METH[comptype])
 891                 try:
 892                     return func(name, "r", fileobj)
 893                 except (ReadError, CompressionError):
 894                     continue
 895             raise ReadError, "file could not be opened successfully"
 896
 897         elif mode in "aw":
 898             return cls.taropen(name, mode, fileobj)
 899
 900         raise ValueError, "undiscernible mode"
 901
 902     open = classmethod(open)
 903
 904     def taropen(cls, name, mode="r", fileobj=None):
 905         """Open uncompressed tar archive name for reading or writing.
 906         """
 907         if len(mode) > 1 or mode not in "raw":
 908             raise ValueError, "mode must be 'r', 'a' or 'w'"
 909         return cls(name, mode, fileobj)
 910
 911     taropen = classmethod(taropen)
 912
 913     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
 914         """Open gzip compressed tar archive name for reading or writing.
 915            Appending is not allowed.
 916         """
 917         if len(mode) > 1 or mode not in "rw":
 918             raise ValueError, "mode must be 'r' or 'w'"
 919
 920         try:
 921             import gzip
 922             gzip.GzipFile
 923         except (ImportError, AttributeError):
 924             raise CompressionError, "gzip module is not available"
 925
 926         pre, ext = os.path.splitext(name)
 927         pre = os.path.basename(pre)
 928         if ext == ".tgz":
 929             ext = ".tar"
 930         if ext == ".gz":
 931             ext = ""
 932         tarname = pre + ext
 933
 934         if fileobj is None:
 935             fileobj = file(name, mode + "b")
 936
 937         if mode != "r":
 938             name = tarname
 939
 940         try:
 941             t = cls.taropen(tarname, mode,
 942                 gzip.GzipFile(name, mode, compresslevel, fileobj)
 943             )
 944         except IOError:
 945             raise ReadError, "not a gzip file"
 946         t._extfileobj = False
 947         return t
 948
 949     gzopen = classmethod(gzopen)
 950
 951     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
 952         """Open bzip2 compressed tar archive name for reading or writing.
 953            Appending is not allowed.
 954         """
 955         if len(mode) > 1 or mode not in "rw":
 956             raise ValueError, "mode must be 'r' or 'w'."
 957
 958         try:
 959             import bz2
 960         except ImportError:
 961             raise CompressionError, "bz2 module is not available"
 962
 963         pre, ext = os.path.splitext(name)
 964         pre = os.path.basename(pre)
 965         if ext == ".tbz2":
 966             ext = ".tar"
 967         if ext == ".bz2":
 968             ext = ""
 969         tarname = pre + ext
 970
 971         if fileobj is not None:
 972             raise ValueError, "no support for external file objects"
 973
 974         try:
 975             t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
 976         except IOError:
 977             raise ReadError, "not a bzip2 file"
 978         t._extfileobj = False
 979         return t
 980
 981     bz2open = classmethod(bz2open)
 982
 983     # All *open() methods are registered here.
 984     OPEN_METH = {
 985         "tar": "taropen",   # uncompressed tar
 986         "gz":  "gzopen",    # gzip compressed tar
 987         "bz2": "bz2open"    # bzip2 compressed tar
 988     }
 989
 990     #--------------------------------------------------------------------------
 991     # The public methods which TarFile provides:
 992
 993     def close(self):
 994         """Close the TarFile. In write-mode, two finishing zero blocks are
 995            appended to the archive.
 996         """
 997         if self.closed:
 998             return
 999
1000         if self._mode in "aw":
1001             self.fileobj.write(NUL * (BLOCKSIZE * 2))
1002             self.offset += (BLOCKSIZE * 2)
1003             # fill up the end with zero-blocks
1004             # (like option -b20 for tar does)
1005             blocks, remainder = divmod(self.offset, RECORDSIZE)
1006             if remainder > 0:
1007                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1008
1009         if not self._extfileobj:
1010             self.fileobj.close()
1011         self.closed = True
1012
1013     def getmember(self, name):
1014         """Return a TarInfo object for member `name'. If `name' can not be
1015            found in the archive, KeyError is raised. If a member occurs more
1016            than once in the archive, its last occurence is assumed to be the
1017            most up-to-date version.
1018         """
1019         self._check()
1020         if name not in self.membernames and not self._loaded:
1021             self._load()
1022         if name not in self.membernames:
1023             raise KeyError, "filename %r not found" % name
1024         return self._getmember(name)
1025
1026     def getmembers(self):
1027         """Return the members of the archive as a list of TarInfo objects. The
1028            list has the same order as the members in the archive.
1029         """
1030         self._check()
1031         if not self._loaded:    # if we want to obtain a list of
1032             self._load()        # all members, we first have to
1033                                 # scan the whole archive.
1034         return self.members
1035
1036     def getnames(self):
1037         """Return the members of the archive as a list of their names. It has
1038            the same order as the list returned by getmembers().
1039         """
1040         self._check()
1041         if not self._loaded:
1042             self._load()
1043         return self.membernames
1044
1045     def gettarinfo(self, name=None, arcname=None, fileobj=None):
1046         """Create a TarInfo object for either the file `name' or the file
1047            object `fileobj' (using os.fstat on its file descriptor). You can
1048            modify some of the TarInfo's attributes before you add it using
1049            addfile(). If given, `arcname' specifies an alternative name for the
1050            file in the archive.
1051         """
1052         self._check("aw")
1053
1054         # When fileobj is given, replace name by
1055         # fileobj's real name.
1056         if fileobj is not None:
1057             name = fileobj.name
1058
1059         # Building the name of the member in the archive.
1060         # Backward slashes are converted to forward slashes,
1061         # Absolute paths are turned to relative paths.
1062         if arcname is None:
1063             arcname = name
1064         arcname = normpath(arcname)
1065         drv, arcname = os.path.splitdrive(arcname)
1066         while arcname[0:1] == "/":
1067             arcname = arcname[1:]
1068
1069         # Now, fill the TarInfo object with
1070         # information specific for the file.
1071         tarinfo = TarInfo()
1072
1073         # Use os.stat or os.lstat, depending on platform
1074         # and if symlinks shall be resolved.
1075         if fileobj is None:
1076             if hasattr(os, "lstat") and not self.dereference:
1077                 statres = os.lstat(name)
1078             else:
1079                 statres = os.stat(name)
1080         else:
1081             statres = os.fstat(fileobj.fileno())
1082         linkname = ""
1083
1084         stmd = statres.st_mode
1085         if stat.S_ISREG(stmd):
1086             inode = (statres.st_ino, statres.st_dev)
1087             if inode in self.inodes and not self.dereference:
1088                 # Is it a hardlink to an already
1089                 # archived file?
1090                 type = LNKTYPE
1091                 linkname = self.inodes[inode]
1092             else:
1093                 # The inode is added only if its valid.
1094                 # For win32 it is always 0.
1095                 type = REGTYPE
1096                 if inode[0]:
1097                     self.inodes[inode] = arcname
1098         elif stat.S_ISDIR(stmd):
1099             type = DIRTYPE
1100             if arcname[-1:] != "/":
1101                 arcname += "/"
1102         elif stat.S_ISFIFO(stmd):
1103             type = FIFOTYPE
1104         elif stat.S_ISLNK(stmd):
1105             type = SYMTYPE
1106             linkname = os.readlink(name)
1107         elif stat.S_ISCHR(stmd):
1108             type = CHRTYPE
1109         elif stat.S_ISBLK(stmd):
1110             type = BLKTYPE
1111         else:
1112             return None
1113
1114         # Fill the TarInfo object with all
1115         # information we can get.
1116         tarinfo.name  = arcname
1117         tarinfo.mode  = stmd
1118         tarinfo.uid   = statres.st_uid
1119         tarinfo.gid   = statres.st_gid
1120         tarinfo.size  = statres.st_size
1121         tarinfo.mtime = statres.st_mtime
1122         tarinfo.type  = type
1123         tarinfo.linkname = linkname
1124         if pwd:
1125             try:
1126                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1127             except KeyError:
1128                 pass
1129         if grp:
1130             try:
1131                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1132             except KeyError:
1133                 pass
1134
1135         if type in (CHRTYPE, BLKTYPE):
1136             if hasattr(os, "major") and hasattr(os, "minor"):
1137                 tarinfo.devmajor = os.major(statres.st_rdev)
1138                 tarinfo.devminor = os.minor(statres.st_rdev)
1139         return tarinfo
1140
1141     def list(self, verbose=True):
1142         """Print a table of contents to sys.stdout. If `verbose' is False, only
1143            the names of the members are printed. If it is True, an `ls -l'-like
1144            output is produced.
1145         """
1146         self._check()
1147
1148         for tarinfo in self:
1149             if verbose:
1150                 print filemode(tarinfo.mode),
1151                 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1152                                  tarinfo.gname or tarinfo.gid),
1153                 if tarinfo.ischr() or tarinfo.isblk():
1154                     print "%10s" % ("%d,%d" \
1155                                     % (tarinfo.devmajor, tarinfo.devminor)),
1156                 else:
1157                     print "%10d" % tarinfo.size,
1158                 print "%d-%02d-%02d %02d:%02d:%02d" \
1159                       % time.localtime(tarinfo.mtime)[:6],
1160
1161             print tarinfo.name,
1162
1163             if verbose:
1164                 if tarinfo.issym():
1165                     print "->", tarinfo.linkname,
1166                 if tarinfo.islnk():
1167                     print "link to", tarinfo.linkname,
1168             print
1169
1170     def add(self, name, arcname=None, recursive=True):
1171         """Add the file `name' to the archive. `name' may be any type of file
1172            (directory, fifo, symbolic link, etc.). If given, `arcname'
1173            specifies an alternative name for the file in the archive.
1174            Directories are added recursively by default. This can be avoided by
1175            setting `recursive' to False.
1176         """
1177         self._check("aw")
1178
1179         if arcname is None:
1180             arcname = name
1181
1182         # Skip if somebody tries to archive the archive...
1183         if self.name is not None \
1184             and os.path.abspath(name) == os.path.abspath(self.name):
1185             self._dbg(2, "tarfile: Skipped %r" % name)
1186             return
1187
1188         # Special case: The user wants to add the current
1189         # working directory.
1190         if name == ".":
1191             if recursive:
1192                 if arcname == ".":
1193                     arcname = ""
1194                 for f in os.listdir("."):
1195                     self.add(f, os.path.join(arcname, f))
1196             return
1197
1198         self._dbg(1, name)
1199
1200         # Create a TarInfo object from the file.
1201         tarinfo = self.gettarinfo(name, arcname)
1202
1203         if tarinfo is None:
1204             self._dbg(1, "tarfile: Unsupported type %r" % name)
1205             return
1206
1207         # Append the tar header and data to the archive.
1208         if tarinfo.isreg():
1209             f = file(name, "rb")
1210             self.addfile(tarinfo, f)
1211             f.close()
1212
1213         if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1214             tarinfo.size = 0L
1215             self.addfile(tarinfo)
1216
1217         if tarinfo.isdir():
1218             self.addfile(tarinfo)
1219             if recursive:
1220                 for f in os.listdir(name):
1221                     self.add(os.path.join(name, f), os.path.join(arcname, f))
1222
1223     def addfile(self, tarinfo, fileobj=None):
1224         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1225            given, tarinfo.size bytes are read from it and added to the archive.
1226            You can create TarInfo objects using gettarinfo().
1227            On Windows platforms, `fileobj' should always be opened with mode
1228            'rb' to avoid irritation about the file size.
1229         """
1230         self._check("aw")
1231
1232         tarinfo.name = normpath(tarinfo.name)
1233         if tarinfo.isdir():
1234             # directories should end with '/'
1235             tarinfo.name += "/"
1236
1237         if tarinfo.linkname:
1238             tarinfo.linkname = normpath(tarinfo.linkname)
1239
1240         if tarinfo.size > MAXSIZE_MEMBER:
1241             raise ValueError, "file is too large (>8GB)"
1242
1243         if len(tarinfo.linkname) > LENGTH_LINK:
1244             if self.posix:
1245                 raise ValueError, "linkname is too long (>%d)" \
1246                                   % (LENGTH_LINK)
1247             else:
1248                 self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
1249                 tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
1250                 self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")
1251
1252         if len(tarinfo.name) > LENGTH_NAME:
1253             if self.posix:
1254                 prefix = tarinfo.name[:LENGTH_PREFIX + 1]
1255                 while prefix and prefix[-1] != "/":
1256                     prefix = prefix[:-1]
1257
1258                 name = tarinfo.name[len(prefix):]
1259                 prefix = prefix[:-1]
1260
1261                 if not prefix or len(name) > LENGTH_NAME:
1262                     raise ValueError, "name is too long (>%d)" \
1263                                       % (LENGTH_NAME)
1264
1265                 tarinfo.name   = name
1266                 tarinfo.prefix = prefix
1267             else:
1268                 self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
1269                 tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
1270                 self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
1271
1272         self.fileobj.write(tarinfo.tobuf())
1273         self.offset += BLOCKSIZE
1274
1275         # If there's data to follow, append it.
1276         if fileobj is not None:
1277             copyfileobj(fileobj, self.fileobj, tarinfo.size)
1278             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1279             if remainder > 0:
1280                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1281                 blocks += 1
1282             self.offset += blocks * BLOCKSIZE
1283
1284         self.members.append(tarinfo)
1285         self.membernames.append(tarinfo.name)
1286         self.chunks.append(self.offset)
1287
1288     def extract(self, member, path=""):
1289         """Extract a member from the archive to the current working directory,
1290            using its full name. Its file information is extracted as accurately
1291            as possible. `member' may be a filename or a TarInfo object. You can
1292            specify a different directory using `path'.
1293         """
1294         self._check("r")
1295
1296         if isinstance(member, TarInfo):
1297             tarinfo = member
1298         else:
1299             tarinfo = self.getmember(member)
1300
1301         try:
1302             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1303         except EnvironmentError, e:
1304             if self.errorlevel > 0:
1305                 raise
1306             else:
1307                 if e.filename is None:
1308                     self._dbg(1, "tarfile: %s" % e.strerror)
1309                 else:
1310                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1311         except ExtractError, e:
1312             if self.errorlevel > 1:
1313                 raise
1314             else:
1315                 self._dbg(1, "tarfile: %s" % e)
1316
1317     def extractfile(self, member):
1318         """Extract a member from the archive as a file object. `member' may be
1319            a filename or a TarInfo object. If `member' is a regular file, a
1320            file-like object is returned. If `member' is a link, a file-like
1321            object is constructed from the link's target. If `member' is none of
1322            the above, None is returned.
1323            The file-like object is read-only and provides the following
1324            methods: read(), readline(), readlines(), seek() and tell()
1325         """
1326         self._check("r")
1327
1328         if isinstance(member, TarInfo):
1329             tarinfo = member
1330         else:
1331             tarinfo = self.getmember(member)
1332
1333         if tarinfo.isreg():
1334             return self.fileobject(self, tarinfo)
1335
1336         elif tarinfo.type not in SUPPORTED_TYPES:
1337             # If a member's type is unknown, it is treated as a
1338             # regular file.
1339             return self.fileobject(self, tarinfo)
1340
1341         elif tarinfo.islnk() or tarinfo.issym():
1342             if isinstance(self.fileobj, _Stream):
1343                 # A small but ugly workaround for the case that someone tries
1344                 # to extract a (sym)link as a file-object from a non-seekable
1345                 # stream of tar blocks.
1346                 raise StreamError, "cannot extract (sym)link as file object"
1347             else:
1348                 # A (sym)link's file object is it's target's file object.
1349                 return self.extractfile(self._getmember(tarinfo.linkname,
1350                                                         tarinfo))
1351         else:
1352             # If there's no data associated with the member (directory, chrdev,
1353             # blkdev, etc.), return None instead of a file object.
1354             return None
1355
1356     def _extract_member(self, tarinfo, targetpath):
1357         """Extract the TarInfo object tarinfo to a physical
1358            file called targetpath.
1359         """
1360         # Fetch the TarInfo object for the given name
1361         # and build the destination pathname, replacing
1362         # forward slashes to platform specific separators.
1363         if targetpath[-1:] == "/":
1364             targetpath = targetpath[:-1]
1365         targetpath = os.path.normpath(targetpath)
1366
1367         # Create all upper directories.
1368         upperdirs = os.path.dirname(targetpath)
1369         if upperdirs and not os.path.exists(upperdirs):
1370             ti = TarInfo()
1371             ti.name  = upperdirs
1372             ti.type  = DIRTYPE
1373             ti.mode  = 0777
1374             ti.mtime = tarinfo.mtime
1375             ti.uid   = tarinfo.uid
1376             ti.gid   = tarinfo.gid
1377             ti.uname = tarinfo.uname
1378             ti.gname = tarinfo.gname
1379             try:
1380                 self._extract_member(ti, ti.name)
1381             except:
1382                 pass
1383
1384         if tarinfo.islnk() or tarinfo.issym():
1385             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1386         else:
1387             self._dbg(1, tarinfo.name)
1388
1389         if tarinfo.isreg():
1390             self.makefile(tarinfo, targetpath)
1391         elif tarinfo.isdir():
1392             self.makedir(tarinfo, targetpath)
1393         elif tarinfo.isfifo():
1394             self.makefifo(tarinfo, targetpath)
1395         elif tarinfo.ischr() or tarinfo.isblk():
1396             self.makedev(tarinfo, targetpath)
1397         elif tarinfo.islnk() or tarinfo.issym():
1398             self.makelink(tarinfo, targetpath)
1399         elif tarinfo.type not in SUPPORTED_TYPES:
1400             self.makeunknown(tarinfo, targetpath)
1401         else:
1402             self.makefile(tarinfo, targetpath)
1403
1404         self.chown(tarinfo, targetpath)
1405         if not tarinfo.issym():
1406             self.chmod(tarinfo, targetpath)
1407             self.utime(tarinfo, targetpath)
1408
1409     #--------------------------------------------------------------------------
1410     # Below are the different file methods. They are called via
1411     # _extract_member() when extract() is called. They can be replaced in a
1412     # subclass to implement other functionality.
1413
1414     def makedir(self, tarinfo, targetpath):
1415         """Make a directory called targetpath.
1416         """
1417         try:
1418             os.mkdir(targetpath)
1419         except EnvironmentError, e:
1420             if e.errno != errno.EEXIST:
1421                 raise
1422
1423     def makefile(self, tarinfo, targetpath):
1424         """Make a file called targetpath.
1425         """
1426         source = self.extractfile(tarinfo)
1427         target = file(targetpath, "wb")
1428         copyfileobj(source, target)
1429         source.close()
1430         target.close()
1431
1432     def makeunknown(self, tarinfo, targetpath):
1433         """Make a file from a TarInfo object with an unknown type
1434            at targetpath.
1435         """
1436         self.makefile(tarinfo, targetpath)
1437         self._dbg(1, "tarfile: Unknown file type %r, " \
1438                      "extracted as regular file." % tarinfo.type)
1439
1440     def makefifo(self, tarinfo, targetpath):
1441         """Make a fifo called targetpath.
1442         """
1443         if hasattr(os, "mkfifo"):
1444             os.mkfifo(targetpath)
1445         else:
1446             raise ExtractError, "fifo not supported by system"
1447
1448     def makedev(self, tarinfo, targetpath):
1449         """Make a character or block device called targetpath.
1450         """
1451         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
1452             raise ExtractError, "special devices not supported by system"
1453
1454         mode = tarinfo.mode
1455         if tarinfo.isblk():
1456             mode |= stat.S_IFBLK
1457         else:
1458             mode |= stat.S_IFCHR
1459
1460         os.mknod(targetpath, mode,
1461                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
1462
1463     def makelink(self, tarinfo, targetpath):
1464         """Make a (symbolic) link called targetpath. If it cannot be created
1465           (platform limitation), we try to make a copy of the referenced file
1466           instead of a link.
1467         """
1468         linkpath = tarinfo.linkname
1469         try:
1470             if tarinfo.issym():
1471                 os.symlink(linkpath, targetpath)
1472             else:
1473                 os.link(linkpath, targetpath)
1474         except AttributeError:
1475             if tarinfo.issym():
1476                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
1477                                         linkpath)
1478                 linkpath = normpath(linkpath)
1479
1480             try:
1481                 self._extract_member(self.getmember(linkpath), targetpath)
1482             except (EnvironmentError, KeyError), e:
1483                 linkpath = os.path.normpath(linkpath)
1484                 try:
1485                     shutil.copy2(linkpath, targetpath)
1486                 except EnvironmentError, e:
1487                     raise IOError, "link could not be created"
1488
1489     def chown(self, tarinfo, targetpath):
1490         """Set owner of targetpath according to tarinfo.
1491         """
1492         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1493             # We have to be root to do so.
1494             try:
1495                 g = grp.getgrnam(tarinfo.gname)[2]
1496             except KeyError:
1497                 try:
1498                     g = grp.getgrgid(tarinfo.gid)[2]
1499                 except KeyError:
1500                     g = os.getgid()
1501             try:
1502                 u = pwd.getpwnam(tarinfo.uname)[2]
1503             except KeyError:
1504                 try:
1505                     u = pwd.getpwuid(tarinfo.uid)[2]
1506                 except KeyError:
1507                     u = os.getuid()
1508             try:
1509                 if tarinfo.issym() and hasattr(os, "lchown"):
1510                     os.lchown(targetpath, u, g)
1511                 else:
1512                     if sys.platform != "os2emx":
1513                         os.chown(targetpath, u, g)
1514             except EnvironmentError, e:
1515                 raise ExtractError, "could not change owner"
1516
1517     def chmod(self, tarinfo, targetpath):
1518         """Set file permissions of targetpath according to tarinfo.
1519         """
1520         if hasattr(os, 'chmod'):
1521             try:
1522                 os.chmod(targetpath, tarinfo.mode)
1523             except EnvironmentError, e:
1524                 raise ExtractError, "could not change mode"
1525
1526     def utime(self, tarinfo, targetpath):
1527         """Set modification time of targetpath according to tarinfo.
1528         """
1529         if not hasattr(os, 'utime'):
1530             return
1531         if sys.platform == "win32" and tarinfo.isdir():
1532             # According to msdn.microsoft.com, it is an error (EACCES)
1533             # to use utime() on directories.
1534             return
1535         try:
1536             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1537         except EnvironmentError, e:
1538             raise ExtractError, "could not change modification time"
1539
1540     #--------------------------------------------------------------------------
1541
1542     def next(self):
1543         """Return the next member of the archive as a TarInfo object, when
1544            TarFile is opened for reading. Return None if there is no more
1545            available.
1546         """
1547         self._check("ra")
1548         if self.firstmember is not None:
1549             m = self.firstmember
1550             self.firstmember = None
1551             return m
1552
1553         # Read the next block.
1554         self.fileobj.seek(self.chunks[-1])
1555         while True:
1556             buf = self.fileobj.read(BLOCKSIZE)
1557             if not buf:
1558                 return None
1559             try:
1560                 tarinfo = TarInfo.frombuf(buf)
1561             except ValueError:
1562                 if self.ignore_zeros:
1563                     if buf.count(NUL) == BLOCKSIZE:
1564                         adj = "empty"
1565                     else:
1566                         adj = "invalid"
1567                     self._dbg(2, "0x%X: %s block" % (self.offset, adj))
1568                     self.offset += BLOCKSIZE
1569                     continue
1570                 else:
1571                     # Block is empty or unreadable.
1572                     if self.chunks[-1] == 0:
1573                         # If the first block is invalid. That does not
1574                         # look like a tar archive we can handle.
1575                         raise ReadError,"empty, unreadable or compressed file"
1576                     return None
1577             break
1578
1579         # We shouldn't rely on this checksum, because some tar programs
1580         # calculate it differently and it is merely validating the
1581         # header block. We could just as well skip this part, which would
1582         # have a slight effect on performance...
1583         if tarinfo.chksum != calc_chksum(buf):
1584             self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
1585
1586         # Set the TarInfo object's offset to the current position of the
1587         # TarFile and set self.offset to the position where the data blocks
1588         # should begin.
1589         tarinfo.offset = self.offset
1590         self.offset += BLOCKSIZE
1591
1592         # Check if the TarInfo object has a typeflag for which a callback
1593         # method is registered in the TYPE_METH. If so, then call it.
1594         if tarinfo.type in self.TYPE_METH:
1595             tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
1596         else:
1597             tarinfo.offset_data = self.offset
1598             if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
1599                 # Skip the following data blocks.
1600                 self.offset += self._block(tarinfo.size)
1601
1602         if tarinfo.isreg() and tarinfo.name[:-1] == "/":
1603             # some old tar programs don't know DIRTYPE
1604             tarinfo.type = DIRTYPE
1605
1606         self.members.append(tarinfo)
1607         self.membernames.append(tarinfo.name)
1608         self.chunks.append(self.offset)
1609         return tarinfo
1610
1611     #--------------------------------------------------------------------------
1612     # Below are some methods which are called for special typeflags in the
1613     # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1614     # are registered in TYPE_METH below. You can register your own methods
1615     # with this mapping.
1616     # A registered method is called with a TarInfo object as only argument.
1617     #
1618     # During its execution the method MUST perform the following tasks:
1619     # 1. set tarinfo.offset_data to the position where the data blocks begin,
1620     #    if there is data to follow.
1621     # 2. set self.offset to the position where the next member's header will
1622     #    begin.
1623     # 3. return a valid TarInfo object.
1624
1625     def proc_gnulong(self, tarinfo):
1626         """Evaluate the blocks that hold a GNU longname
1627            or longlink member.
1628         """
1629         buf = ""
1630         name = None
1631         linkname = None
1632         count = tarinfo.size
1633         while count > 0:
1634             block = self.fileobj.read(BLOCKSIZE)
1635             buf += block
1636             self.offset += BLOCKSIZE
1637             count -= BLOCKSIZE
1638
1639         if tarinfo.type == GNUTYPE_LONGNAME:
1640             name = nts(buf)
1641         if tarinfo.type == GNUTYPE_LONGLINK:
1642             linkname = nts(buf)
1643
1644         buf = self.fileobj.read(BLOCKSIZE)
1645
1646         tarinfo = TarInfo.frombuf(buf)
1647         tarinfo.offset = self.offset
1648         self.offset += BLOCKSIZE
1649         tarinfo.offset_data = self.offset
1650         tarinfo.name = name or tarinfo.name
1651         tarinfo.linkname = linkname or tarinfo.linkname
1652
1653         if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
1654             # Skip the following data blocks.
1655             self.offset += self._block(tarinfo.size)
1656         return tarinfo
1657
1658     def proc_sparse(self, tarinfo):
1659         """Analyze a GNU sparse header plus extra headers.
1660         """
1661         buf = tarinfo.tobuf()
1662         sp = _ringbuffer()
1663         pos = 386
1664         lastpos = 0L
1665         realpos = 0L
1666         # There are 4 possible sparse structs in the
1667         # first header.
1668         for i in xrange(4):
1669             try:
1670                 offset = int(buf[pos:pos + 12], 8)
1671                 numbytes = int(buf[pos + 12:pos + 24], 8)
1672             except ValueError:
1673                 break
1674             if offset > lastpos:
1675                 sp.append(_hole(lastpos, offset - lastpos))
1676             sp.append(_data(offset, numbytes, realpos))
1677             realpos += numbytes
1678             lastpos = offset + numbytes
1679             pos += 24
1680
1681         isextended = ord(buf[482])
1682         origsize = int(buf[483:495], 8)
1683
1684         # If the isextended flag is given,
1685         # there are extra headers to process.
1686         while isextended == 1:
1687             buf = self.fileobj.read(BLOCKSIZE)
1688             self.offset += BLOCKSIZE
1689             pos = 0
1690             for i in xrange(21):
1691                 try:
1692                     offset = int(buf[pos:pos + 12], 8)
1693                     numbytes = int(buf[pos + 12:pos + 24], 8)
1694                 except ValueError:
1695                     break
1696                 if offset > lastpos:
1697                     sp.append(_hole(lastpos, offset - lastpos))
1698                 sp.append(_data(offset, numbytes, realpos))
1699                 realpos += numbytes
1700                 lastpos = offset + numbytes
1701                 pos += 24
1702             isextended = ord(buf[504])
1703
1704         if lastpos < origsize:
1705             sp.append(_hole(lastpos, origsize - lastpos))
1706
1707         tarinfo.sparse = sp
1708
1709         tarinfo.offset_data = self.offset
1710         self.offset += self._block(tarinfo.size)
1711         tarinfo.size = origsize
1712         return tarinfo
1713
1714     # The type mapping for the next() method. The keys are single character
1715     # strings, the typeflag. The values are methods which are called when
1716     # next() encounters such a typeflag.
1717     TYPE_METH = {
1718         GNUTYPE_LONGNAME: proc_gnulong,
1719         GNUTYPE_LONGLINK: proc_gnulong,
1720         GNUTYPE_SPARSE:   proc_sparse
1721     }
1722
1723     #--------------------------------------------------------------------------
1724     # Little helper methods:
1725
1726     def _block(self, count):
1727         """Round up a byte count by BLOCKSIZE and return it,
1728            e.g. _block(834) => 1024.
1729         """
1730         blocks, remainder = divmod(count, BLOCKSIZE)
1731         if remainder:
1732             blocks += 1
1733         return blocks * BLOCKSIZE
1734
1735     def _getmember(self, name, tarinfo=None):
1736         """Find an archive member by name from bottom to top.
1737            If tarinfo is given, it is used as the starting point.
1738         """
1739         if tarinfo is None:
1740             end = len(self.members)
1741         else:
1742             end = self.members.index(tarinfo)
1743
1744         for i in xrange(end - 1, -1, -1):
1745             if name == self.membernames[i]:
1746                 return self.members[i]
1747
1748     def _load(self):
1749         """Read through the entire archive file and look for readable
1750            members.
1751         """
1752         while True:
1753             tarinfo = self.next()
1754             if tarinfo is None:
1755                 break
1756         self._loaded = True
1757
1758     def _check(self, mode=None):
1759         """Check if TarFile is still open, and if the operation's mode
1760            corresponds to TarFile's mode.
1761         """
1762         if self.closed:
1763             raise IOError, "%s is closed" % self.__class__.__name__
1764         if mode is not None and self._mode not in mode:
1765             raise IOError, "bad operation for mode %r" % self._mode
1766
1767     def __iter__(self):
1768         """Provide an iterator object.
1769         """
1770         if self._loaded:
1771             return iter(self.members)
1772         else:
1773             return TarIter(self)
1774
1775     def _create_gnulong(self, name, type):
1776         """Write a GNU longname/longlink member to the TarFile.
1777            It consists of an extended tar header, with the length
1778            of the longname as size, followed by data blocks,
1779            which contain the longname as a null terminated string.
1780         """
1781         tarinfo = TarInfo()
1782         tarinfo.name = "././@LongLink"
1783         tarinfo.type = type
1784         tarinfo.mode = 0
1785         tarinfo.size = len(name)
1786
1787         # write extended header
1788         self.fileobj.write(tarinfo.tobuf())
1789         # write name blocks
1790         self.fileobj.write(name)
1791         blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1792         if remainder > 0:
1793             self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1794             blocks += 1
1795         self.offset += blocks * BLOCKSIZE
1796
1797     def _dbg(self, level, msg):
1798         """Write debugging output to sys.stderr.
1799         """
1800         if level <= self.debug:
1801             print >> sys.stderr, msg
1802 # class TarFile
1803
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile whose members are iterated.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """The iterator is its own iterator object.
        """
        return self

    def next(self):
        """Fetch the next member through TarFile's next() method.
           Once next() yields no member, mark the TarFile as _loaded
           and end the iteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1828
1829 # Helper classes for sparse file support
1830 class _section:
1831     """Base class for _data and _hole.
1832     """
1833     def __init__(self, offset, size):
1834         self.offset = offset
1835         self.size = size
1836     def __contains__(self, offset):
1837         return self.offset <= offset < self.offset + self.size
1838
class _data(_section):
    """A section of a sparse file that holds data. realpos is stored
       alongside the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1845
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section and only serves to tell holes from data sections.
    """
1850
1851 class _ringbuffer(list):
1852     """Ringbuffer class which increases performance
1853        over a regular list.
1854     """
1855     def __init__(self):
1856         self.idx = 0
1857     def find(self, offset):
1858         idx = self.idx
1859         while True:
1860             item = self[idx]
1861             if offset in item:
1862                 break
1863             idx += 1
1864             if idx == len(self):
1865                 idx = 0
1866             if idx == self.idx:
1867                 # End of File
1868                 return None
1869         self.idx = idx
1870         return item
1871
1872 #---------------------------------------------
1873 # zipfile compatible TarFile class
1874 #---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.

       The wrapped TarFile object is available as self.tarfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED). Any other compression
           value raises ValueError.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names that users of
            # zipfile's info objects expect.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [m.name for m in self.infolist()]
    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES.
        """
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]
    def printdir(self):
        """Print a listing of the archive using TarFile.list().
        """
        self.tarfile.list()
    def testzip(self):
        """Perform no check and return None.
        """
        return
    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the contents of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname. compress_type is
           accepted for interface compatibility and is not used.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add the string bytes as a member described by zinfo, whose
           filename, file_size and date_time attributes are copied to
           the name, size and mtime attributes used by TarFile.
        """
        import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))
    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
1920
1921 #--------------------
1922 # exported functions
1923 #--------------------
def is_tarfile(name):
    """Return True if name can be opened as a tar archive by this
       module's open(), and False if that fails with a TarError.
       Exceptions other than TarError are not caught.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1934
# Module-level shortcut for TarFile.open. Within this module the name
# "open" refers to this binding once the assignment has run, which is
# what is_tarfile() calls.
open = TarFile.open