Lib/tarfile.py

   1 #!/usr/bin/env python
   2 #-------------------------------------------------------------------
   3 # tarfile.py
   4 #-------------------------------------------------------------------
   5 # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
   6 # All rights reserved.
   7 #
   8 # Permission  is  hereby granted,  free  of charge,  to  any person
   9 # obtaining a  copy of  this software  and associated documentation
  10 # files  (the  "Software"),  to   deal  in  the  Software   without
  11 # restriction,  including  without limitation  the  rights to  use,
  12 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  13 # copies  of  the  Software,  and to  permit  persons  to  whom the
  14 # Software  is  furnished  to  do  so,  subject  to  the  following
  15 # conditions:
  16 #
  17 # The above copyright  notice and this  permission notice shall  be
  18 # included in all copies or substantial portions of the Software.
  19 #
  20 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  21 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  22 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  23 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  24 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  25 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  27 # OTHER DEALINGS IN THE SOFTWARE.
  28 #
  29 """Read from and write to tar format archives.
  30 """
  31
  32 __version__ = "$Revision$"
  33
  34 version     = "0.9.0"
  35 __author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
  36 __date__    = "$Date$"
  37 __cvsid__   = "$Id$"
  38 __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
  39
  40 #---------
  41 # Imports
  42 #---------
  43 import sys
  44 import os
  45 import shutil
  46 import stat
  47 import errno
  48 import time
  49 import struct
  50 import copy
  51 import re
  52
  53 if sys.platform == 'mac':
  54     # This module needs work for MacOS9, especially in the area of pathname
  55     # handling. In many places it is assumed a simple substitution of / by the
  56     # local os.path.sep is good enough to convert pathnames, but this does not
  57     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
  58     raise ImportError("tarfile does not work for platform==mac")
  59
  60 try:
  61     import grp, pwd
  62 except ImportError:
  63     grp = pwd = None
  64
  65 # from tarfile import *
  66 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
  67
  68 from builtins import open as _open # Since 'open' is TarFile.open
  69
  70 #---------------------------------------------------------
  71 # tar constants
  72 #---------------------------------------------------------
  73 NUL = b"\0"                     # the null character
  74 BLOCKSIZE = 512                 # length of processing blocks
  75 RECORDSIZE = BLOCKSIZE * 20     # length of records
  76 GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
  77 POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
  78
  79 LENGTH_NAME = 100               # maximum length of a filename
  80 LENGTH_LINK = 100               # maximum length of a linkname
  81 LENGTH_PREFIX = 155             # maximum length of the prefix field
  82
  83 REGTYPE = b"0"                  # regular file
  84 AREGTYPE = b"\0"                # regular file
  85 LNKTYPE = b"1"                  # link (inside tarfile)
  86 SYMTYPE = b"2"                  # symbolic link
  87 CHRTYPE = b"3"                  # character special device
  88 BLKTYPE = b"4"                  # block special device
  89 DIRTYPE = b"5"                  # directory
  90 FIFOTYPE = b"6"                 # fifo special device
  91 CONTTYPE = b"7"                 # contiguous file
  92
  93 GNUTYPE_LONGNAME = b"L"         # GNU tar longname
  94 GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
  95 GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
  96
  97 XHDTYPE = b"x"                  # POSIX.1-2001 extended header
  98 XGLTYPE = b"g"                  # POSIX.1-2001 global header
  99 SOLARIS_XHDTYPE = b"X"          # Solaris extended header
 100
 101 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
 102 GNU_FORMAT = 1                  # GNU tar format
 103 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
 104 DEFAULT_FORMAT = GNU_FORMAT
 105
 106 #---------------------------------------------------------
 107 # tarfile constants
 108 #---------------------------------------------------------
 109 # File types that tarfile supports:
 110 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
 111                    SYMTYPE, DIRTYPE, FIFOTYPE,
 112                    CONTTYPE, CHRTYPE, BLKTYPE,
 113                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 114                    GNUTYPE_SPARSE)
 115
 116 # File types that will be treated as a regular file.
 117 REGULAR_TYPES = (REGTYPE, AREGTYPE,
 118                  CONTTYPE, GNUTYPE_SPARSE)
 119
 120 # File types that are part of the GNU tar format.
 121 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 122              GNUTYPE_SPARSE)
 123
 124 # Fields from a pax header that override a TarInfo attribute.
 125 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
 126               "uid", "gid", "uname", "gname")
 127
 128 # Fields in a pax header that are numbers, all other fields
 129 # are treated as strings.
 130 PAX_NUMBER_FIELDS = {
 131     "atime": float,
 132     "ctime": float,
 133     "mtime": float,
 134     "uid": int,
 135     "gid": int,
 136     "size": int
 137 }
 138
 139 #---------------------------------------------------------
 140 # Bits used in the mode field, values in octal.
 141 #---------------------------------------------------------
 142 S_IFLNK = 0o120000        # symbolic link
 143 S_IFREG = 0o100000        # regular file
 144 S_IFBLK = 0o060000        # block device
 145 S_IFDIR = 0o040000        # directory
 146 S_IFCHR = 0o020000        # character device
 147 S_IFIFO = 0o010000        # fifo
 148
 149 TSUID   = 0o4000          # set UID on execution
 150 TSGID   = 0o2000          # set GID on execution
 151 TSVTX   = 0o1000          # reserved
 152
 153 TUREAD  = 0o400           # read by owner
 154 TUWRITE = 0o200           # write by owner
 155 TUEXEC  = 0o100           # execute/search by owner
 156 TGREAD  = 0o040           # read by group
 157 TGWRITE = 0o020           # write by group
 158 TGEXEC  = 0o010           # execute/search by group
 159 TOREAD  = 0o004           # read by other
 160 TOWRITE = 0o002           # write by other
 161 TOEXEC  = 0o001           # execute/search by other
 162
 163 #---------------------------------------------------------
 164 # initialization
 165 #---------------------------------------------------------
 166 ENCODING = sys.getfilesystemencoding()
 167 if ENCODING is None:
 168     ENCODING = "ascii"
 169
 170 #---------------------------------------------------------
 171 # Some useful functions
 172 #---------------------------------------------------------
 173
 174 def stn(s, length, encoding, errors):
 175     """Convert a string to a null-terminated bytes object.
 176     """
 177     s = s.encode(encoding, errors)
 178     return s[:length] + (length - len(s)) * NUL
 179
 180 def nts(s, encoding, errors):
 181     """Convert a null-terminated bytes object to a string.
 182     """
 183     p = s.find(b"\0")
 184     if p != -1:
 185         s = s[:p]
 186     return s.decode(encoding, errors)
 187
 188 def nti(s):
 189     """Convert a number field to a python number.
 190     """
 191     # There are two possible encodings for a number field, see
 192     # itn() below.
 193     if s[0] != chr(0o200):
 194         try:
 195             n = int(nts(s, "ascii", "strict") or "0", 8)
 196         except ValueError:
 197             raise HeaderError("invalid header")
 198     else:
 199         n = 0
 200         for i in range(len(s) - 1):
 201             n <<= 8
 202             n += ord(s[i + 1])
 203     return n
 204
 205 def itn(n, digits=8, format=DEFAULT_FORMAT):
 206     """Convert a python number to a number field.
 207     """
 208     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
 209     # octal digits followed by a null-byte, this allows values up to
 210     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
 211     # that if necessary. A leading 0o200 byte indicates this particular
 212     # encoding, the following digits-1 bytes are a big-endian
 213     # representation. This allows values up to (256**(digits-1))-1.
 214     if 0 <= n < 8 ** (digits - 1):
 215         s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
 216     else:
 217         if format != GNU_FORMAT or n >= 256 ** (digits - 1):
 218             raise ValueError("overflow in number field")
 219
 220         if n < 0:
 221             # XXX We mimic GNU tar's behaviour with negative numbers,
 222             # this could raise OverflowError.
 223             n = struct.unpack("L", struct.pack("l", n))[0]
 224
 225         s = bytearray()
 226         for i in range(digits - 1):
 227             s.insert(0, n & 0o377)
 228             n >>= 8
 229         s.insert(0, 0o200)
 230     return s
 231
 232 def calc_chksums(buf):
 233     """Calculate the checksum for a member's header by summing up all
 234        characters except for the chksum field which is treated as if
 235        it was filled with spaces. According to the GNU tar sources,
 236        some tars (Sun and NeXT) calculate chksum with signed char,
 237        which will be different if there are chars in the buffer with
 238        the high bit set. So we calculate two checksums, unsigned and
 239        signed.
 240     """
 241     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
 242     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
 243     return unsigned_chksum, signed_chksum
 244
 245 def copyfileobj(src, dst, length=None):
 246     """Copy length bytes from fileobj src to fileobj dst.
 247        If length is None, copy the entire content.
 248     """
 249     if length == 0:
 250         return
 251     if length is None:
 252         shutil.copyfileobj(src, dst)
 253         return
 254
 255     BUFSIZE = 16 * 1024
 256     blocks, remainder = divmod(length, BUFSIZE)
 257     for b in range(blocks):
 258         buf = src.read(BUFSIZE)
 259         if len(buf) < BUFSIZE:
 260             raise IOError("end of file reached")
 261         dst.write(buf)
 262
 263     if remainder != 0:
 264         buf = src.read(remainder)
 265         if len(buf) < remainder:
 266             raise IOError("end of file reached")
 267         dst.write(buf)
 268     return
 269
 270 filemode_table = (
 271     ((S_IFLNK,      "l"),
 272      (S_IFREG,      "-"),
 273      (S_IFBLK,      "b"),
 274      (S_IFDIR,      "d"),
 275      (S_IFCHR,      "c"),
 276      (S_IFIFO,      "p")),
 277
 278     ((TUREAD,       "r"),),
 279     ((TUWRITE,      "w"),),
 280     ((TUEXEC|TSUID, "s"),
 281      (TSUID,        "S"),
 282      (TUEXEC,       "x")),
 283
 284     ((TGREAD,       "r"),),
 285     ((TGWRITE,      "w"),),
 286     ((TGEXEC|TSGID, "s"),
 287      (TSGID,        "S"),
 288      (TGEXEC,       "x")),
 289
 290     ((TOREAD,       "r"),),
 291     ((TOWRITE,      "w"),),
 292     ((TOEXEC|TSVTX, "t"),
 293      (TSVTX,        "T"),
 294      (TOEXEC,       "x"))
 295 )
 296
 297 def filemode(mode):
 298     """Convert a file's mode to a string of the form
 299        -rwxrwxrwx.
 300        Used by TarFile.list()
 301     """
 302     perm = []
 303     for table in filemode_table:
 304         for bit, char in table:
 305             if mode & bit == bit:
 306                 perm.append(char)
 307                 break
 308         else:
 309             perm.append("-")
 310     return "".join(perm)
 311
 312 if os.sep != "/":
 313     normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
 314 else:
 315     normpath = os.path.normpath
 316
 317 class TarError(Exception):
 318     """Base exception."""
 319     pass
 320 class ExtractError(TarError):
 321     """General exception for extract errors."""
 322     pass
 323 class ReadError(TarError):
 324     """Exception for unreadble tar archives."""
 325     pass
 326 class CompressionError(TarError):
 327     """Exception for unavailable compression methods."""
 328     pass
 329 class StreamError(TarError):
 330     """Exception for unsupported operations on stream-like TarFiles."""
 331     pass
 332 class HeaderError(TarError):
 333     """Exception for invalid headers."""
 334     pass
 335
 336 #---------------------------
 337 # internal stream interface
 338 #---------------------------
 339 class _LowLevelFile:
 340     """Low-level file object. Supports reading and writing.
 341        It is used instead of a regular file object for streaming
 342        access.
 343     """
 344
 345     def __init__(self, name, mode):
 346         mode = {
 347             "r": os.O_RDONLY,
 348             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
 349         }[mode]
 350         if hasattr(os, "O_BINARY"):
 351             mode |= os.O_BINARY
 352         self.fd = os.open(name, mode)
 353
 354     def close(self):
 355         os.close(self.fd)
 356
 357     def read(self, size):
 358         return os.read(self.fd, size)
 359
 360     def write(self, s):
 361         os.write(self.fd, s)
 362
 363 class _Stream:
 364     """Class that serves as an adapter between TarFile and
 365        a stream-like object.  The stream-like object only
 366        needs to have a read() or write() method and is accessed
 367        blockwise.  Use of gzip or bzip2 compression is possible.
 368        A stream-like object could be for example: sys.stdin,
 369        sys.stdout, a socket, a tape device etc.
 370
 371        _Stream is intended to be used only internally.
 372     """
 373
 374     def __init__(self, name, mode, comptype, fileobj, bufsize):
 375         """Construct a _Stream object.
 376         """
 377         self._extfileobj = True
 378         if fileobj is None:
 379             fileobj = _LowLevelFile(name, mode)
 380             self._extfileobj = False
 381
 382         if comptype == '*':
 383             # Enable transparent compression detection for the
 384             # stream interface
 385             fileobj = _StreamProxy(fileobj)
 386             comptype = fileobj.getcomptype()
 387
 388         self.name     = name or ""
 389         self.mode     = mode
 390         self.comptype = comptype
 391         self.fileobj  = fileobj
 392         self.bufsize  = bufsize
 393         self.buf      = b""
 394         self.pos      = 0
 395         self.closed   = False
 396
 397         if comptype == "gz":
 398             try:
 399                 import zlib
 400             except ImportError:
 401                 raise CompressionError("zlib module is not available")
 402             self.zlib = zlib
 403             self.crc = zlib.crc32("")
 404             if mode == "r":
 405                 self._init_read_gz()
 406             else:
 407                 self._init_write_gz()
 408
 409         if comptype == "bz2":
 410             try:
 411                 import bz2
 412             except ImportError:
 413                 raise CompressionError("bz2 module is not available")
 414             if mode == "r":
 415                 self.dbuf = b""
 416                 self.cmp = bz2.BZ2Decompressor()
 417             else:
 418                 self.cmp = bz2.BZ2Compressor()
 419
 420     def __del__(self):
 421         if hasattr(self, "closed") and not self.closed:
 422             self.close()
 423
 424     def _init_write_gz(self):
 425         """Initialize for writing with gzip compression.
 426         """
 427         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
 428                                             -self.zlib.MAX_WBITS,
 429                                             self.zlib.DEF_MEM_LEVEL,
 430                                             0)
 431         timestamp = struct.pack("<L", int(time.time()))
 432         self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
 433         if self.name.endswith(".gz"):
 434             self.name = self.name[:-3]
 435         # RFC1952 says we must use ISO-8859-1 for the FNAME field.
 436         self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
 437
 438     def write(self, s):
 439         """Write string s to the stream.
 440         """
 441         if self.comptype == "gz":
 442             self.crc = self.zlib.crc32(s, self.crc)
 443         self.pos += len(s)
 444         if self.comptype != "tar":
 445             s = self.cmp.compress(s)
 446         self.__write(s)
 447
 448     def __write(self, s):
 449         """Write string s to the stream if a whole new block
 450            is ready to be written.
 451         """
 452         self.buf += s
 453         while len(self.buf) > self.bufsize:
 454             self.fileobj.write(self.buf[:self.bufsize])
 455             self.buf = self.buf[self.bufsize:]
 456
 457     def close(self):
 458         """Close the _Stream object. No operation should be
 459            done on it afterwards.
 460         """
 461         if self.closed:
 462             return
 463
 464         if self.mode == "w" and self.comptype != "tar":
 465             self.buf += self.cmp.flush()
 466
 467         if self.mode == "w" and self.buf:
 468             self.fileobj.write(self.buf)
 469             self.buf = b""
 470             if self.comptype == "gz":
 471                 # The native zlib crc is an unsigned 32-bit integer, but
 472                 # the Python wrapper implicitly casts that to a signed C
 473                 # long.  So, on a 32-bit box self.crc may "look negative",
 474                 # while the same crc on a 64-bit box may "look positive".
 475                 # To avoid irksome warnings from the `struct` module, force
 476                 # it to look positive on all boxes.
 477                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
 478                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
 479
 480         if not self._extfileobj:
 481             self.fileobj.close()
 482
 483         self.closed = True
 484
 485     def _init_read_gz(self):
 486         """Initialize for reading a gzip compressed fileobj.
 487         """
 488         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
 489         self.dbuf = b""
 490
 491         # taken from gzip.GzipFile with some alterations
 492         if self.__read(2) != b"\037\213":
 493             raise ReadError("not a gzip file")
 494         if self.__read(1) != b"\010":
 495             raise CompressionError("unsupported compression method")
 496
 497         flag = ord(self.__read(1))
 498         self.__read(6)
 499
 500         if flag & 4:
 501             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
 502             self.read(xlen)
 503         if flag & 8:
 504             while True:
 505                 s = self.__read(1)
 506                 if not s or s == NUL:
 507                     break
 508         if flag & 16:
 509             while True:
 510                 s = self.__read(1)
 511                 if not s or s == NUL:
 512                     break
 513         if flag & 2:
 514             self.__read(2)
 515
 516     def tell(self):
 517         """Return the stream's file pointer position.
 518         """
 519         return self.pos
 520
 521     def seek(self, pos=0):
 522         """Set the stream's file pointer to pos. Negative seeking
 523            is forbidden.
 524         """
 525         if pos - self.pos >= 0:
 526             blocks, remainder = divmod(pos - self.pos, self.bufsize)
 527             for i in range(blocks):
 528                 self.read(self.bufsize)
 529             self.read(remainder)
 530         else:
 531             raise StreamError("seeking backwards is not allowed")
 532         return self.pos
 533
 534     def read(self, size=None):
 535         """Return the next size number of bytes from the stream.
 536            If size is not defined, return all bytes of the stream
 537            up to EOF.
 538         """
 539         if size is None:
 540             t = []
 541             while True:
 542                 buf = self._read(self.bufsize)
 543                 if not buf:
 544                     break
 545                 t.append(buf)
 546             buf = "".join(t)
 547         else:
 548             buf = self._read(size)
 549         self.pos += len(buf)
 550         return buf
 551
 552     def _read(self, size):
 553         """Return size bytes from the stream.
 554         """
 555         if self.comptype == "tar":
 556             return self.__read(size)
 557
 558         c = len(self.dbuf)
 559         while c < size:
 560             buf = self.__read(self.bufsize)
 561             if not buf:
 562                 break
 563             try:
 564                 buf = self.cmp.decompress(buf)
 565             except IOError:
 566                 raise ReadError("invalid compressed data")
 567             self.dbuf += buf
 568             c += len(buf)
 569         buf = self.dbuf[:size]
 570         self.dbuf = self.dbuf[size:]
 571         return buf
 572
 573     def __read(self, size):
 574         """Return size bytes from stream. If internal buffer is empty,
 575            read another block from the stream.
 576         """
 577         c = len(self.buf)
 578         while c < size:
 579             buf = self.fileobj.read(self.bufsize)
 580             if not buf:
 581                 break
 582             self.buf += buf
 583             c += len(buf)
 584         buf = self.buf[:size]
 585         self.buf = self.buf[size:]
 586         return buf
 587 # class _Stream
 588
 589 class _StreamProxy(object):
 590     """Small proxy class that enables transparent compression
 591        detection for the Stream interface (mode 'r|*').
 592     """
 593
 594     def __init__(self, fileobj):
 595         self.fileobj = fileobj
 596         self.buf = self.fileobj.read(BLOCKSIZE)
 597
 598     def read(self, size):
 599         self.read = self.fileobj.read
 600         return self.buf
 601
 602     def getcomptype(self):
 603         if self.buf.startswith(b"\037\213\010"):
 604             return "gz"
 605         if self.buf.startswith(b"BZh91"):
 606             return "bz2"
 607         return "tar"
 608
 609     def close(self):
 610         self.fileobj.close()
 611 # class StreamProxy
 612
 613 class _BZ2Proxy(object):
 614     """Small proxy class that enables external file object
 615        support for "r:bz2" and "w:bz2" modes. This is actually
 616        a workaround for a limitation in bz2 module's BZ2File
 617        class which (unlike gzip.GzipFile) has no support for
 618        a file object argument.
 619     """
 620
 621     blocksize = 16 * 1024
 622
 623     def __init__(self, fileobj, mode):
 624         self.fileobj = fileobj
 625         self.mode = mode
 626         self.name = getattr(self.fileobj, "name", None)
 627         self.init()
 628
 629     def init(self):
 630         import bz2
 631         self.pos = 0
 632         if self.mode == "r":
 633             self.bz2obj = bz2.BZ2Decompressor()
 634             self.fileobj.seek(0)
 635             self.buf = b""
 636         else:
 637             self.bz2obj = bz2.BZ2Compressor()
 638
 639     def read(self, size):
 640         x = len(self.buf)
 641         while x < size:
 642             try:
 643                 raw = self.fileobj.read(self.blocksize)
 644                 data = self.bz2obj.decompress(raw)
 645                 self.buf += data
 646             except EOFError:
 647                 break
 648             x += len(data)
 649
 650         buf = self.buf[:size]
 651         self.buf = self.buf[size:]
 652         self.pos += len(buf)
 653         return buf
 654
 655     def seek(self, pos):
 656         if pos < self.pos:
 657             self.init()
 658         self.read(pos - self.pos)
 659
 660     def tell(self):
 661         return self.pos
 662
 663     def write(self, data):
 664         self.pos += len(data)
 665         raw = self.bz2obj.compress(data)
 666         self.fileobj.write(raw)
 667
 668     def close(self):
 669         if self.mode == "w":
 670             raw = self.bz2obj.flush()
 671             self.fileobj.write(raw)
 672         self.fileobj.close()
 673 # class _BZ2Proxy
 674
 675 #------------------------
 676 # Extraction file object
 677 #------------------------
 678 class _FileInFile(object):
 679     """A thin wrapper around an existing file object that
 680        provides a part of its data as an individual file
 681        object.
 682     """
 683
 684     def __init__(self, fileobj, offset, size, sparse=None):
 685         self.fileobj = fileobj
 686         self.offset = offset
 687         self.size = size
 688         self.sparse = sparse
 689         self.position = 0
 690
 691     def seekable(self):
 692         if not hasattr(self.fileobj, "seekable"):
 693             # XXX gzip.GzipFile and bz2.BZ2File
 694             return True
 695         return self.fileobj.seekable()
 696
 697     def tell(self):
 698         """Return the current file position.
 699         """
 700         return self.position
 701
 702     def seek(self, position):
 703         """Seek to a position in the file.
 704         """
 705         self.position = position
 706
 707     def read(self, size=None):
 708         """Read data from the file.
 709         """
 710         if size is None:
 711             size = self.size - self.position
 712         else:
 713             size = min(size, self.size - self.position)
 714
 715         if self.sparse is None:
 716             return self.readnormal(size)
 717         else:
 718             return self.readsparse(size)
 719
 720     def readnormal(self, size):
 721         """Read operation for regular files.
 722         """
 723         self.fileobj.seek(self.offset + self.position)
 724         self.position += size
 725         return self.fileobj.read(size)
 726
 727     def readsparse(self, size):
 728         """Read operation for sparse files.
 729         """
 730         data = b""
 731         while size > 0:
 732             buf = self.readsparsesection(size)
 733             if not buf:
 734                 break
 735             size -= len(buf)
 736             data += buf
 737         return data
 738
 739     def readsparsesection(self, size):
 740         """Read a single section of a sparse file.
 741         """
 742         section = self.sparse.find(self.position)
 743
 744         if section is None:
 745             return b""
 746
 747         size = min(size, section.offset + section.size - self.position)
 748
 749         if isinstance(section, _data):
 750             realpos = section.realpos + self.position - section.offset
 751             self.fileobj.seek(self.offset + realpos)
 752             self.position += size
 753             return self.fileobj.read(size)
 754         else:
 755             self.position += size
 756             return NUL * size
 757 #class _FileInFile
 758
 759
 760 class ExFileObject(object):
 761     """File-like object for reading an archive member.
 762        Is returned by TarFile.extractfile().
 763     """
 764     blocksize = 1024
 765
 766     def __init__(self, tarfile, tarinfo):
 767         self.fileobj = _FileInFile(tarfile.fileobj,
 768                                    tarinfo.offset_data,
 769                                    tarinfo.size,
 770                                    tarinfo.sparse)
 771         self.name = tarinfo.name
 772         self.mode = "r"
 773         self.closed = False
 774         self.size = tarinfo.size
 775
 776         self.position = 0
 777         self.buffer = b""
 778
 779     def readable(self):
 780         return True
 781
 782     def writable(self):
 783         return False
 784
 785     def seekable(self):
 786         return self.fileobj.seekable()
 787
 788     def read(self, size=None):
 789         """Read at most size bytes from the file. If size is not
 790            present or None, read all data until EOF is reached.
 791         """
 792         if self.closed:
 793             raise ValueError("I/O operation on closed file")
 794
 795         buf = b""
 796         if self.buffer:
 797             if size is None:
 798                 buf = self.buffer
 799                 self.buffer = b""
 800             else:
 801                 buf = self.buffer[:size]
 802                 self.buffer = self.buffer[size:]
 803
 804         if size is None:
 805             buf += self.fileobj.read()
 806         else:
 807             buf += self.fileobj.read(size - len(buf))
 808
 809         self.position += len(buf)
 810         return buf
 811
 812     # XXX TextIOWrapper uses the read1() method.
 813     read1 = read
 814
 815     def readline(self, size=-1):
 816         """Read one entire line from the file. If size is present
 817            and non-negative, return a string with at most that
 818            size, which may be an incomplete line.
 819         """
 820         if self.closed:
 821             raise ValueError("I/O operation on closed file")
 822
 823         pos = self.buffer.find(b"\n") + 1
 824         if pos == 0:
 825             # no newline found.
 826             while True:
 827                 buf = self.fileobj.read(self.blocksize)
 828                 self.buffer += buf
 829                 if not buf or b"\n" in buf:
 830                     pos = self.buffer.find(b"\n") + 1
 831                     if pos == 0:
 832                         # no newline found.
 833                         pos = len(self.buffer)
 834                     break
 835
 836         if size != -1:
 837             pos = min(size, pos)
 838
 839         buf = self.buffer[:pos]
 840         self.buffer = self.buffer[pos:]
 841         self.position += len(buf)
 842         return buf
 843
 844     def readlines(self):
 845         """Return a list with all remaining lines.
 846         """
 847         result = []
 848         while True:
 849             line = self.readline()
 850             if not line: break
 851             result.append(line)
 852         return result
 853
 854     def tell(self):
 855         """Return the current file position.
 856         """
 857         if self.closed:
 858             raise ValueError("I/O operation on closed file")
 859
 860         return self.position
 861
 862     def seek(self, pos, whence=os.SEEK_SET):
 863         """Seek to a position in the file.
 864         """
 865         if self.closed:
 866             raise ValueError("I/O operation on closed file")
 867
 868         if whence == os.SEEK_SET:
 869             self.position = min(max(pos, 0), self.size)
 870         elif whence == os.SEEK_CUR:
 871             if pos < 0:
 872                 self.position = max(self.position + pos, 0)
 873             else:
 874                 self.position = min(self.position + pos, self.size)
 875         elif whence == os.SEEK_END:
 876             self.position = max(min(self.size + pos, self.size), 0)
 877         else:
 878             raise ValueError("Invalid argument")
 879
 880         self.buffer = b""
 881         self.fileobj.seek(self.position)
 882
    def close(self):
        """Close the file object.

           Only the `closed' flag is set here; tell() and seek() check
           that flag and raise ValueError once it is set.
        """
        self.closed = True
 887
 888     def __iter__(self):
 889         """Get an iterator over the file's lines.
 890         """
 891         while True:
 892             line = self.readline()
 893             if not line:
 894                 break
 895             yield line
 896 #class ExFileObject
 897
 898 #------------------
 899 # Exported Classes
 900 #------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    # Only the attributes named here can be set on a TarInfo instance.
    # The first group mirrors the header fields, the second holds the
    # positions and supplemental data collected while reading an archive.
    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")
 914
 915     def __init__(self, name=""):
 916         """Construct a TarInfo object. name is the optional name
 917            of the member.
 918         """
 919         self.name = name        # member name
 920         self.mode = 0o644       # file permissions
 921         self.uid = 0            # user id
 922         self.gid = 0            # group id
 923         self.size = 0           # file size
 924         self.mtime = 0          # modification time
 925         self.chksum = 0         # header checksum
 926         self.type = REGTYPE     # member type
 927         self.linkname = ""      # link name
 928         self.uname = "root"     # user name
 929         self.gname = "root"     # group name
 930         self.devmajor = 0       # device major number
 931         self.devminor = 0       # device minor number
 932
 933         self.offset = 0         # the tar header starts here
 934         self.offset_data = 0    # the file's data starts here
 935
 936         self.sparse = None      # sparse member information
 937         self.pax_headers = {}   # pax header information
 938
    # In pax headers the "name" and "linkname" fields are called
    # "path" and "linkpath".
    def _getpath(self):
        # Getter of the `path' alias: reads the `name' attribute.
        return self.name
    def _setpath(self, name):
        # Setter of the `path' alias: writes the `name' attribute.
        self.name = name
    path = property(_getpath, _setpath)
 946
    def _getlinkpath(self):
        # Getter of the `linkpath' alias: reads the `linkname' attribute.
        return self.linkname
    def _setlinkpath(self, linkname):
        # Setter of the `linkpath' alias: writes the `linkname' attribute.
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)
 952
 953     def __repr__(self):
 954         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 955
 956     def get_info(self):
 957         """Return the TarInfo's attributes as a dictionary.
 958         """
 959         info = {
 960             "name":     normpath(self.name),
 961             "mode":     self.mode & 0o7777,
 962             "uid":      self.uid,
 963             "gid":      self.gid,
 964             "size":     self.size,
 965             "mtime":    self.mtime,
 966             "chksum":   self.chksum,
 967             "type":     self.type,
 968             "linkname": normpath(self.linkname) if self.linkname else "",
 969             "uname":    self.uname,
 970             "gname":    self.gname,
 971             "devmajor": self.devmajor,
 972             "devminor": self.devminor
 973         }
 974
 975         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
 976             info["name"] += "/"
 977
 978         return info
 979
 980     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
 981         """Return a tar header as a string of 512 byte blocks.
 982         """
 983         info = self.get_info()
 984
 985         if format == USTAR_FORMAT:
 986             return self.create_ustar_header(info, encoding, errors)
 987         elif format == GNU_FORMAT:
 988             return self.create_gnu_header(info, encoding, errors)
 989         elif format == PAX_FORMAT:
 990             return self.create_pax_header(info)
 991         else:
 992             raise ValueError("invalid format")
 993
 994     def create_ustar_header(self, info, encoding, errors):
 995         """Return the object as a ustar header block.
 996         """
 997         info["magic"] = POSIX_MAGIC
 998
 999         if len(info["linkname"]) > LENGTH_LINK:
1000             raise ValueError("linkname is too long")
1001
1002         if len(info["name"]) > LENGTH_NAME:
1003             info["prefix"], info["name"] = self._posix_split_name(info["name"])
1004
1005         return self._create_header(info, USTAR_FORMAT, encoding, errors)
1006
1007     def create_gnu_header(self, info, encoding, errors):
1008         """Return the object as a GNU header block sequence.
1009         """
1010         info["magic"] = GNU_MAGIC
1011
1012         buf = b""
1013         if len(info["linkname"]) > LENGTH_LINK:
1014             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
1015
1016         if len(info["name"]) > LENGTH_NAME:
1017             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
1018
1019         return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1020
1021     def create_pax_header(self, info):
1022         """Return the object as a ustar header block. If it cannot be
1023            represented this way, prepend a pax extended header sequence
1024            with supplement information.
1025         """
1026         info["magic"] = POSIX_MAGIC
1027         pax_headers = self.pax_headers.copy()
1028
1029         # Test string fields for values that exceed the field length or cannot
1030         # be represented in ASCII encoding.
1031         for name, hname, length in (
1032                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1033                 ("uname", "uname", 32), ("gname", "gname", 32)):
1034
1035             if hname in pax_headers:
1036                 # The pax header has priority.
1037                 continue
1038
1039             # Try to encode the string as ASCII.
1040             try:
1041                 info[name].encode("ascii", "strict")
1042             except UnicodeEncodeError:
1043                 pax_headers[hname] = info[name]
1044                 continue
1045
1046             if len(info[name]) > length:
1047                 pax_headers[hname] = info[name]
1048
1049         # Test number fields for values that exceed the field limit or values
1050         # that like to be stored as float.
1051         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1052             if name in pax_headers:
1053                 # The pax header has priority. Avoid overflow.
1054                 info[name] = 0
1055                 continue
1056
1057             val = info[name]
1058             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1059                 pax_headers[name] = str(val)
1060                 info[name] = 0
1061
1062         # Create a pax extended header if necessary.
1063         if pax_headers:
1064             buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
1065         else:
1066             buf = b""
1067
1068         return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1069
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.

           `pax_headers' is a dictionary of keyword, value pairs; it is
           handed to _create_pax_generic_header() with the XGLTYPE type.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE)
1075
1076     def _posix_split_name(self, name):
1077         """Split a name longer than 100 chars into a prefix
1078            and a name part.
1079         """
1080         prefix = name[:LENGTH_PREFIX + 1]
1081         while prefix and prefix[-1] != "/":
1082             prefix = prefix[:-1]
1083
1084         name = name[len(prefix):]
1085         prefix = prefix[:-1]
1086
1087         if not prefix or len(name) > LENGTH_NAME:
1088             raise ValueError("name is too long")
1089         return prefix, name
1090
    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.

           Missing keys in info fall back to the defaults given below.
           String fields are encoded with `encoding' and `errors'.
        """
        # The fields are listed in the order in which they are laid out
        # in the header block; the numbers are the field widths in bytes.
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # The "%ds" format makes struct.pack() produce exactly BLOCKSIZE
        # bytes, filling the unused remainder of the block with NULs.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        # The checksum is calculated while the checksum field still holds
        # the eight placeholder spaces.
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first seven bytes of the checksum field with six
        # octal digits and a NUL; the last placeholder space stays.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf
1118
1119     @staticmethod
1120     def _create_payload(payload):
1121         """Return the string payload filled with zero bytes
1122            up to the next 512 byte border.
1123         """
1124         blocks, remainder = divmod(len(payload), BLOCKSIZE)
1125         if remainder > 0:
1126             payload += (BLOCKSIZE - remainder) * NUL
1127         return payload
1128
1129     @classmethod
1130     def _create_gnu_long_header(cls, name, type, encoding, errors):
1131         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1132            for name.
1133         """
1134         name = name.encode(encoding, errors) + NUL
1135
1136         info = {}
1137         info["name"] = "././@LongLink"
1138         info["type"] = type
1139         info["size"] = len(name)
1140         info["magic"] = GNU_MAGIC
1141
1142         # create extended header + name blocks.
1143         return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1144                 cls._create_payload(name)
1145
1146     @classmethod
1147     def _create_pax_generic_header(cls, pax_headers, type):
1148         """Return a POSIX.1-2001 extended or global header sequence
1149            that contains a list of keyword, value pairs. The values
1150            must be strings.
1151         """
1152         records = b""
1153         for keyword, value in pax_headers.items():
1154             keyword = keyword.encode("utf8")
1155             value = value.encode("utf8")
1156             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1157             n = p = 0
1158             while True:
1159                 n = l + len(str(p))
1160                 if n == p:
1161                     break
1162                 p = n
1163             records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1164
1165         # We use a hardcoded "././@PaxHeader" name like star does
1166         # instead of the one that POSIX recommends.
1167         info = {}
1168         info["name"] = "././@PaxHeader"
1169         info["type"] = type
1170         info["size"] = len(records)
1171         info["magic"] = POSIX_MAGIC
1172
1173         # Create pax header + record blocks.
1174         return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1175                 cls._create_payload(records)
1176
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           `encoding' and `errors' are used to decode the string fields.
           Raises HeaderError if buf does not have BLOCKSIZE bytes,
           consists of NUL bytes only, or has a checksum field that
           matches none of the checksums calculated by calc_chksums().
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        # Decode the fixed-position header fields.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                # Each structure is a pair of 12 byte numbers; stop at
                # the first pair for which nti() raises ValueError.
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            # A non-zero byte here means that extension blocks follow;
            # _proc_sparse() reads them.
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
1237
1238     @classmethod
1239     def fromtarfile(cls, tarfile):
1240         """Return the next TarInfo object from TarFile object
1241            tarfile.
1242         """
1243         buf = tarfile.fileobj.read(BLOCKSIZE)
1244         if not buf:
1245             return
1246         obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1247         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1248         return obj._proc_member(tarfile)
1249
1250     #--------------------------------------------------------------------------
1251     # The following are methods that are called depending on the type of a
1252     # member. The entry point is _proc_member() which can be overridden in a
1253     # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1254     # implement the following
1255     # operations:
1256     # 1. Set self.offset_data to the position where the data blocks begin,
1257     #    if there is data that follows.
1258     # 2. Set tarfile.offset to the position where the next member's header will
1259     #    begin.
1260     # 3. Return self or another valid TarInfo object.
1261     def _proc_member(self, tarfile):
1262         """Choose the right processing method depending on
1263            the type and call it.
1264         """
1265         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1266             return self._proc_gnulong(tarfile)
1267         elif self.type == GNUTYPE_SPARSE:
1268             return self._proc_sparse(tarfile)
1269         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1270             return self._proc_pax(tarfile)
1271         else:
1272             return self._proc_builtin(tarfile)
1273
1274     def _proc_builtin(self, tarfile):
1275         """Process a builtin type or an unknown type which
1276            will be treated as a regular file.
1277         """
1278         self.offset_data = tarfile.fileobj.tell()
1279         offset = self.offset_data
1280         if self.isreg() or self.type not in SUPPORTED_TYPES:
1281             # Skip the following data blocks.
1282             offset += self._block(self.size)
1283         tarfile.offset = offset
1284
1285         # Patch the TarInfo object with saved global
1286         # header information.
1287         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1288
1289         return self
1290
1291     def _proc_gnulong(self, tarfile):
1292         """Process the blocks that hold a GNU longname
1293            or longlink member.
1294         """
1295         buf = tarfile.fileobj.read(self._block(self.size))
1296
1297         # Fetch the next header and process it.
1298         next = self.fromtarfile(tarfile)
1299         if next is None:
1300             raise HeaderError("missing subsequent header")
1301
1302         # Patch the TarInfo object from the next header with
1303         # the longname information.
1304         next.offset = self.offset
1305         if self.type == GNUTYPE_LONGNAME:
1306             next.name = nts(buf, tarfile.encoding, tarfile.errors)
1307         elif self.type == GNUTYPE_LONGLINK:
1308             next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1309
1310         return next
1311
1312     def _proc_sparse(self, tarfile):
1313         """Process a GNU sparse header plus extra headers.
1314         """
1315         # We already collected some sparse structures in frombuf().
1316         structs, isextended, origsize = self._sparse_structs
1317         del self._sparse_structs
1318
1319         # Collect sparse structures from extended header blocks.
1320         while isextended:
1321             buf = tarfile.fileobj.read(BLOCKSIZE)
1322             pos = 0
1323             for i in range(21):
1324                 try:
1325                     offset = nti(buf[pos:pos + 12])
1326                     numbytes = nti(buf[pos + 12:pos + 24])
1327                 except ValueError:
1328                     break
1329                 structs.append((offset, numbytes))
1330                 pos += 24
1331             isextended = bool(buf[504])
1332
1333         # Transform the sparse structures to something we can use
1334         # in ExFileObject.
1335         self.sparse = _ringbuffer()
1336         lastpos = 0
1337         realpos = 0
1338         for offset, numbytes in structs:
1339             if offset > lastpos:
1340                 self.sparse.append(_hole(lastpos, offset - lastpos))
1341             self.sparse.append(_data(offset, numbytes, realpos))
1342             realpos += numbytes
1343             lastpos = offset + numbytes
1344         if lastpos < origsize:
1345             self.sparse.append(_hole(lastpos, origsize - lastpos))
1346
1347         self.offset_data = tarfile.fileobj.tell()
1348         tarfile.offset = self.offset_data + self._block(self.size)
1349         self.size = origsize
1350
1351         return self
1352
1353     def _proc_pax(self, tarfile):
1354         """Process an extended or global header as described in
1355            POSIX.1-2001.
1356         """
1357         # Read the header information.
1358         buf = tarfile.fileobj.read(self._block(self.size))
1359
1360         # A pax header stores supplemental information for either
1361         # the following file (extended) or all following files
1362         # (global).
1363         if self.type == XGLTYPE:
1364             pax_headers = tarfile.pax_headers
1365         else:
1366             pax_headers = tarfile.pax_headers.copy()
1367
1368         # Parse pax header information. A record looks like that:
1369         # "%d %s=%s\n" % (length, keyword, value). length is the size
1370         # of the complete record including the length field itself and
1371         # the newline. keyword and value are both UTF-8 encoded strings.
1372         regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1373         pos = 0
1374         while True:
1375             match = regex.match(buf, pos)
1376             if not match:
1377                 break
1378
1379             length, keyword = match.groups()
1380             length = int(length)
1381             value = buf[match.end(2) + 1:match.start(1) + length - 1]
1382
1383             keyword = keyword.decode("utf8")
1384             value = value.decode("utf8")
1385
1386             pax_headers[keyword] = value
1387             pos += length
1388
1389         # Fetch the next header.
1390         next = self.fromtarfile(tarfile)
1391
1392         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1393             if next is None:
1394                 raise HeaderError("missing subsequent header")
1395
1396             # Patch the TarInfo object with the extended header info.
1397             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1398             next.offset = self.offset
1399
1400             if "size" in pax_headers:
1401                 # If the extended header replaces the size field,
1402                 # we need to recalculate the offset where the next
1403                 # header starts.
1404                 offset = next.offset_data
1405                 if next.isreg() or next.type not in SUPPORTED_TYPES:
1406                     offset += next._block(next.size)
1407                 tarfile.offset = offset
1408
1409         return next
1410
1411     def _apply_pax_info(self, pax_headers, encoding, errors):
1412         """Replace fields with supplemental information from a previous
1413            pax extended or global header.
1414         """
1415         for keyword, value in pax_headers.items():
1416             if keyword not in PAX_FIELDS:
1417                 continue
1418
1419             if keyword == "path":
1420                 value = value.rstrip("/")
1421
1422             if keyword in PAX_NUMBER_FIELDS:
1423                 try:
1424                     value = PAX_NUMBER_FIELDS[keyword](value)
1425                 except ValueError:
1426                     value = 0
1427
1428             setattr(self, keyword, value)
1429
1430         self.pax_headers = pax_headers.copy()
1431
1432     def _block(self, count):
1433         """Round up a byte count by BLOCKSIZE and return it,
1434            e.g. _block(834) => 1024.
1435         """
1436         blocks, remainder = divmod(count, BLOCKSIZE)
1437         if remainder:
1438             blocks += 1
1439         return blocks * BLOCKSIZE
1440
    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member's type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member's type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member's type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member's type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member's type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member's type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member's type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1461 # class TarInfo
1462
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # The attributes below are class-level defaults. __init__() replaces
    # them on the instance for every corresponding argument that is not
    # None.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.
                                # __init__() always sets it on the instance:
                                # "replace" for mode 'r', else "strict",
                                # unless given explicitly.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
1488
1489     def __init__(self, name=None, mode="r", fileobj=None, format=None,
1490             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1491             errors=None, pax_headers=None, debug=None, errorlevel=None):
1492         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1493            read from an existing archive, 'a' to append data to an existing
1494            file or 'w' to create a new file overwriting an existing one. `mode'
1495            defaults to 'r'.
1496            If `fileobj' is given, it is used for reading or writing data. If it
1497            can be determined, `mode' is overridden by `fileobj's mode.
1498            `fileobj' is not closed, when TarFile is closed.
1499         """
1500         if len(mode) > 1 or mode not in "raw":
1501             raise ValueError("mode must be 'r', 'a' or 'w'")
1502         self.mode = mode
1503         self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1504
1505         if not fileobj:
1506             if self.mode == "a" and not os.path.exists(name):
1507                 # Create nonexistent files in append mode.
1508                 self.mode = "w"
1509                 self._mode = "wb"
1510             fileobj = bltn_open(name, self._mode)
1511             self._extfileobj = False
1512         else:
1513             if name is None and hasattr(fileobj, "name"):
1514                 name = fileobj.name
1515             if hasattr(fileobj, "mode"):
1516                 self._mode = fileobj.mode
1517             self._extfileobj = True
1518         self.name = os.path.abspath(name) if name else None
1519         self.fileobj = fileobj
1520
1521         # Init attributes.
1522         if format is not None:
1523             self.format = format
1524         if tarinfo is not None:
1525             self.tarinfo = tarinfo
1526         if dereference is not None:
1527             self.dereference = dereference
1528         if ignore_zeros is not None:
1529             self.ignore_zeros = ignore_zeros
1530         if encoding is not None:
1531             self.encoding = encoding
1532
1533         if errors is not None:
1534             self.errors = errors
1535         elif mode == "r":
1536             self.errors = "replace"
1537         else:
1538             self.errors = "strict"
1539
1540         if pax_headers is not None and self.format == PAX_FORMAT:
1541             self.pax_headers = pax_headers
1542         else:
1543             self.pax_headers = {}
1544
1545         if debug is not None:
1546             self.debug = debug
1547         if errorlevel is not None:
1548             self.errorlevel = errorlevel
1549
1550         # Init datastructures.
1551         self.closed = False
1552         self.members = []       # list of members as TarInfo objects
1553         self._loaded = False    # flag if all members have been read
1554         self.offset = self.fileobj.tell()
1555                                 # current position in the archive file
1556         self.inodes = {}        # dictionary caching the inodes of
1557                                 # archive members already added
1558
1559         if self.mode == "r":
1560             self.firstmember = None
1561             self.firstmember = self.next()
1562
1563         if self.mode == "a":
1564             # Move to the end of the archive,
1565             # before the first empty block.
1566             self.firstmember = None
1567             while True:
1568                 if self.next() is None:
1569                     if self.offset > 0:
1570                         self.fileobj.seek(self.fileobj.tell() - BLOCKSIZE)
1571                     break
1572
1573         if self.mode in "aw":
1574             self._loaded = True
1575
1576             if self.pax_headers:
1577                 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1578                 self.fileobj.write(buf)
1579                 self.offset += len(buf)
1580
1581     def _getposix(self):
1582         return self.format == USTAR_FORMAT
1583     def _setposix(self, value):
1584         import warnings
1585         warnings.warn("use the format attribute instead", DeprecationWarning)
1586         if value:
1587             self.format = USTAR_FORMAT
1588         else:
1589             self.format = GNU_FORMAT
1590     posix = property(_getposix, _setposix)
1591
1592     #--------------------------------------------------------------------------
1593     # Below are the classmethods which act as alternate constructors to the
1594     # TarFile class. The open() method is the only one that is needed for
1595     # public use; it is the "super"-constructor and is able to select an
1596     # adequate "sub"-constructor for a particular compression using the mapping
1597     # from OPEN_METH.
1598     #
1599     # This concept allows one to subclass TarFile without losing the comfort of
1600     # the super-constructor. A sub-constructor is registered and made available
1601     # by adding it to the mapping in OPEN_METH.
1602
1603     @classmethod
1604     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1605         """Open a tar archive for reading, writing or appending. Return
1606            an appropriate TarFile class.
1607
1608            mode:
1609            'r' or 'r:*' open for reading with transparent compression
1610            'r:'         open for reading exclusively uncompressed
1611            'r:gz'       open for reading with gzip compression
1612            'r:bz2'      open for reading with bzip2 compression
1613            'a' or 'a:'  open for appending, creating the file if necessary
1614            'w' or 'w:'  open for writing without compression
1615            'w:gz'       open for writing with gzip compression
1616            'w:bz2'      open for writing with bzip2 compression
1617
1618            'r|*'        open a stream of tar blocks with transparent compression
1619            'r|'         open an uncompressed stream of tar blocks for reading
1620            'r|gz'       open a gzip compressed stream of tar blocks
1621            'r|bz2'      open a bzip2 compressed stream of tar blocks
1622            'w|'         open an uncompressed stream for writing
1623            'w|gz'       open a gzip compressed stream for writing
1624            'w|bz2'      open a bzip2 compressed stream for writing
1625         """
1626
1627         if not name and not fileobj:
1628             raise ValueError("nothing to open")
1629
1630         if mode in ("r", "r:*"):
1631             # Find out which *open() is appropriate for opening the file.
1632             for comptype in cls.OPEN_METH:
1633                 func = getattr(cls, cls.OPEN_METH[comptype])
1634                 if fileobj is not None:
1635                     saved_pos = fileobj.tell()
1636                 try:
1637                     return func(name, "r", fileobj, **kwargs)
1638                 except (ReadError, CompressionError) as e:
1639                     if fileobj is not None:
1640                         fileobj.seek(saved_pos)
1641                     continue
1642             raise ReadError("file could not be opened successfully")
1643
1644         elif ":" in mode:
1645             filemode, comptype = mode.split(":", 1)
1646             filemode = filemode or "r"
1647             comptype = comptype or "tar"
1648
1649             # Select the *open() function according to
1650             # given compression.
1651             if comptype in cls.OPEN_METH:
1652                 func = getattr(cls, cls.OPEN_METH[comptype])
1653             else:
1654                 raise CompressionError("unknown compression type %r" % comptype)
1655             return func(name, filemode, fileobj, **kwargs)
1656
1657         elif "|" in mode:
1658             filemode, comptype = mode.split("|", 1)
1659             filemode = filemode or "r"
1660             comptype = comptype or "tar"
1661
1662             if filemode not in "rw":
1663                 raise ValueError("mode must be 'r' or 'w'")
1664
1665             t = cls(name, filemode,
1666                     _Stream(name, filemode, comptype, fileobj, bufsize),
1667                     **kwargs)
1668             t._extfileobj = False
1669             return t
1670
1671         elif mode in "aw":
1672             return cls.taropen(name, mode, fileobj, **kwargs)
1673
1674         raise ValueError("undiscernible mode")
1675
1676     @classmethod
1677     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1678         """Open uncompressed tar archive name for reading or writing.
1679         """
1680         if len(mode) > 1 or mode not in "raw":
1681             raise ValueError("mode must be 'r', 'a' or 'w'")
1682         return cls(name, mode, fileobj, **kwargs)
1683
1684     @classmethod
1685     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1686         """Open gzip compressed tar archive name for reading or writing.
1687            Appending is not allowed.
1688         """
1689         if len(mode) > 1 or mode not in "rw":
1690             raise ValueError("mode must be 'r' or 'w'")
1691
1692         try:
1693             import gzip
1694             gzip.GzipFile
1695         except (ImportError, AttributeError):
1696             raise CompressionError("gzip module is not available")
1697
1698         if fileobj is None:
1699             fileobj = bltn_open(name, mode + "b")
1700
1701         try:
1702             t = cls.taropen(name, mode,
1703                 gzip.GzipFile(name, mode, compresslevel, fileobj),
1704                 **kwargs)
1705         except IOError:
1706             raise ReadError("not a gzip file")
1707         t._extfileobj = False
1708         return t
1709
1710     @classmethod
1711     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1712         """Open bzip2 compressed tar archive name for reading or writing.
1713            Appending is not allowed.
1714         """
1715         if len(mode) > 1 or mode not in "rw":
1716             raise ValueError("mode must be 'r' or 'w'.")
1717
1718         try:
1719             import bz2
1720         except ImportError:
1721             raise CompressionError("bz2 module is not available")
1722
1723         if fileobj is not None:
1724             fileobj = _BZ2Proxy(fileobj, mode)
1725         else:
1726             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1727
1728         try:
1729             t = cls.taropen(name, mode, fileobj, **kwargs)
1730         except IOError:
1731             raise ReadError("not a bzip2 file")
1732         t._extfileobj = False
1733         return t
1734
1735     # All *open() methods are registered here.
1736     OPEN_METH = {
1737         "tar": "taropen",   # uncompressed tar
1738         "gz":  "gzopen",    # gzip compressed tar
1739         "bz2": "bz2open"    # bzip2 compressed tar
1740     }
1741
1742     #--------------------------------------------------------------------------
1743     # The public methods which TarFile provides:
1744
1745     def close(self):
1746         """Close the TarFile. In write-mode, two finishing zero blocks are
1747            appended to the archive.
1748         """
1749         if self.closed:
1750             return
1751
1752         if self.mode in "aw":
1753             self.fileobj.write(NUL * (BLOCKSIZE * 2))
1754             self.offset += (BLOCKSIZE * 2)
1755             # fill up the end with zero-blocks
1756             # (like option -b20 for tar does)
1757             blocks, remainder = divmod(self.offset, RECORDSIZE)
1758             if remainder > 0:
1759                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1760
1761         if not self._extfileobj:
1762             self.fileobj.close()
1763         self.closed = True
1764
1765     def getmember(self, name):
1766         """Return a TarInfo object for member `name'. If `name' can not be
1767            found in the archive, KeyError is raised. If a member occurs more
1768            than once in the archive, its last occurence is assumed to be the
1769            most up-to-date version.
1770         """
1771         tarinfo = self._getmember(name)
1772         if tarinfo is None:
1773             raise KeyError("filename %r not found" % name)
1774         return tarinfo
1775
1776     def getmembers(self):
1777         """Return the members of the archive as a list of TarInfo objects. The
1778            list has the same order as the members in the archive.
1779         """
1780         self._check()
1781         if not self._loaded:    # if we want to obtain a list of
1782             self._load()        # all members, we first have to
1783                                 # scan the whole archive.
1784         return self.members
1785
1786     def getnames(self):
1787         """Return the members of the archive as a list of their names. It has
1788            the same order as the list returned by getmembers().
1789         """
1790         return [tarinfo.name for tarinfo in self.getmembers()]
1791
1792     def gettarinfo(self, name=None, arcname=None, fileobj=None):
1793         """Create a TarInfo object for either the file `name' or the file
1794            object `fileobj' (using os.fstat on its file descriptor). You can
1795            modify some of the TarInfo's attributes before you add it using
1796            addfile(). If given, `arcname' specifies an alternative name for the
1797            file in the archive.
1798         """
1799         self._check("aw")
1800
1801         # When fileobj is given, replace name by
1802         # fileobj's real name.
1803         if fileobj is not None:
1804             name = fileobj.name
1805
1806         # Building the name of the member in the archive.
1807         # Backward slashes are converted to forward slashes,
1808         # Absolute paths are turned to relative paths.
1809         if arcname is None:
1810             arcname = name
1811         arcname = normpath(arcname)
1812         drv, arcname = os.path.splitdrive(arcname)
1813         while arcname[0:1] == "/":
1814             arcname = arcname[1:]
1815
1816         # Now, fill the TarInfo object with
1817         # information specific for the file.
1818         tarinfo = self.tarinfo()
1819         tarinfo.tarfile = self
1820
1821         # Use os.stat or os.lstat, depending on platform
1822         # and if symlinks shall be resolved.
1823         if fileobj is None:
1824             if hasattr(os, "lstat") and not self.dereference:
1825                 statres = os.lstat(name)
1826             else:
1827                 statres = os.stat(name)
1828         else:
1829             statres = os.fstat(fileobj.fileno())
1830         linkname = ""
1831
1832         stmd = statres.st_mode
1833         if stat.S_ISREG(stmd):
1834             inode = (statres.st_ino, statres.st_dev)
1835             if not self.dereference and statres.st_nlink > 1 and \
1836                     inode in self.inodes and arcname != self.inodes[inode]:
1837                 # Is it a hardlink to an already
1838                 # archived file?
1839                 type = LNKTYPE
1840                 linkname = self.inodes[inode]
1841             else:
1842                 # The inode is added only if its valid.
1843                 # For win32 it is always 0.
1844                 type = REGTYPE
1845                 if inode[0]:
1846                     self.inodes[inode] = arcname
1847         elif stat.S_ISDIR(stmd):
1848             type = DIRTYPE
1849         elif stat.S_ISFIFO(stmd):
1850             type = FIFOTYPE
1851         elif stat.S_ISLNK(stmd):
1852             type = SYMTYPE
1853             linkname = os.readlink(name)
1854         elif stat.S_ISCHR(stmd):
1855             type = CHRTYPE
1856         elif stat.S_ISBLK(stmd):
1857             type = BLKTYPE
1858         else:
1859             return None
1860
1861         # Fill the TarInfo object with all
1862         # information we can get.
1863         tarinfo.name = arcname
1864         tarinfo.mode = stmd
1865         tarinfo.uid = statres.st_uid
1866         tarinfo.gid = statres.st_gid
1867         if stat.S_ISREG(stmd):
1868             tarinfo.size = statres.st_size
1869         else:
1870             tarinfo.size = 0
1871         tarinfo.mtime = statres.st_mtime
1872         tarinfo.type = type
1873         tarinfo.linkname = linkname
1874         if pwd:
1875             try:
1876                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1877             except KeyError:
1878                 pass
1879         if grp:
1880             try:
1881                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1882             except KeyError:
1883                 pass
1884
1885         if type in (CHRTYPE, BLKTYPE):
1886             if hasattr(os, "major") and hasattr(os, "minor"):
1887                 tarinfo.devmajor = os.major(statres.st_rdev)
1888                 tarinfo.devminor = os.minor(statres.st_rdev)
1889         return tarinfo
1890
1891     def list(self, verbose=True):
1892         """Print a table of contents to sys.stdout. If `verbose' is False, only
1893            the names of the members are printed. If it is True, an `ls -l'-like
1894            output is produced.
1895         """
1896         self._check()
1897
1898         for tarinfo in self:
1899             if verbose:
1900                 print(filemode(tarinfo.mode), end=' ')
1901                 print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1902                                  tarinfo.gname or tarinfo.gid), end=' ')
1903                 if tarinfo.ischr() or tarinfo.isblk():
1904                     print("%10s" % ("%d,%d" \
1905                                     % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
1906                 else:
1907                     print("%10d" % tarinfo.size, end=' ')
1908                 print("%d-%02d-%02d %02d:%02d:%02d" \
1909                       % time.localtime(tarinfo.mtime)[:6], end=' ')
1910
1911             print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
1912
1913             if verbose:
1914                 if tarinfo.issym():
1915                     print("->", tarinfo.linkname, end=' ')
1916                 if tarinfo.islnk():
1917                     print("link to", tarinfo.linkname, end=' ')
1918             print()
1919
1920     def add(self, name, arcname=None, recursive=True, exclude=None):
1921         """Add the file `name' to the archive. `name' may be any type of file
1922            (directory, fifo, symbolic link, etc.). If given, `arcname'
1923            specifies an alternative name for the file in the archive.
1924            Directories are added recursively by default. This can be avoided by
1925            setting `recursive' to False. `exclude' is a function that should
1926            return True for each filename to be excluded.
1927         """
1928         self._check("aw")
1929
1930         if arcname is None:
1931             arcname = name
1932
1933         # Exclude pathnames.
1934         if exclude is not None and exclude(name):
1935             self._dbg(2, "tarfile: Excluded %r" % name)
1936             return
1937
1938         # Skip if somebody tries to archive the archive...
1939         if self.name is not None and os.path.abspath(name) == self.name:
1940             self._dbg(2, "tarfile: Skipped %r" % name)
1941             return
1942
1943         # Special case: The user wants to add the current
1944         # working directory.
1945         if name == ".":
1946             if recursive:
1947                 if arcname == ".":
1948                     arcname = ""
1949                 for f in os.listdir(name):
1950                     self.add(f, os.path.join(arcname, f), recursive, exclude)
1951             return
1952
1953         self._dbg(1, name)
1954
1955         # Create a TarInfo object from the file.
1956         tarinfo = self.gettarinfo(name, arcname)
1957
1958         if tarinfo is None:
1959             self._dbg(1, "tarfile: Unsupported type %r" % name)
1960             return
1961
1962         # Append the tar header and data to the archive.
1963         if tarinfo.isreg():
1964             f = bltn_open(name, "rb")
1965             self.addfile(tarinfo, f)
1966             f.close()
1967
1968         elif tarinfo.isdir():
1969             self.addfile(tarinfo)
1970             if recursive:
1971                 for f in os.listdir(name):
1972                     self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)
1973
1974         else:
1975             self.addfile(tarinfo)
1976
1977     def addfile(self, tarinfo, fileobj=None):
1978         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1979            given, tarinfo.size bytes are read from it and added to the archive.
1980            You can create TarInfo objects using gettarinfo().
1981            On Windows platforms, `fileobj' should always be opened with mode
1982            'rb' to avoid irritation about the file size.
1983         """
1984         self._check("aw")
1985
1986         tarinfo = copy.copy(tarinfo)
1987
1988         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1989         self.fileobj.write(buf)
1990         self.offset += len(buf)
1991
1992         # If there's data to follow, append it.
1993         if fileobj is not None:
1994             copyfileobj(fileobj, self.fileobj, tarinfo.size)
1995             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1996             if remainder > 0:
1997                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1998                 blocks += 1
1999             self.offset += blocks * BLOCKSIZE
2000
2001         self.members.append(tarinfo)
2002
2003     def extractall(self, path=".", members=None):
2004         """Extract all members from the archive to the current working
2005            directory and set owner, modification time and permissions on
2006            directories afterwards. `path' specifies a different directory
2007            to extract to. `members' is optional and must be a subset of the
2008            list returned by getmembers().
2009         """
2010         directories = []
2011
2012         if members is None:
2013             members = self
2014
2015         for tarinfo in members:
2016             if tarinfo.isdir():
2017                 # Extract directories with a safe mode.
2018                 directories.append(tarinfo)
2019                 tarinfo = copy.copy(tarinfo)
2020                 tarinfo.mode = 0o700
2021             self.extract(tarinfo, path)
2022
2023         # Reverse sort directories.
2024         directories.sort(key=lambda a: a.name)
2025         directories.reverse()
2026
2027         # Set correct owner, mtime and filemode on directories.
2028         for tarinfo in directories:
2029             dirpath = os.path.join(path, tarinfo.name)
2030             try:
2031                 self.chown(tarinfo, dirpath)
2032                 self.utime(tarinfo, dirpath)
2033                 self.chmod(tarinfo, dirpath)
2034             except ExtractError as e:
2035                 if self.errorlevel > 1:
2036                     raise
2037                 else:
2038                     self._dbg(1, "tarfile: %s" % e)
2039
2040     def extract(self, member, path=""):
2041         """Extract a member from the archive to the current working directory,
2042            using its full name. Its file information is extracted as accurately
2043            as possible. `member' may be a filename or a TarInfo object. You can
2044            specify a different directory using `path'.
2045         """
2046         self._check("r")
2047
2048         if isinstance(member, str):
2049             tarinfo = self.getmember(member)
2050         else:
2051             tarinfo = member
2052
2053         # Prepare the link target for makelink().
2054         if tarinfo.islnk():
2055             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2056
2057         try:
2058             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2059         except EnvironmentError as e:
2060             if self.errorlevel > 0:
2061                 raise
2062             else:
2063                 if e.filename is None:
2064                     self._dbg(1, "tarfile: %s" % e.strerror)
2065                 else:
2066                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2067         except ExtractError as e:
2068             if self.errorlevel > 1:
2069                 raise
2070             else:
2071                 self._dbg(1, "tarfile: %s" % e)
2072
2073     def extractfile(self, member):
2074         """Extract a member from the archive as a file object. `member' may be
2075            a filename or a TarInfo object. If `member' is a regular file, a
2076            file-like object is returned. If `member' is a link, a file-like
2077            object is constructed from the link's target. If `member' is none of
2078            the above, None is returned.
2079            The file-like object is read-only and provides the following
2080            methods: read(), readline(), readlines(), seek() and tell()
2081         """
2082         self._check("r")
2083
2084         if isinstance(member, str):
2085             tarinfo = self.getmember(member)
2086         else:
2087             tarinfo = member
2088
2089         if tarinfo.isreg():
2090             return self.fileobject(self, tarinfo)
2091
2092         elif tarinfo.type not in SUPPORTED_TYPES:
2093             # If a member's type is unknown, it is treated as a
2094             # regular file.
2095             return self.fileobject(self, tarinfo)
2096
2097         elif tarinfo.islnk() or tarinfo.issym():
2098             if isinstance(self.fileobj, _Stream):
2099                 # A small but ugly workaround for the case that someone tries
2100                 # to extract a (sym)link as a file-object from a non-seekable
2101                 # stream of tar blocks.
2102                 raise StreamError("cannot extract (sym)link as file object")
2103             else:
2104                 # A (sym)link's file object is its target's file object.
2105                 return self.extractfile(self._getmember(tarinfo.linkname,
2106                                                         tarinfo))
2107         else:
2108             # If there's no data associated with the member (directory, chrdev,
2109             # blkdev, etc.), return None instead of a file object.
2110             return None
2111
2112     def _extract_member(self, tarinfo, targetpath):
2113         """Extract the TarInfo object tarinfo to a physical
2114            file called targetpath.
2115         """
2116         # Fetch the TarInfo object for the given name
2117         # and build the destination pathname, replacing
2118         # forward slashes to platform specific separators.
2119         if targetpath[-1:] == "/":
2120             targetpath = targetpath[:-1]
2121         targetpath = os.path.normpath(targetpath)
2122
2123         # Create all upper directories.
2124         upperdirs = os.path.dirname(targetpath)
2125         if upperdirs and not os.path.exists(upperdirs):
2126             # Create directories that are not part of the archive with
2127             # default permissions.
2128             os.makedirs(upperdirs)
2129
2130         if tarinfo.islnk() or tarinfo.issym():
2131             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2132         else:
2133             self._dbg(1, tarinfo.name)
2134
2135         if tarinfo.isreg():
2136             self.makefile(tarinfo, targetpath)
2137         elif tarinfo.isdir():
2138             self.makedir(tarinfo, targetpath)
2139         elif tarinfo.isfifo():
2140             self.makefifo(tarinfo, targetpath)
2141         elif tarinfo.ischr() or tarinfo.isblk():
2142             self.makedev(tarinfo, targetpath)
2143         elif tarinfo.islnk() or tarinfo.issym():
2144             self.makelink(tarinfo, targetpath)
2145         elif tarinfo.type not in SUPPORTED_TYPES:
2146             self.makeunknown(tarinfo, targetpath)
2147         else:
2148             self.makefile(tarinfo, targetpath)
2149
2150         self.chown(tarinfo, targetpath)
2151         if not tarinfo.issym():
2152             self.chmod(tarinfo, targetpath)
2153             self.utime(tarinfo, targetpath)
2154
2155     #--------------------------------------------------------------------------
2156     # Below are the different file methods. They are called via
2157     # _extract_member() when extract() is called. They can be replaced in a
2158     # subclass to implement other functionality.
2159
2160     def makedir(self, tarinfo, targetpath):
2161         """Make a directory called targetpath.
2162         """
2163         try:
2164             # Use a safe mode for the directory, the real mode is set
2165             # later in _extract_member().
2166             os.mkdir(targetpath, 0o700)
2167         except EnvironmentError as e:
2168             if e.errno != errno.EEXIST:
2169                 raise
2170
2171     def makefile(self, tarinfo, targetpath):
2172         """Make a file called targetpath.
2173         """
2174         source = self.extractfile(tarinfo)
2175         target = bltn_open(targetpath, "wb")
2176         copyfileobj(source, target)
2177         source.close()
2178         target.close()
2179
2180     def makeunknown(self, tarinfo, targetpath):
2181         """Make a file from a TarInfo object with an unknown type
2182            at targetpath.
2183         """
2184         self.makefile(tarinfo, targetpath)
2185         self._dbg(1, "tarfile: Unknown file type %r, " \
2186                      "extracted as regular file." % tarinfo.type)
2187
2188     def makefifo(self, tarinfo, targetpath):
2189         """Make a fifo called targetpath.
2190         """
2191         if hasattr(os, "mkfifo"):
2192             os.mkfifo(targetpath)
2193         else:
2194             raise ExtractError("fifo not supported by system")
2195
2196     def makedev(self, tarinfo, targetpath):
2197         """Make a character or block device called targetpath.
2198         """
2199         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2200             raise ExtractError("special devices not supported by system")
2201
2202         mode = tarinfo.mode
2203         if tarinfo.isblk():
2204             mode |= stat.S_IFBLK
2205         else:
2206             mode |= stat.S_IFCHR
2207
2208         os.mknod(targetpath, mode,
2209                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
2210
2211     def makelink(self, tarinfo, targetpath):
2212         """Make a (symbolic) link called targetpath. If it cannot be created
2213           (platform limitation), we try to make a copy of the referenced file
2214           instead of a link.
2215         """
2216         linkpath = tarinfo.linkname
2217         try:
2218             if tarinfo.issym():
2219                 os.symlink(linkpath, targetpath)
2220             else:
2221                 # See extract().
2222                 os.link(tarinfo._link_target, targetpath)
2223         except AttributeError:
2224             if tarinfo.issym():
2225                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
2226                                         linkpath)
2227                 linkpath = normpath(linkpath)
2228
2229             try:
2230                 self._extract_member(self.getmember(linkpath), targetpath)
2231             except (EnvironmentError, KeyError) as e:
2232                 linkpath = os.path.normpath(linkpath)
2233                 try:
2234                     shutil.copy2(linkpath, targetpath)
2235                 except EnvironmentError as e:
2236                     raise IOError("link could not be created")
2237
2238     def chown(self, tarinfo, targetpath):
2239         """Set owner of targetpath according to tarinfo.
2240         """
2241         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2242             # We have to be root to do so.
2243             try:
2244                 g = grp.getgrnam(tarinfo.gname)[2]
2245             except KeyError:
2246                 try:
2247                     g = grp.getgrgid(tarinfo.gid)[2]
2248                 except KeyError:
2249                     g = os.getgid()
2250             try:
2251                 u = pwd.getpwnam(tarinfo.uname)[2]
2252             except KeyError:
2253                 try:
2254                     u = pwd.getpwuid(tarinfo.uid)[2]
2255                 except KeyError:
2256                     u = os.getuid()
2257             try:
2258                 if tarinfo.issym() and hasattr(os, "lchown"):
2259                     os.lchown(targetpath, u, g)
2260                 else:
2261                     if sys.platform != "os2emx":
2262                         os.chown(targetpath, u, g)
2263             except EnvironmentError as e:
2264                 raise ExtractError("could not change owner")
2265
2266     def chmod(self, tarinfo, targetpath):
2267         """Set file permissions of targetpath according to tarinfo.
2268         """
2269         if hasattr(os, 'chmod'):
2270             try:
2271                 os.chmod(targetpath, tarinfo.mode)
2272             except EnvironmentError as e:
2273                 raise ExtractError("could not change mode")
2274
2275     def utime(self, tarinfo, targetpath):
2276         """Set modification time of targetpath according to tarinfo.
2277         """
2278         if not hasattr(os, 'utime'):
2279             return
2280         if sys.platform == "win32" and tarinfo.isdir():
2281             # According to msdn.microsoft.com, it is an error (EACCES)
2282             # to use utime() on directories.
2283             return
2284         try:
2285             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2286         except EnvironmentError as e:
2287             raise ExtractError("could not change modification time")
2288
2289     #--------------------------------------------------------------------------
2290     def next(self):
2291         """Return the next member of the archive as a TarInfo object, when
2292            TarFile is opened for reading. Return None if there is no more
2293            available.
2294         """
2295         self._check("ra")
2296         if self.firstmember is not None:
2297             m = self.firstmember
2298             self.firstmember = None
2299             return m
2300
2301         # Read the next block.
2302         self.fileobj.seek(self.offset)
2303         while True:
2304             try:
2305                 tarinfo = self.tarinfo.fromtarfile(self)
2306                 if tarinfo is None:
2307                     return
2308                 self.members.append(tarinfo)
2309
2310             except HeaderError as e:
2311                 if self.ignore_zeros:
2312                     self._dbg(2, "0x%X: %s" % (self.offset, e))
2313                     self.offset += BLOCKSIZE
2314                     continue
2315                 else:
2316                     if self.offset == 0:
2317                         raise ReadError(str(e))
2318                     return None
2319             break
2320
2321         return tarinfo
2322
2323     #--------------------------------------------------------------------------
2324     # Little helper methods:
2325
2326     def _getmember(self, name, tarinfo=None):
2327         """Find an archive member by name from bottom to top.
2328            If tarinfo is given, it is used as the starting point.
2329         """
2330         # Ensure that all members have been loaded.
2331         members = self.getmembers()
2332
2333         if tarinfo is None:
2334             end = len(members)
2335         else:
2336             end = members.index(tarinfo)
2337
2338         for i in range(end - 1, -1, -1):
2339             if name == members[i].name:
2340                 return members[i]
2341
2342     def _load(self):
2343         """Read through the entire archive file and look for readable
2344            members.
2345         """
2346         while True:
2347             tarinfo = self.next()
2348             if tarinfo is None:
2349                 break
2350         self._loaded = True
2351
2352     def _check(self, mode=None):
2353         """Check if TarFile is still open, and if the operation's mode
2354            corresponds to TarFile's mode.
2355         """
2356         if self.closed:
2357             raise IOError("%s is closed" % self.__class__.__name__)
2358         if mode is not None and self.mode not in mode:
2359             raise IOError("bad operation for mode %r" % self.mode)
2360
2361     def __iter__(self):
2362         """Provide an iterator object.
2363         """
2364         if self._loaded:
2365             return iter(self.members)
2366         else:
2367             return TarIter(self)
2368
2369     def _dbg(self, level, msg):
2370         """Write debugging output to sys.stderr.
2371         """
2372         if level <= self.debug:
2373             print(msg, file=sys.stderr)
2374 # class TarFile
2375
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Set up iteration over `tarfile', starting at its first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """A TarIter is its own iterator.
        """
        return self

    def __next__(self):
        """Return the next member. It is read with TarFile's next()
           method as long as the archive is not completely loaded; once
           all members have been read, the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. In that
        # case we continue with the list of loaded members.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2411
2412 # Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size' bytes that
       starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True if `offset' lies within this section; the end is exclusive.
        start = self.offset
        return start <= offset and offset < start + self.size
2421
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is stored
       next to the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2428
class _hole(_section):
    """A hole section of a sparse file.  Adds nothing to _section; the
       type itself is the marker.
    """
2433
2434 class _ringbuffer(list):
2435     """Ringbuffer class which increases performance
2436        over a regular list.
2437     """
2438     def __init__(self):
2439         self.idx = 0
2440     def find(self, offset):
2441         idx = self.idx
2442         while True:
2443             item = self[idx]
2444             if offset in item:
2445                 break
2446             idx += 1
2447             if idx == len(self):
2448                 idx = 0
2449             if idx == self.idx:
2450                 # End of File
2451                 return None
2452         self.idx = idx
2453         return item
2454
2455 #---------------------------------------------
2456 # zipfile compatible TarFile class
2457 #---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a tar archive.

           compression selects the opener: TAR_PLAIN uses
           TarFile.taropen(), TAR_GZIPPED uses TarFile.gzopen(); any
           other value raises ValueError.  When mode starts with "r",
           every member additionally gets the zipfile-style attributes
           filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            members = self.tarfile.getmembers()
            for m in members:
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        """Return a list with the names of the members from infolist().
        """
        # A real list, not a lazy map object, so the result can be
        # indexed, measured and iterated more than once.
        return [m.name for m in self.infolist()]
    def infolist(self):
        """Return a list of the members whose type is in REGULAR_TYPES.
        """
        # A real list, not a one-shot filter object.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]
    def printdir(self):
        """Delegate to TarFile.list().
        """
        self.tarfile.list()
    def testzip(self):
        """Do nothing and return None.
        """
        return
    def getinfo(self, name):
        """Return the member called name via TarFile.getmember().
        """
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the complete content of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive under arcname.

           compress_type is accepted for signature compatibility and
           ignored.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add a member described by zinfo with the content bytes.

           The zipfile-style attributes filename, file_size and
           date_time of zinfo are copied to name, size and mtime before
           zinfo is handed to TarFile.addfile().  bytes has to be a
           bytes-like object.
        """
        # The payload is binary, so it must be wrapped in BytesIO;
        # StringIO refuses anything that is not a str.
        from io import BytesIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, BytesIO(bytes))
    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2502
2503 #--------------------
2504 # exported functions
2505 #--------------------
def is_tarfile(name):
    """Tell whether name can be opened as a tar archive.

       Return True if opening name succeeds, and False if a TarError
       is raised on the way.  Other exceptions are not caught.
    """
    # open is looked up when the function runs; by then the module has
    # rebound that name to TarFile.open (see the end of the file).
    try:
        open(name).close()
    except TarError:
        return False
    return True
2516
# Keep the previous meaning of open reachable as bltn_open before the
# name is shadowed (presumably the builtin open -- confirm that nothing
# earlier in the module rebinds it).  The order of these two statements
# matters.
bltn_open = open
# From here on the module-level name open, and thereby tarfile.open for
# importers, refers to TarFile.open.
open = TarFile.open