1 "Read and write ZIP files."
2 # Written by James C. Ahlstrom jim@interet.com
3 # All rights transferred to CNRI pursuant to the Python contribution agreement
5 import struct
, os
, time
9 import zlib
# We may need its compression method
13 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
14 "ZipInfo", "ZipFile", "PyZipFile"]
class BadZipfile(Exception):
    """Raised by this module when a file fails ZIP validation.

    The visible raise sites cover: a missing end-of-archive record (or an
    archive that ends with a comment), a bad magic number on a central
    directory entry or a local file header, and a CRC-32 mismatch.
    """


error = BadZipfile  # The exception raised by this module
20 # constants for Zip file compression methods
23 # Other ZIP compression methods not supported
# Here are some struct module formats for reading headers.
# All formats are little-endian ("<") with standard sizes, so the byte
# counts below are fixed regardless of platform.
structEndArchive = "<4s4H2lH"       # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"     # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"  # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"     # magic number for central directory
structFileHeader = "<4s2B4H3l2H"    # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"     # magic number for file header
# indexes of entries in the central directory structure
# (positions in the tuple returned by struct.unpack(structCentralDir, ...))
_CD_CREATE_VERSION = 1
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
# indexes of entries in the local file header structure
# (positions in the tuple returned by struct.unpack(structFileHeader, ...))
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
# Used to compare file passed to ZipFile: ZipFile.__init__ tests
# "type(file) in _STRING_TYPES" to tell a path apart from a file-like object.
# Fall back to the built-in str when the types module has no StringType, so
# building the tuple cannot fail at import time.
_STRING_TYPES = (getattr(types, "StringType", str),)
if hasattr(types, "UnicodeType"):
    _STRING_TYPES = _STRING_TYPES + (types.UnicodeType,)
75 def is_zipfile(filename
):
76 """Quickly see if file is a ZIP file by checking the magic number.
78 Will not accept a ZIP archive with an ending comment.
81 fpin
= open(filename
, "rb")
82 fpin
.seek(-22, 2) # Seek to end-of-file record
85 if endrec
[0:4] == "PK\005\006" and endrec
[-2:] == "\000\000":
86 return 1 # file has correct magic number
92 """Class with attributes describing each file in the ZIP archive."""
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    """Initialise the description of one file in the archive.

    filename  -- name of the file; it is passed through _normpath()
                 before being stored.
    date_time -- tuple of (year, month, day, hour, min, sec).

    Every other attribute starts at a fixed default and may be
    overwritten by the caller or by ZipFile.
    """
    self.filename = _normpath(filename)  # Name of the file in the archive
    self.date_time = date_time           # year, month, day, hour, min, sec
    self.compress_type = ZIP_STORED      # Type of compression for the file
    self.comment = ""                    # Comment for each file
    self.extra = ""                      # ZIP extra data
    self.create_system = 0               # System which created ZIP archive
    self.create_version = 20             # Version which created ZIP archive
    self.extract_version = 20            # Version needed to extract archive
    self.reserved = 0                    # Must be zero
    self.flag_bits = 0                   # ZIP flag bits
    self.volume = 0                      # Volume number of file header
    self.internal_attr = 0               # Internal attributes
    self.external_attr = 0               # External file attributes
    # Other attributes are set by class ZipFile:
    # header_offset     Byte offset to the file header
    # file_offset       Byte offset to the start of the file data
    # CRC               CRC-32 of the uncompressed file
    # compress_size     Size of the compressed file
    # file_size         Size of the uncompressed file
def FileHeader(self):
    """Return the per-file header as a string.

    The result is the packed structFileHeader record followed by the
    file name and the extra field.
    """
    # NOTE(review): the "dt = ...", "else:" and "CRC = self.CRC" lines were
    # absent from the damaged text and are restored from the names the
    # surviving statements use -- confirm against the upstream module.
    dt = self.date_time
    # Pack the date and time into the two 16-bit DOS fields; seconds are
    # stored halved.
    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    if self.flag_bits & 0x08:
        # Set these to zero because we write them after the file data
        CRC = compress_size = file_size = 0
    else:
        CRC = self.CRC
        compress_size = self.compress_size
        file_size = self.file_size
    header = struct.pack(structFileHeader, stringFileHeader,
                         self.extract_version, self.reserved, self.flag_bits,
                         self.compress_type, dostime, dosdate, CRC,
                         compress_size, file_size,
                         len(self.filename), len(self.extra))
    return header + self.filename + self.extra
136 # This is used to ensure paths in generated ZIP files always use
137 # forward slashes as the directory separator, as required by the
138 # ZIP format specification.
141 return path
.replace(os
.sep
, "/")
148 """ Class with methods to open, read, write, close, list zip files.
150 z = ZipFile(file, mode="r", compression=ZIP_STORED)
152 file: Either the path to the file, or a file-like object.
153 If it is a path, the file will be opened and closed by ZipFile.
154 mode: The mode can be either read "r", write "w" or append "a".
155 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
158 fp
= None # Set here since __del__ checks it
160 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
):
161 """Open the ZIP file with mode read "r", write "w" or append "a"."""
162 if compression
== ZIP_STORED
:
164 elif compression
== ZIP_DEFLATED
:
167 "Compression requires the (missing) zlib module"
169 raise RuntimeError, "That compression method is not supported"
170 self
.debug
= 0 # Level of printing: 0 through 3
171 self
.NameToInfo
= {} # Find file info given name
172 self
.filelist
= [] # List of ZipInfo instances for archive
173 self
.compression
= compression
# Method of compression
174 self
.mode
= key
= mode
[0]
176 # Check if we were passed a file-like object
177 if type(file) in _STRING_TYPES
:
180 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
181 self
.fp
= open(file, modeDict
[mode
])
185 self
.filename
= getattr(file, 'name', None)
193 fp
.seek(-22, 2) # Seek to end-of-file record
195 if endrec
[0:4] == stringEndArchive
and \
196 endrec
[-2:] == "\000\000":
197 self
._GetContents
() # file is a zip file
198 # seek to start of directory and overwrite
199 fp
.seek(self
.start_dir
, 0)
200 else: # file is not a zip file, just append
203 if not self
._filePassed
:
206 raise RuntimeError, 'Mode must be "r", "w" or "a"'
208 def _GetContents(self
):
209 """Read the directory, making sure we close the file if the format
212 self
._RealGetContents
()
214 if not self
._filePassed
:
219 def _RealGetContents(self
):
220 """Read in the table of contents for the ZIP file."""
222 fp
.seek(-22, 2) # Start of end-of-archive record
223 filesize
= fp
.tell() + 22 # Get file size
224 endrec
= fp
.read(22) # Archive must not end with a comment!
225 if endrec
[0:4] != stringEndArchive
or endrec
[-2:] != "\000\000":
226 raise BadZipfile
, "File is not a zip file, or ends with a comment"
227 endrec
= struct
.unpack(structEndArchive
, endrec
)
230 size_cd
= endrec
[5] # bytes in central directory
231 offset_cd
= endrec
[6] # offset of central directory
232 x
= filesize
- 22 - size_cd
233 # "concat" is zero, unless zip was concatenated to another file
234 concat
= x
- offset_cd
236 print "given, inferred, offset", offset_cd
, x
, concat
237 # self.start_dir: Position of start of central directory
238 self
.start_dir
= offset_cd
+ concat
239 fp
.seek(self
.start_dir
, 0)
241 while total
< size_cd
:
242 centdir
= fp
.read(46)
244 if centdir
[0:4] != stringCentralDir
:
245 raise BadZipfile
, "Bad magic number for central directory"
246 centdir
= struct
.unpack(structCentralDir
, centdir
)
249 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
250 # Create ZipInfo instance to store file information
251 x
= ZipInfo(filename
)
252 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
253 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
254 total
= (total
+ centdir
[_CD_FILENAME_LENGTH
]
255 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
256 + centdir
[_CD_COMMENT_LENGTH
])
257 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
] + concat
258 # file_offset must be computed below...
259 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
260 x
.flag_bits
, x
.compress_type
, t
, d
,
261 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
262 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
263 # Convert date/time code to (year, month, day, hour, min, sec)
264 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
265 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
266 self
.filelist
.append(x
)
267 self
.NameToInfo
[x
.filename
] = x
270 for data
in self
.filelist
:
271 fp
.seek(data
.header_offset
, 0)
272 fheader
= fp
.read(30)
273 if fheader
[0:4] != stringFileHeader
:
274 raise BadZipfile
, "Bad magic number for file header"
275 fheader
= struct
.unpack(structFileHeader
, fheader
)
276 # file_offset is computed here, since the extra field for
277 # the central directory and for the local file header
278 # refer to different fields, and they can have different
280 data
.file_offset
= (data
.header_offset
+ 30
281 + fheader
[_FH_FILENAME_LENGTH
]
282 + fheader
[_FH_EXTRA_FIELD_LENGTH
])
283 fname
= fp
.read(fheader
[_FH_FILENAME_LENGTH
])
284 if fname
!= data
.filename
:
285 raise RuntimeError, \
286 'File name in directory "%s" and header "%s" differ.' % (
287 data
.filename
, fname
)
290 """Return a list of file names in the archive."""
292 for data
in self
.filelist
:
293 l
.append(data
.filename
)
297 """Return a list of class ZipInfo instances for files in the
302 """Print a table of contents for the zip file."""
303 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
304 for zinfo
in self
.filelist
:
305 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
306 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
309 """Read all the files and check the CRC."""
310 for zinfo
in self
.filelist
:
312 self
.read(zinfo
.filename
) # Check CRC-32
314 return zinfo
.filename
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    The lookup is a plain index into self.NameToInfo, so an unknown
    name raises KeyError.
    """
    return self.NameToInfo[name]
320 def read(self
, name
):
321 """Return file bytes (as a string) for name."""
322 if self
.mode
not in ("r", "a"):
323 raise RuntimeError, 'read() requires mode "r" or "a"'
325 raise RuntimeError, \
326 "Attempt to read ZIP archive that was already closed"
327 zinfo
= self
.getinfo(name
)
328 filepos
= self
.fp
.tell()
329 self
.fp
.seek(zinfo
.file_offset
, 0)
330 bytes
= self
.fp
.read(zinfo
.compress_size
)
331 self
.fp
.seek(filepos
, 0)
332 if zinfo
.compress_type
== ZIP_STORED
:
334 elif zinfo
.compress_type
== ZIP_DEFLATED
:
336 raise RuntimeError, \
337 "De-compression requires the (missing) zlib module"
338 # zlib compress/decompress code by Jeremy Hylton of CNRI
339 dc
= zlib
.decompressobj(-15)
340 bytes
= dc
.decompress(bytes
)
341 # need to feed in unused pad byte so that zlib won't choke
342 ex
= dc
.decompress('Z') + dc
.flush()
347 "Unsupported compression method %d for file %s" % \
348 (zinfo
.compress_type
, name
)
349 crc
= binascii
.crc32(bytes
)
351 raise BadZipfile
, "Bad CRC-32 for file %s" % name
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate name only prints a warning, and only when self.debug is
    set.  Every other failed check raises RuntimeError: wrong mode,
    archive already closed, deflate requested without zlib, or a
    compression method other than ZIP_STORED / ZIP_DEFLATED.
    """
    if zinfo.filename in self.NameToInfo:
        if self.debug:      # Warning for duplicate names
            # Single-argument print produces the same text whether print
            # is a statement or a function.
            print("Duplicate name: %s" % (zinfo.filename,))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    # NOTE(review): this guard line was absent from the damaged text and is
    # restored from the error message that follows it -- confirm upstream.
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
371 def write(self
, filename
, arcname
=None, compress_type
=None):
372 """Put the bytes from filename into the archive under the name
374 st
= os
.stat(filename
)
375 mtime
= time
.localtime(st
[8])
376 date_time
= mtime
[0:6]
377 # Create ZipInfo instance to store file information
379 zinfo
= ZipInfo(filename
, date_time
)
381 zinfo
= ZipInfo(arcname
, date_time
)
382 zinfo
.external_attr
= st
[0] << 16 # Unix attributes
383 if compress_type
is None:
384 zinfo
.compress_type
= self
.compression
386 zinfo
.compress_type
= compress_type
387 self
._writecheck
(zinfo
)
388 fp
= open(filename
, "rb")
389 zinfo
.flag_bits
= 0x00
390 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
391 # Must overwrite CRC and sizes with correct data later
393 zinfo
.compress_size
= compress_size
= 0
394 zinfo
.file_size
= file_size
= 0
395 self
.fp
.write(zinfo
.FileHeader())
396 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
397 if zinfo
.compress_type
== ZIP_DEFLATED
:
398 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
403 buf
= fp
.read(1024 * 8)
406 file_size
= file_size
+ len(buf
)
407 CRC
= binascii
.crc32(buf
, CRC
)
409 buf
= cmpr
.compress(buf
)
410 compress_size
= compress_size
+ len(buf
)
415 compress_size
= compress_size
+ len(buf
)
417 zinfo
.compress_size
= compress_size
419 zinfo
.compress_size
= file_size
421 zinfo
.file_size
= file_size
422 # Seek backwards and write CRC and file sizes
423 position
= self
.fp
.tell() # Preserve current position in file
424 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
425 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
427 self
.fp
.seek(position
, 0)
428 self
.filelist
.append(zinfo
)
429 self
.NameToInfo
[zinfo
.filename
] = zinfo
431 def writestr(self
, zinfo
, bytes
):
432 """Write a file into the archive. The contents is the string
434 self
._writecheck
(zinfo
)
435 zinfo
.file_size
= len(bytes
) # Uncompressed size
436 zinfo
.CRC
= binascii
.crc32(bytes
) # CRC-32 checksum
437 if zinfo
.compress_type
== ZIP_DEFLATED
:
438 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
440 bytes
= co
.compress(bytes
) + co
.flush()
441 zinfo
.compress_size
= len(bytes
) # Compressed size
443 zinfo
.compress_size
= zinfo
.file_size
444 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
445 self
.fp
.write(zinfo
.FileHeader())
446 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
448 if zinfo
.flag_bits
& 0x08:
449 # Write CRC and file sizes after the file data
450 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
452 self
.filelist
.append(zinfo
)
453 self
.NameToInfo
[zinfo
.filename
] = zinfo
456 """Call the "close()" method in case the user forgot."""
460 """Close the file, and for mode "w" and "a" write the ending
464 if self
.mode
in ("w", "a"): # write ending records
466 pos1
= self
.fp
.tell()
467 for zinfo
in self
.filelist
: # write central directory
470 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
471 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
472 centdir
= struct
.pack(structCentralDir
,
473 stringCentralDir
, zinfo
.create_version
,
474 zinfo
.create_system
, zinfo
.extract_version
, zinfo
.reserved
,
475 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
476 zinfo
.CRC
, zinfo
.compress_size
, zinfo
.file_size
,
477 len(zinfo
.filename
), len(zinfo
.extra
), len(zinfo
.comment
),
478 0, zinfo
.internal_attr
, zinfo
.external_attr
,
480 self
.fp
.write(centdir
)
481 self
.fp
.write(zinfo
.filename
)
482 self
.fp
.write(zinfo
.extra
)
483 self
.fp
.write(zinfo
.comment
)
484 pos2
= self
.fp
.tell()
485 # Write end-of-zip-archive record
486 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
487 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
488 self
.fp
.write(endrec
)
490 if not self
._filePassed
:
495 class PyZipFile(ZipFile
):
496 """Class to create ZIP archives with Python library files and packages."""
498 def writepy(self
, pathname
, basename
= ""):
499 """Add all files from "pathname" to the ZIP archive.
501 If pathname is a package directory, search the directory and
502 all package subdirectories recursively for all *.py and enter
503 the modules into the archive. If pathname is a plain
504 directory, listdir *.py and enter all modules. Else, pathname
505 must be a Python *.py file and the module will be put into the
506 archive. Added modules are always module.pyo or module.pyc.
507 This method will compile the module.py into module.pyc if
510 dir, name
= os
.path
.split(pathname
)
511 if os
.path
.isdir(pathname
):
512 initname
= os
.path
.join(pathname
, "__init__.py")
513 if os
.path
.isfile(initname
):
514 # This is a package directory, add it
516 basename
= "%s/%s" % (basename
, name
)
520 print "Adding package in", pathname
, "as", basename
521 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
523 print "Adding", arcname
524 self
.write(fname
, arcname
)
525 dirlist
= os
.listdir(pathname
)
526 dirlist
.remove("__init__.py")
527 # Add all *.py files and package subdirectories
528 for filename
in dirlist
:
529 path
= os
.path
.join(pathname
, filename
)
530 root
, ext
= os
.path
.splitext(filename
)
531 if os
.path
.isdir(path
):
532 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
533 # This is a package directory, add it
534 self
.writepy(path
, basename
) # Recursive call
536 fname
, arcname
= self
._get
_codename
(path
[0:-3],
539 print "Adding", arcname
540 self
.write(fname
, arcname
)
542 # This is NOT a package directory, add its files at top level
544 print "Adding files from directory", pathname
545 for filename
in os
.listdir(pathname
):
546 path
= os
.path
.join(pathname
, filename
)
547 root
, ext
= os
.path
.splitext(filename
)
549 fname
, arcname
= self
._get
_codename
(path
[0:-3],
552 print "Adding", arcname
553 self
.write(fname
, arcname
)
555 if pathname
[-3:] != ".py":
556 raise RuntimeError, \
557 'Files added with writepy() must end with ".py"'
558 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
560 print "Adding file", arcname
561 self
.write(fname
, arcname
)
563 def _get_codename(self
, pathname
, basename
):
564 """Return (filename, archivename) for the path.
566 Given a module name path, return the correct file path and
567 archive name, compiling if necessary. For example, given
568 /python/lib/string, return (/python/lib/string.pyc, string).
570 file_py
= pathname
+ ".py"
571 file_pyc
= pathname
+ ".pyc"
572 file_pyo
= pathname
+ ".pyo"
573 if os
.path
.isfile(file_pyo
) and \
574 os
.stat(file_pyo
)[8] >= os
.stat(file_py
)[8]:
575 fname
= file_pyo
# Use .pyo file
576 elif not os
.path
.isfile(file_pyc
) or \
577 os
.stat(file_pyc
)[8] < os
.stat(file_py
)[8]:
580 print "Compiling", file_py
581 py_compile
.compile(file_py
, file_pyc
)
585 archivename
= os
.path
.split(fname
)[1]
587 archivename
= "%s/%s" % (basename
, archivename
)
588 return (fname
, archivename
)