1 "Read and write ZIP files."
2 # Written by James C. Ahlstrom jim@interet.com
3 # All rights transferred to CNRI pursuant to the Python contribution agreement
5 import struct
, os
, time
9 import zlib
# We may need its compression method
13 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
14 "ZipInfo", "ZipFile", "PyZipFile"]
16 class BadZipfile(Exception):
18 error
= BadZipfile
# The exception raised by this module
20 # constants for Zip file compression methods
23 # Other ZIP compression methods not supported
25 # Here are some struct module formats for reading headers
26 structEndArchive
= "<4s4H2lH" # 9 items, end of archive, 22 bytes
27 stringEndArchive
= "PK\005\006" # magic number for end of archive record
28 structCentralDir
= "<4s4B4H3l5HLl"# 19 items, central directory, 46 bytes
29 stringCentralDir
= "PK\001\002" # magic number for central directory
30 structFileHeader
= "<4s2B4H3l2H" # 12 items, file header record, 30 bytes
31 stringFileHeader
= "PK\003\004" # magic number for file header
33 # indexes of entries in the central directory structure
35 _CD_CREATE_VERSION
= 1
37 _CD_EXTRACT_VERSION
= 3
38 _CD_EXTRACT_SYSTEM
= 4 # is this meaningful?
44 _CD_COMPRESSED_SIZE
= 10
45 _CD_UNCOMPRESSED_SIZE
= 11
46 _CD_FILENAME_LENGTH
= 12
47 _CD_EXTRA_FIELD_LENGTH
= 13
48 _CD_COMMENT_LENGTH
= 14
49 _CD_DISK_NUMBER_START
= 15
50 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
51 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
52 _CD_LOCAL_HEADER_OFFSET
= 18
54 # indexes of entries in the local file header structure
56 _FH_EXTRACT_VERSION
= 1
57 _FH_EXTRACT_SYSTEM
= 2 # is this meaningful?
58 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
59 _FH_COMPRESSION_METHOD
= 4
63 _FH_COMPRESSED_SIZE
= 8
64 _FH_UNCOMPRESSED_SIZE
= 9
65 _FH_FILENAME_LENGTH
= 10
66 _FH_EXTRA_FIELD_LENGTH
= 11
68 def is_zipfile(filename
):
69 """Quickly see if file is a ZIP file by checking the magic number."""
71 fpin
= open(filename
, "rb")
72 endrec
= _EndRecData(fpin
)
75 return True # file has correct magic number
80 def _EndRecData(fpin
):
81 """Return data from the "End of Central Directory" record, or None.
83 The data is a list of the nine items in the ZIP "End of central dir"
84 record followed by a tenth item, the file seek offset of this record."""
85 fpin
.seek(-22, 2) # Assume no archive comment.
86 filesize
= fpin
.tell() + 22 # Get file size
88 if data
[0:4] == stringEndArchive
and data
[-2:] == "\000\000":
89 endrec
= struct
.unpack(structEndArchive
, data
)
91 endrec
.append("") # Append the archive comment
92 endrec
.append(filesize
- 22) # Append the record start offset
94 # Search the last END_BLOCK bytes of the file for the record signature.
95 # The comment is appended to the ZIP file and has a 16 bit length.
96 # So the comment may be up to 64K long. We limit the search for the
97 # signature to a few Kbytes at the end of the file for efficiency.
98 # also, the signature must not appear in the comment.
99 END_BLOCK
= min(filesize
, 1024 * 4)
100 fpin
.seek(filesize
- END_BLOCK
, 0)
102 start
= data
.rfind(stringEndArchive
)
103 if start
>= 0: # Correct signature string was found
104 endrec
= struct
.unpack(structEndArchive
, data
[start
:start
+22])
105 endrec
= list(endrec
)
106 comment
= data
[start
+22:]
107 if endrec
[7] == len(comment
): # Comment length checks out
108 # Append the archive comment and start offset
109 endrec
.append(comment
)
110 endrec
.append(filesize
- END_BLOCK
+ start
)
112 return # Error, return None
116 """Class with attributes describing each file in the ZIP archive."""
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    """Initialise the metadata record for a single archive member.

    filename  -- member name; it is passed through _normpath() before
                 being stored on the instance.
    date_time -- stored unchanged; a sequence of
                 (year, month, day, hour, min, sec).

    All remaining fields start out at the fixed defaults assigned below.
    """
    # Identity of the member as supplied by the caller.
    self.filename = _normpath(filename)
    self.date_time = date_time

    # Compression method used for the member's data.
    self.compress_type = ZIP_STORED

    # Free-form per-member comment and raw ZIP "extra" data.
    self.comment = ""
    self.extra = ""

    # System and version that created the archive entry, and the
    # version needed to extract it.
    self.create_system = 0
    self.create_version = 20
    self.extract_version = 20

    # Reserved field: must stay zero.
    self.reserved = 0

    # ZIP general-purpose flag bits.
    self.flag_bits = 0

    # Volume number of the file header.
    self.volume = 0

    # Internal and external file attributes.
    self.internal_attr = 0
    self.external_attr = 0

    # The following attributes are not set here; class ZipFile fills
    # them in later:
    #   header_offset   Byte offset to the file header
    #   file_offset     Byte offset to the start of the file data
    #   CRC             CRC-32 of the uncompressed file
    #   compress_size   Size of the compressed file
    #   file_size       Size of the uncompressed file
140 def FileHeader(self
):
141 """Return the per-file header as a string."""
143 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
144 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
145 if self
.flag_bits
& 0x08:
146 # Set these to zero because we write them after the file data
147 CRC
= compress_size
= file_size
= 0
150 compress_size
= self
.compress_size
151 file_size
= self
.file_size
152 header
= struct
.pack(structFileHeader
, stringFileHeader
,
153 self
.extract_version
, self
.reserved
, self
.flag_bits
,
154 self
.compress_type
, dostime
, dosdate
, CRC
,
155 compress_size
, file_size
,
156 len(self
.filename
), len(self
.extra
))
157 return header
+ self
.filename
+ self
.extra
160 # This is used to ensure paths in generated ZIP files always use
161 # forward slashes as the directory separator, as required by the
162 # ZIP format specification.
165 return path
.replace(os
.sep
, "/")
172 """ Class with methods to open, read, write, close, list zip files.
174 z = ZipFile(file, mode="r", compression=ZIP_STORED)
176 file: Either the path to the file, or a file-like object.
177 If it is a path, the file will be opened and closed by ZipFile.
178 mode: The mode can be either read "r", write "w" or append "a".
179 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
182 fp
= None # Set here since __del__ checks it
184 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
):
185 """Open the ZIP file with mode read "r", write "w" or append "a"."""
186 if compression
== ZIP_STORED
:
188 elif compression
== ZIP_DEFLATED
:
191 "Compression requires the (missing) zlib module"
193 raise RuntimeError, "That compression method is not supported"
194 self
.debug
= 0 # Level of printing: 0 through 3
195 self
.NameToInfo
= {} # Find file info given name
196 self
.filelist
= [] # List of ZipInfo instances for archive
197 self
.compression
= compression
# Method of compression
198 self
.mode
= key
= mode
[0]
200 # Check if we were passed a file-like object
201 if isinstance(file, basestring
):
204 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
205 self
.fp
= open(file, modeDict
[mode
])
209 self
.filename
= getattr(file, 'name', None)
216 try: # See if file is a zip file
217 self
._RealGetContents
()
218 # seek to start of directory and overwrite
219 self
.fp
.seek(self
.start_dir
, 0)
220 except BadZipfile
: # file is not a zip file, just append
223 if not self
._filePassed
:
226 raise RuntimeError, 'Mode must be "r", "w" or "a"'
228 def _GetContents(self
):
229 """Read the directory, making sure we close the file if the format
232 self
._RealGetContents
()
234 if not self
._filePassed
:
239 def _RealGetContents(self
):
240 """Read in the table of contents for the ZIP file."""
242 endrec
= _EndRecData(fp
)
244 raise BadZipfile
, "File is not a zip file"
247 size_cd
= endrec
[5] # bytes in central directory
248 offset_cd
= endrec
[6] # offset of central directory
249 self
.comment
= endrec
[8] # archive comment
250 # endrec[9] is the offset of the "End of Central Dir" record
251 x
= endrec
[9] - size_cd
252 # "concat" is zero, unless zip was concatenated to another file
253 concat
= x
- offset_cd
255 print "given, inferred, offset", offset_cd
, x
, concat
256 # self.start_dir: Position of start of central directory
257 self
.start_dir
= offset_cd
+ concat
258 fp
.seek(self
.start_dir
, 0)
260 while total
< size_cd
:
261 centdir
= fp
.read(46)
263 if centdir
[0:4] != stringCentralDir
:
264 raise BadZipfile
, "Bad magic number for central directory"
265 centdir
= struct
.unpack(structCentralDir
, centdir
)
268 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
269 # Create ZipInfo instance to store file information
270 x
= ZipInfo(filename
)
271 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
272 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
273 total
= (total
+ centdir
[_CD_FILENAME_LENGTH
]
274 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
275 + centdir
[_CD_COMMENT_LENGTH
])
276 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
] + concat
277 # file_offset must be computed below...
278 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
279 x
.flag_bits
, x
.compress_type
, t
, d
,
280 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
281 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
282 # Convert date/time code to (year, month, day, hour, min, sec)
283 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
284 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
285 self
.filelist
.append(x
)
286 self
.NameToInfo
[x
.filename
] = x
289 for data
in self
.filelist
:
290 fp
.seek(data
.header_offset
, 0)
291 fheader
= fp
.read(30)
292 if fheader
[0:4] != stringFileHeader
:
293 raise BadZipfile
, "Bad magic number for file header"
294 fheader
= struct
.unpack(structFileHeader
, fheader
)
295 # file_offset is computed here, since the extra field for
296 # the central directory and for the local file header
297 # refer to different fields, and they can have different
299 data
.file_offset
= (data
.header_offset
+ 30
300 + fheader
[_FH_FILENAME_LENGTH
]
301 + fheader
[_FH_EXTRA_FIELD_LENGTH
])
302 fname
= fp
.read(fheader
[_FH_FILENAME_LENGTH
])
303 if fname
!= data
.filename
:
304 raise RuntimeError, \
305 'File name in directory "%s" and header "%s" differ.' % (
306 data
.filename
, fname
)
309 """Return a list of file names in the archive."""
311 for data
in self
.filelist
:
312 l
.append(data
.filename
)
316 """Return a list of class ZipInfo instances for files in the
321 """Print a table of contents for the zip file."""
322 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
323 for zinfo
in self
.filelist
:
324 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
325 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
328 """Read all the files and check the CRC."""
329 for zinfo
in self
.filelist
:
331 self
.read(zinfo
.filename
) # Check CRC-32
333 return zinfo
.filename
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    name -- the member name used as the key in self.NameToInfo.

    Raises KeyError, with a message naming the missing member, when the
    archive has no entry called 'name'.
    """
    try:
        return self.NameToInfo[name]
    except KeyError:
        # Re-raise the same exception type callers already catch, but say
        # what was being looked up instead of exposing a bare dict miss.
        raise KeyError("There is no item named %r in the archive" % (name,))
339 def read(self
, name
):
340 """Return file bytes (as a string) for name."""
341 if self
.mode
not in ("r", "a"):
342 raise RuntimeError, 'read() requires mode "r" or "a"'
344 raise RuntimeError, \
345 "Attempt to read ZIP archive that was already closed"
346 zinfo
= self
.getinfo(name
)
347 filepos
= self
.fp
.tell()
348 self
.fp
.seek(zinfo
.file_offset
, 0)
349 bytes
= self
.fp
.read(zinfo
.compress_size
)
350 self
.fp
.seek(filepos
, 0)
351 if zinfo
.compress_type
== ZIP_STORED
:
353 elif zinfo
.compress_type
== ZIP_DEFLATED
:
355 raise RuntimeError, \
356 "De-compression requires the (missing) zlib module"
357 # zlib compress/decompress code by Jeremy Hylton of CNRI
358 dc
= zlib
.decompressobj(-15)
359 bytes
= dc
.decompress(bytes
)
360 # need to feed in unused pad byte so that zlib won't choke
361 ex
= dc
.decompress('Z') + dc
.flush()
366 "Unsupported compression method %d for file %s" % \
367 (zinfo
.compress_type
, name
)
368 crc
= binascii
.crc32(bytes
)
370 raise BadZipfile
, "Bad CRC-32 for file %s" % name
373 def _writecheck(self
, zinfo
):
374 """Check for errors before writing a file to the archive."""
375 if zinfo
.filename
in self
.NameToInfo
:
376 if self
.debug
: # Warning for duplicate names
377 print "Duplicate name:", zinfo
.filename
378 if self
.mode
not in ("w", "a"):
379 raise RuntimeError, 'write() requires mode "w" or "a"'
381 raise RuntimeError, \
382 "Attempt to write ZIP archive that was already closed"
383 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
384 raise RuntimeError, \
385 "Compression requires the (missing) zlib module"
386 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
387 raise RuntimeError, \
388 "That compression method is not supported"
390 def write(self
, filename
, arcname
=None, compress_type
=None):
391 """Put the bytes from filename into the archive under the name
393 st
= os
.stat(filename
)
394 mtime
= time
.localtime(st
.st_mtime
)
395 date_time
= mtime
[0:6]
396 # Create ZipInfo instance to store file information
398 zinfo
= ZipInfo(filename
, date_time
)
400 zinfo
= ZipInfo(arcname
, date_time
)
401 zinfo
.external_attr
= st
[0] << 16L # Unix attributes
402 if compress_type
is None:
403 zinfo
.compress_type
= self
.compression
405 zinfo
.compress_type
= compress_type
406 self
._writecheck
(zinfo
)
407 fp
= open(filename
, "rb")
408 zinfo
.flag_bits
= 0x00
409 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
410 # Must overwrite CRC and sizes with correct data later
412 zinfo
.compress_size
= compress_size
= 0
413 zinfo
.file_size
= file_size
= 0
414 self
.fp
.write(zinfo
.FileHeader())
415 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
416 if zinfo
.compress_type
== ZIP_DEFLATED
:
417 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
422 buf
= fp
.read(1024 * 8)
425 file_size
= file_size
+ len(buf
)
426 CRC
= binascii
.crc32(buf
, CRC
)
428 buf
= cmpr
.compress(buf
)
429 compress_size
= compress_size
+ len(buf
)
434 compress_size
= compress_size
+ len(buf
)
436 zinfo
.compress_size
= compress_size
438 zinfo
.compress_size
= file_size
440 zinfo
.file_size
= file_size
441 # Seek backwards and write CRC and file sizes
442 position
= self
.fp
.tell() # Preserve current position in file
443 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
444 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
446 self
.fp
.seek(position
, 0)
447 self
.filelist
.append(zinfo
)
448 self
.NameToInfo
[zinfo
.filename
] = zinfo
450 def writestr(self
, zinfo
, bytes
):
451 """Write a file into the archive. The contents is the string
453 self
._writecheck
(zinfo
)
454 zinfo
.file_size
= len(bytes
) # Uncompressed size
455 zinfo
.CRC
= binascii
.crc32(bytes
) # CRC-32 checksum
456 if zinfo
.compress_type
== ZIP_DEFLATED
:
457 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
459 bytes
= co
.compress(bytes
) + co
.flush()
460 zinfo
.compress_size
= len(bytes
) # Compressed size
462 zinfo
.compress_size
= zinfo
.file_size
463 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
464 self
.fp
.write(zinfo
.FileHeader())
465 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
467 if zinfo
.flag_bits
& 0x08:
468 # Write CRC and file sizes after the file data
469 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
471 self
.filelist
.append(zinfo
)
472 self
.NameToInfo
[zinfo
.filename
] = zinfo
475 """Call the "close()" method in case the user forgot."""
479 """Close the file, and for mode "w" and "a" write the ending
483 if self
.mode
in ("w", "a"): # write ending records
485 pos1
= self
.fp
.tell()
486 for zinfo
in self
.filelist
: # write central directory
489 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
490 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
491 centdir
= struct
.pack(structCentralDir
,
492 stringCentralDir
, zinfo
.create_version
,
493 zinfo
.create_system
, zinfo
.extract_version
, zinfo
.reserved
,
494 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
495 zinfo
.CRC
, zinfo
.compress_size
, zinfo
.file_size
,
496 len(zinfo
.filename
), len(zinfo
.extra
), len(zinfo
.comment
),
497 0, zinfo
.internal_attr
, zinfo
.external_attr
,
499 self
.fp
.write(centdir
)
500 self
.fp
.write(zinfo
.filename
)
501 self
.fp
.write(zinfo
.extra
)
502 self
.fp
.write(zinfo
.comment
)
503 pos2
= self
.fp
.tell()
504 # Write end-of-zip-archive record
505 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
506 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
507 self
.fp
.write(endrec
)
509 if not self
._filePassed
:
514 class PyZipFile(ZipFile
):
515 """Class to create ZIP archives with Python library files and packages."""
517 def writepy(self
, pathname
, basename
= ""):
518 """Add all files from "pathname" to the ZIP archive.
520 If pathname is a package directory, search the directory and
521 all package subdirectories recursively for all *.py and enter
522 the modules into the archive. If pathname is a plain
523 directory, listdir *.py and enter all modules. Else, pathname
524 must be a Python *.py file and the module will be put into the
525 archive. Added modules are always module.pyo or module.pyc.
526 This method will compile the module.py into module.pyc if
529 dir, name
= os
.path
.split(pathname
)
530 if os
.path
.isdir(pathname
):
531 initname
= os
.path
.join(pathname
, "__init__.py")
532 if os
.path
.isfile(initname
):
533 # This is a package directory, add it
535 basename
= "%s/%s" % (basename
, name
)
539 print "Adding package in", pathname
, "as", basename
540 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
542 print "Adding", arcname
543 self
.write(fname
, arcname
)
544 dirlist
= os
.listdir(pathname
)
545 dirlist
.remove("__init__.py")
546 # Add all *.py files and package subdirectories
547 for filename
in dirlist
:
548 path
= os
.path
.join(pathname
, filename
)
549 root
, ext
= os
.path
.splitext(filename
)
550 if os
.path
.isdir(path
):
551 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
552 # This is a package directory, add it
553 self
.writepy(path
, basename
) # Recursive call
555 fname
, arcname
= self
._get
_codename
(path
[0:-3],
558 print "Adding", arcname
559 self
.write(fname
, arcname
)
561 # This is NOT a package directory, add its files at top level
563 print "Adding files from directory", pathname
564 for filename
in os
.listdir(pathname
):
565 path
= os
.path
.join(pathname
, filename
)
566 root
, ext
= os
.path
.splitext(filename
)
568 fname
, arcname
= self
._get
_codename
(path
[0:-3],
571 print "Adding", arcname
572 self
.write(fname
, arcname
)
574 if pathname
[-3:] != ".py":
575 raise RuntimeError, \
576 'Files added with writepy() must end with ".py"'
577 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
579 print "Adding file", arcname
580 self
.write(fname
, arcname
)
582 def _get_codename(self
, pathname
, basename
):
583 """Return (filename, archivename) for the path.
585 Given a module name path, return the correct file path and
586 archive name, compiling if necessary. For example, given
587 /python/lib/string, return (/python/lib/string.pyc, string).
589 file_py
= pathname
+ ".py"
590 file_pyc
= pathname
+ ".pyc"
591 file_pyo
= pathname
+ ".pyo"
592 if os
.path
.isfile(file_pyo
) and \
593 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
594 fname
= file_pyo
# Use .pyo file
595 elif not os
.path
.isfile(file_pyc
) or \
596 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
599 print "Compiling", file_py
600 py_compile
.compile(file_py
, file_pyc
)
604 archivename
= os
.path
.split(fname
)[1]
606 archivename
= "%s/%s" % (basename
, archivename
)
607 return (fname
, archivename
)