1 "Read and write ZIP files."
2 # Written by James C. Ahlstrom jim@interet.com
3 # All rights transferred to CNRI pursuant to the Python contribution agreement
import binascii
import os
import struct
import time

try:
    import zlib  # We may need its compression method
except ImportError:
    zlib = None
# Public names exported by a star-import of this module.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
]
class BadZipfile(Exception):
    """Raised for malformed or unsupported ZIP data.

    This module raises it for a missing end-of-archive record, a bad
    magic number, an unsupported compression method or a CRC-32
    mismatch.
    """


error = BadZipfile      # The exception raised by this module
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"       # 9 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"     # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"  # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"     # magic number for central directory
structFileHeader = "<4s2B4H3l2H"    # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"     # magic number for file header

# indexes of entries in the central directory structure
# (positions in the tuple returned by struct.unpack(structCentralDir, ...))
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple returned by struct.unpack(structFileHeader, ...))
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Will not accept a ZIP archive with an ending comment: the last 22
    bytes of the file must be an end-of-archive record whose comment
    length field is zero.

    Returns True when that record is found and False otherwise,
    including when opening, seeking or reading raises IOError.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Release the handle even when seek()/read() fails.
            fpin.close()
    except IOError:
        return False
    # Magic number first, then the two zero bytes of the comment length.
    return endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000"
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member record filled with default header values.

        filename  -- name of the file in the archive; the platform
                     separator is converted by _normpath().
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.filename = _normpath(filename)
        self.date_time = date_time
        # Standard values:
        self.compress_type = ZIP_STORED     # Type of compression for the file
        self.comment = ""                   # Comment for each file
        self.extra = ""                     # ZIP extra data
        self.create_system = 0              # System which created ZIP archive
        self.create_version = 20            # Version which created ZIP archive
        self.extract_version = 20           # Version needed to extract archive
        self.reserved = 0                   # Must be zero
        self.flag_bits = 0                  # ZIP flag bits
        self.volume = 0                     # Volume number of file header
        self.internal_attr = 0              # Internal attributes
        self.external_attr = 0              # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local header followed by the
        file name and the extra field.
        """
        stamp = self.date_time
        # Pack the timestamp into the DOS date and time words
        # (seconds are stored with two-second resolution).
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            crc = packed_size = plain_size = 0
        else:
            crc = self.CRC
            packed_size = self.compress_size
            plain_size = self.file_size
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, dostime, dosdate, crc,
            packed_size, plain_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
def _normpath(path):
    """Return path with the platform separator replaced by "/"."""
    if os.sep == "/":
        # Nothing to translate on this platform.
        return path
    return path.replace(os.sep, "/")
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant, so "rb" is
        treated like "r".  Raises RuntimeError for an unsupported
        compression method, for ZIP_DEFLATED when zlib is missing, and
        for any other mode letter (checked before a path is opened).
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode[0]

        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        if key not in modeDict:
            # Reject a bad mode up front, so nothing needs closing and
            # path arguments get the same error as file objects.
            raise RuntimeError('Mode must be "r", "w" or "a"')

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            # Index by the mode letter, not the full mode string.
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
                endrec = fp.read()
            except IOError:
                # Too short to hold an end-of-archive record.
                endrec = ""
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # key == 'w': nothing to read, writing starts at the current position

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo, and sets
        self.start_dir.  Raises BadZipfile when a magic number is
        wrong, and RuntimeError when a name in the central directory
        differs from the name in the local file header.
        """
        fp = self.fp
        fp.seek(-22, 2)                 # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]           # offset of central directory
        inferred_cd = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
                zinfo.extract_version, zinfo.reserved,
                zinfo.flag_bits, zinfo.compress_type, t, d,
                zinfo.CRC, zinfo.compress_size,
                zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
                zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % (total,))
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that cannot be read, or
        None when every member reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # Never report an interrupt as a corrupt member.
                raise
            except Exception:
                return zinfo.filename
        return None

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is not in mode "r" or "a",
        is already closed, or zlib is needed but missing.  Raises
        BadZipfile for an unsupported compression method or a CRC-32
        mismatch.  The file position is restored after reading.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type defaults to the
        archive's compression method.  The source file is closed even
        when reading or writing fails.
        """
        st = os.stat(filename)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Do not leak the source handle when a read or write fails.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'."""
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() again is a no-op.  The handle is released (and
        closed when ZipFile opened it) even if writing the central
        directory fails.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):             # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    centdir = struct.pack(structCentralDir,
                      stringCentralDir, zinfo.create_version,
                      zinfo.create_system, zinfo.extract_version,
                      zinfo.reserved,
                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                      zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                      len(zinfo.filename), len(zinfo.extra),
                      len(zinfo.comment),
                      0, zinfo.internal_attr, zinfo.external_attr,
                      zinfo.header_offset)
                    self.fp.write(centdir)
                    self.fp.write(zinfo.filename)
                    self.fp.write(zinfo.extra)
                    self.fp.write(zinfo.comment)
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
                self.fp.flush()
        finally:
            # Drop the reference first so a failing close() cannot make
            # __del__ retry the directory write.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        """
        name = os.path.split(pathname)[1]

        # Case 1: a single module file.
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")

        # Case 2: a plain directory.
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                     basename)
                    if self.debug:
                        print("Adding %s" % (arcname,))
                    self.write(fname, arcname)
            return

        # Case 3: a package directory, add it under basename/name.
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already written above
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                fname, arcname = self._get_codename(path[0:-3],
                                 basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
           os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
               os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                # The .pyc is missing or older than the source: rebuild it.
                import py_compile
                if self.debug:
                    print("Compiling %s" % (file_py,))
                py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)