1 "Read and write ZIP files."
2 # Written by James C. Ahlstrom jim@interet.com
3 # All rights transferred to CNRI pursuant to the Python contribution agreement
5 import struct
, os
, time
9 import zlib
# We may need its compression method
# Names exported by a star-import of this module.
__all__ = [
    "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
    "ZipInfo", "ZipFile", "PyZipFile",
]
class BadZipfile(Exception):
    """Raised by this module when a file is not a usable ZIP archive."""


error = BadZipfile      # The exception raised by this module

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"       # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"     # magic number for end of archive record
structCentralDir = "<4s4B4H3l5HLl"  # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"     # magic number for central directory
structFileHeader = "<4s2B4H3l2H"    # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"     # magic number for file header

# indexes of entries in the central directory structure
# (positions in the tuple produced by unpacking structCentralDir)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple produced by unpacking structFileHeader)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when _EndRecData() finds an end-of-archive record in the
    file, and False otherwise.  A file that cannot be opened or read
    (IOError) is reported as False rather than raising.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Close on every path so the handle is not leaked if the
            # probe itself raises.
            fpin.close()
        if endrec:
            return True                 # file has correct magic number
    except IOError:
        pass
    return False
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    On success the result is a list of ten items: the eight fields
    unpacked with structEndArchive, then the archive comment (index 8),
    then the file seek offset at which the record starts (index 9).

    None is returned when no record can be located.  That includes files
    too short to hold the fixed-size record and records that are cut off
    by the end of the file."""
    rec_size = struct.calcsize(structEndArchive)    # 22 bytes
    try:
        fpin.seek(-rec_size, 2)         # Assume no archive comment.
    except IOError:
        # Seeking before the start of the file: it is shorter than one
        # record, so it cannot be a ZIP archive.
        return None
    filesize = fpin.tell() + rec_size   # Get file size
    data = fpin.read()
    if (len(data) == rec_size and data[0:4] == stringEndArchive
            and data[-2:] == "\000\000"):
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                       # Append the archive comment
        endrec.append(filesize - rec_size)      # Append the record start offset
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:      # Correct signature string was found
        record = data[start:start + rec_size]
        if len(record) == rec_size:     # Skip a record truncated by EOF
            endrec = list(struct.unpack(structEndArchive, record))
            comment = data[start + rec_size:]
            if endrec[7] == len(comment):   # Comment length checks out
                # Append the archive comment and start offset
                endrec.append(comment)
                endrec.append(filesize - END_BLOCK + start)
                return endrec
    return None         # Error, return None
116 """Class with attributes describing each file in the ZIP archive."""
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    # Describe one archive member.  'filename' is stored after being
    # passed through _normpath(); 'date_time' is stored unchanged.
    self.filename = _normpath(filename)     # Member name inside the archive
    self.date_time = date_time              # (year, month, day, hour, min, sec)

    # Defaults for a freshly created entry.
    self.compress_type = ZIP_STORED         # Compression method for the data
    self.comment = ""                       # Per-file comment
    self.extra = ""                         # ZIP "extra" field data

    # Version and system bookkeeping.
    self.create_system = 0                  # System that created the archive
    self.create_version = 20                # Version that created the archive
    self.extract_version = 20               # Version needed to extract
    self.reserved = 0                       # Must be zero

    # Flags and attributes.
    self.flag_bits = 0                      # ZIP flag bits
    self.volume = 0                         # Volume number of file header
    self.internal_attr = 0                  # Internal attributes
    self.external_attr = 0                  # External file attributes

    # Not set here; class ZipFile fills these in:
    #   header_offset     Byte offset to the file header
    #   file_offset       Byte offset to the start of the file data
    #   CRC               CRC-32 of the uncompressed file
    #   compress_size     Size of the compressed file
    #   file_size         Size of the uncompressed file
def FileHeader(self):
    """Return the per-file header as a string.

    The result is the fixed-size record packed with structFileHeader,
    followed by the file name and the extra field.  When bit 0x08 of
    flag_bits is set, the CRC and both sizes are written as zero because
    they are written after the file data instead.
    """
    dt = self.date_time
    # Pack (year, month, day) and (hour, minute, second) into the two
    # 16-bit fields; seconds are stored halved.
    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    if self.flag_bits & 0x08:
        # Set these to zero because we write them after the file data
        CRC = compress_size = file_size = 0
    else:
        CRC = self.CRC
        compress_size = self.compress_size
        file_size = self.file_size
    header = struct.pack(structFileHeader, stringFileHeader,
                         self.extract_version, self.reserved, self.flag_bits,
                         self.compress_type, dostime, dosdate, CRC,
                         compress_size, file_size,
                         len(self.filename), len(self.extra))
    return header + self.filename + self.extra
160 # This is used to ensure paths in generated ZIP files always use
161 # forward slashes as the directory separator, as required by the
162 # ZIP format specification.
165 return path
.replace(os
.sep
, "/")
172 """ Class with methods to open, read, write, close, list zip files.
174 z = ZipFile(file, mode="r", compression=ZIP_STORED)
176 file: Either the path to the file, or a file-like object.
177 If it is a path, the file will be opened and closed by ZipFile.
178 mode: The mode can be either read "r", write "w" or append "a".
179 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
182 fp
= None # Set here since __del__ checks it
def __init__(self, file, mode="r", compression=ZIP_STORED):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    'file' is either a path (opened here, and closed here on failure) or
    a file-like object, which is used as given.  'compression' must be
    ZIP_STORED or ZIP_DEFLATED; any other value raises RuntimeError, as
    does ZIP_DEFLATED when the zlib module is missing.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode[0]

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        self.fp = open(file, modeDict[mode])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    else:
        # Only close a file that this constructor opened itself.
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
def _GetContents(self):
    """Read the directory, making sure we close the file if the format
    is bad.

    Delegates to _RealGetContents().  On BadZipfile the file is closed,
    but only when this object opened it, and the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise
239 def _RealGetContents(self
):
240 """Read in the table of contents for the ZIP file."""
242 endrec
= _EndRecData(fp
)
244 raise BadZipfile
, "File is not a zip file"
247 size_cd
= endrec
[5] # bytes in central directory
248 offset_cd
= endrec
[6] # offset of central directory
249 self
.comment
= endrec
[8] # archive comment
250 # endrec[9] is the offset of the "End of Central Dir" record
251 x
= endrec
[9] - size_cd
252 # "concat" is zero, unless zip was concatenated to another file
253 concat
= x
- offset_cd
255 print "given, inferred, offset", offset_cd
, x
, concat
256 # self.start_dir: Position of start of central directory
257 self
.start_dir
= offset_cd
+ concat
258 fp
.seek(self
.start_dir
, 0)
260 while total
< size_cd
:
261 centdir
= fp
.read(46)
263 if centdir
[0:4] != stringCentralDir
:
264 raise BadZipfile
, "Bad magic number for central directory"
265 centdir
= struct
.unpack(structCentralDir
, centdir
)
268 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
269 # Create ZipInfo instance to store file information
270 x
= ZipInfo(filename
)
271 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
272 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
273 total
= (total
+ centdir
[_CD_FILENAME_LENGTH
]
274 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
275 + centdir
[_CD_COMMENT_LENGTH
])
276 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
] + concat
277 # file_offset must be computed below...
278 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
279 x
.flag_bits
, x
.compress_type
, t
, d
,
280 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
281 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
282 # Convert date/time code to (year, month, day, hour, min, sec)
283 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
284 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
285 self
.filelist
.append(x
)
286 self
.NameToInfo
[x
.filename
] = x
289 for data
in self
.filelist
:
290 fp
.seek(data
.header_offset
, 0)
291 fheader
= fp
.read(30)
292 if fheader
[0:4] != stringFileHeader
:
293 raise BadZipfile
, "Bad magic number for file header"
294 fheader
= struct
.unpack(structFileHeader
, fheader
)
295 # file_offset is computed here, since the extra field for
296 # the central directory and for the local file header
297 # refer to different fields, and they can have different
299 data
.file_offset
= (data
.header_offset
+ 30
300 + fheader
[_FH_FILENAME_LENGTH
]
301 + fheader
[_FH_EXTRA_FIELD_LENGTH
])
302 fname
= fp
.read(fheader
[_FH_FILENAME_LENGTH
])
303 if fname
!= data
.filename
:
304 raise RuntimeError, \
305 'File name in directory "%s" and header "%s" differ.' % (
306 data
.filename
, fname
)
309 """Return a list of file names in the archive."""
311 for data
in self
.filelist
:
312 l
.append(data
.filename
)
316 """Return a list of class ZipInfo instances for files in the
321 """Print a table of contents for the zip file."""
322 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
323 for zinfo
in self
.filelist
:
324 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
325 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
328 """Read all the files and check the CRC."""
329 for zinfo
in self
.filelist
:
331 self
.read(zinfo
.filename
) # Check CRC-32
333 return zinfo
.filename
def getinfo(self, name):
    """Look up 'name' in the archive and return its ZipInfo instance.

    A name that is not in the archive raises KeyError from the lookup.
    """
    info = self.NameToInfo[name]
    return info
def read(self, name):
    """Return file bytes (as a string) for name.

    Raises RuntimeError when the archive was not opened in mode "r" or
    "a", when it has already been closed, or when the member is deflated
    and zlib is missing.  Raises BadZipfile for an unsupported compression
    method or a CRC-32 mismatch.  The position of the underlying file is
    restored after the member data has been read.
    """
    if self.mode not in ("r", "a"):
        raise RuntimeError('read() requires mode "r" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")
    zinfo = self.getinfo(name)
    filepos = self.fp.tell()
    self.fp.seek(zinfo.file_offset, 0)
    data = self.fp.read(zinfo.compress_size)
    self.fp.seek(filepos, 0)
    if zinfo.compress_type == ZIP_STORED:
        pass
    elif zinfo.compress_type == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "De-compression requires the (missing) zlib module")
        # zlib compress/decompress code by Jeremy Hylton of CNRI
        dc = zlib.decompressobj(-15)
        data = dc.decompress(data)
        # need to feed in unused pad byte so that zlib won't choke
        ex = dc.decompress('Z') + dc.flush()
        if ex:
            data = data + ex
    else:
        raise BadZipfile(
              "Unsupported compression method %d for file %s" %
              (zinfo.compress_type, name))
    crc = binascii.crc32(data)
    if crc != zinfo.CRC:
        raise BadZipfile("Bad CRC-32 for file %s" % name)
    return data
373 def _writecheck(self
, zinfo
):
374 """Check for errors before writing a file to the archive."""
375 if zinfo
.filename
in self
.NameToInfo
:
376 if self
.debug
: # Warning for duplicate names
377 print "Duplicate name:", zinfo
.filename
378 if self
.mode
not in ("w", "a"):
379 raise RuntimeError, 'write() requires mode "w" or "a"'
381 raise RuntimeError, \
382 "Attempt to write ZIP archive that was already closed"
383 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
384 raise RuntimeError, \
385 "Compression requires the (missing) zlib module"
386 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
387 raise RuntimeError, \
388 "That compression method is not supported"
390 def write(self
, filename
, arcname
=None, compress_type
=None):
391 """Put the bytes from filename into the archive under the name
393 st
= os
.stat(filename
)
394 mtime
= time
.localtime(st
.st_mtime
)
395 date_time
= mtime
[0:6]
396 # Create ZipInfo instance to store file information
398 zinfo
= ZipInfo(filename
, date_time
)
400 zinfo
= ZipInfo(arcname
, date_time
)
401 zinfo
.external_attr
= st
[0] << 16L # Unix attributes
402 if compress_type
is None:
403 zinfo
.compress_type
= self
.compression
405 zinfo
.compress_type
= compress_type
406 self
._writecheck
(zinfo
)
407 fp
= open(filename
, "rb")
408 zinfo
.flag_bits
= 0x00
409 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
410 # Must overwrite CRC and sizes with correct data later
412 zinfo
.compress_size
= compress_size
= 0
413 zinfo
.file_size
= file_size
= 0
414 self
.fp
.write(zinfo
.FileHeader())
415 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
416 if zinfo
.compress_type
== ZIP_DEFLATED
:
417 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
422 buf
= fp
.read(1024 * 8)
425 file_size
= file_size
+ len(buf
)
426 CRC
= binascii
.crc32(buf
, CRC
)
428 buf
= cmpr
.compress(buf
)
429 compress_size
= compress_size
+ len(buf
)
434 compress_size
= compress_size
+ len(buf
)
436 zinfo
.compress_size
= compress_size
438 zinfo
.compress_size
= file_size
440 zinfo
.file_size
= file_size
441 # Seek backwards and write CRC and file sizes
442 position
= self
.fp
.tell() # Preserve current position in file
443 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
444 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
446 self
.fp
.seek(position
, 0)
447 self
.filelist
.append(zinfo
)
448 self
.NameToInfo
[zinfo
.filename
] = zinfo
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time and the archive's default compression method.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        # Keep only (year, month, day, hour, min, sec): date_time is
        # formatted elsewhere with a six-slot "%" string, which fails on
        # the longer value returned by time.localtime().
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname
    self._writecheck(zinfo)
    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    zinfo.file_offset = self.fp.tell()      # Start of file bytes
    self.fp.write(bytes)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
482 """Call the "close()" method in case the user forgot."""
486 """Close the file, and for mode "w" and "a" write the ending
490 if self
.mode
in ("w", "a"): # write ending records
492 pos1
= self
.fp
.tell()
493 for zinfo
in self
.filelist
: # write central directory
496 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
497 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
498 centdir
= struct
.pack(structCentralDir
,
499 stringCentralDir
, zinfo
.create_version
,
500 zinfo
.create_system
, zinfo
.extract_version
, zinfo
.reserved
,
501 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
502 zinfo
.CRC
, zinfo
.compress_size
, zinfo
.file_size
,
503 len(zinfo
.filename
), len(zinfo
.extra
), len(zinfo
.comment
),
504 0, zinfo
.internal_attr
, zinfo
.external_attr
,
506 self
.fp
.write(centdir
)
507 self
.fp
.write(zinfo
.filename
)
508 self
.fp
.write(zinfo
.extra
)
509 self
.fp
.write(zinfo
.comment
)
510 pos2
= self
.fp
.tell()
511 # Write end-of-zip-archive record
512 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
513 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
514 self
.fp
.write(endrec
)
516 if not self
._filePassed
:
521 class PyZipFile(ZipFile
):
522 """Class to create ZIP archives with Python library files and packages."""
524 def writepy(self
, pathname
, basename
= ""):
525 """Add all files from "pathname" to the ZIP archive.
527 If pathname is a package directory, search the directory and
528 all package subdirectories recursively for all *.py and enter
529 the modules into the archive. If pathname is a plain
530 directory, listdir *.py and enter all modules. Else, pathname
531 must be a Python *.py file and the module will be put into the
532 archive. Added modules are always module.pyo or module.pyc.
533 This method will compile the module.py into module.pyc if
536 dir, name
= os
.path
.split(pathname
)
537 if os
.path
.isdir(pathname
):
538 initname
= os
.path
.join(pathname
, "__init__.py")
539 if os
.path
.isfile(initname
):
540 # This is a package directory, add it
542 basename
= "%s/%s" % (basename
, name
)
546 print "Adding package in", pathname
, "as", basename
547 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
549 print "Adding", arcname
550 self
.write(fname
, arcname
)
551 dirlist
= os
.listdir(pathname
)
552 dirlist
.remove("__init__.py")
553 # Add all *.py files and package subdirectories
554 for filename
in dirlist
:
555 path
= os
.path
.join(pathname
, filename
)
556 root
, ext
= os
.path
.splitext(filename
)
557 if os
.path
.isdir(path
):
558 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
559 # This is a package directory, add it
560 self
.writepy(path
, basename
) # Recursive call
562 fname
, arcname
= self
._get
_codename
(path
[0:-3],
565 print "Adding", arcname
566 self
.write(fname
, arcname
)
568 # This is NOT a package directory, add its files at top level
570 print "Adding files from directory", pathname
571 for filename
in os
.listdir(pathname
):
572 path
= os
.path
.join(pathname
, filename
)
573 root
, ext
= os
.path
.splitext(filename
)
575 fname
, arcname
= self
._get
_codename
(path
[0:-3],
578 print "Adding", arcname
579 self
.write(fname
, arcname
)
581 if pathname
[-3:] != ".py":
582 raise RuntimeError, \
583 'Files added with writepy() must end with ".py"'
584 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
586 print "Adding file", arcname
587 self
.write(fname
, arcname
)
589 def _get_codename(self
, pathname
, basename
):
590 """Return (filename, archivename) for the path.
592 Given a module name path, return the correct file path and
593 archive name, compiling if necessary. For example, given
594 /python/lib/string, return (/python/lib/string.pyc, string).
596 file_py
= pathname
+ ".py"
597 file_pyc
= pathname
+ ".pyc"
598 file_pyo
= pathname
+ ".pyo"
599 if os
.path
.isfile(file_pyo
) and \
600 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
601 fname
= file_pyo
# Use .pyo file
602 elif not os
.path
.isfile(file_pyc
) or \
603 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
606 print "Compiling", file_py
608 py_compile
.compile(file_py
, file_pyc
, None, True)
609 except py_compile
.PyCompileError
,err
:
614 archivename
= os
.path
.split(fname
)[1]
616 archivename
= "%s/%s" % (basename
, archivename
)
617 return (fname
, archivename
)