1 "Read and write ZIP files."
3 import struct
, os
, time
7 import zlib
# We may need its compression method
11 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
12 "ZipInfo", "ZipFile", "PyZipFile"]
14 class BadZipfile(Exception):
16 error
= BadZipfile
# The exception raised by this module
18 # constants for Zip file compression methods
21 # Other ZIP compression methods not supported
23 # Here are some struct module formats for reading headers
24 structEndArchive
= "<4s4H2lH" # 8 items, end of archive, 22 bytes
25 stringEndArchive
= "PK\005\006" # magic number for end of archive record
26 structCentralDir
= "<4s4B4H3l5HLl"# 19 items, central directory, 46 bytes
27 stringCentralDir
= "PK\001\002" # magic number for central directory
28 structFileHeader
= "<4s2B4H3l2H" # 12 items, file header record, 30 bytes
29 stringFileHeader
= "PK\003\004" # magic number for file header
31 # indexes of entries in the central directory structure
33 _CD_CREATE_VERSION
= 1
35 _CD_EXTRACT_VERSION
= 3
36 _CD_EXTRACT_SYSTEM
= 4 # is this meaningful?
42 _CD_COMPRESSED_SIZE
= 10
43 _CD_UNCOMPRESSED_SIZE
= 11
44 _CD_FILENAME_LENGTH
= 12
45 _CD_EXTRA_FIELD_LENGTH
= 13
46 _CD_COMMENT_LENGTH
= 14
47 _CD_DISK_NUMBER_START
= 15
48 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
49 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
50 _CD_LOCAL_HEADER_OFFSET
= 18
52 # indexes of entries in the local file header structure
54 _FH_EXTRACT_VERSION
= 1
55 _FH_EXTRACT_SYSTEM
= 2 # is this meaningful?
56 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
57 _FH_COMPRESSION_METHOD
= 4
61 _FH_COMPRESSED_SIZE
= 8
62 _FH_UNCOMPRESSED_SIZE
= 9
63 _FH_FILENAME_LENGTH
= 10
64 _FH_EXTRA_FIELD_LENGTH
= 11
66 def is_zipfile(filename
):
67 """Quickly see if file is a ZIP file by checking the magic number."""
69 fpin
= open(filename
, "rb")
70 endrec
= _EndRecData(fpin
)
73 return True # file has correct magic number
78 def _EndRecData(fpin
):
79 """Return data from the "End of Central Directory" record, or None.
81 The data is a list of the nine items in the ZIP "End of central dir"
82 record followed by a tenth item, the file seek offset of this record."""
83 fpin
.seek(-22, 2) # Assume no archive comment.
84 filesize
= fpin
.tell() + 22 # Get file size
86 if data
[0:4] == stringEndArchive
and data
[-2:] == "\000\000":
87 endrec
= struct
.unpack(structEndArchive
, data
)
89 endrec
.append("") # Append the archive comment
90 endrec
.append(filesize
- 22) # Append the record start offset
92 # Search the last END_BLOCK bytes of the file for the record signature.
93 # The comment is appended to the ZIP file and has a 16 bit length.
94 # So the comment may be up to 64K long. We limit the search for the
95 # signature to a few Kbytes at the end of the file for efficiency.
96 # also, the signature must not appear in the comment.
97 END_BLOCK
= min(filesize
, 1024 * 4)
98 fpin
.seek(filesize
- END_BLOCK
, 0)
100 start
= data
.rfind(stringEndArchive
)
101 if start
>= 0: # Correct signature string was found
102 endrec
= struct
.unpack(structEndArchive
, data
[start
:start
+22])
103 endrec
= list(endrec
)
104 comment
= data
[start
+22:]
105 if endrec
[7] == len(comment
): # Comment length checks out
106 # Append the archive comment and start offset
107 endrec
.append(comment
)
108 endrec
.append(filesize
- END_BLOCK
+ start
)
110 return # Error, return None
114 """Class with attributes describing each file in the ZIP archive."""
116 def __init__(self
, filename
="NoName", date_time
=(1980,1,1,0,0,0)):
117 self
.orig_filename
= filename
# Original file name in archive
118 # Terminate the file name at the first null byte. Null bytes in file
119 # names are used as tricks by viruses in archives.
120 null_byte
= filename
.find(chr(0))
122 filename
= filename
[0:null_byte
]
123 # This is used to ensure paths in generated ZIP files always use
124 # forward slashes as the directory separator, as required by the
125 # ZIP format specification.
127 filename
= filename
.replace(os
.sep
, "/")
128 self
.filename
= filename
# Normalized file name
129 self
.date_time
= date_time
# year, month, day, hour, min, sec
131 self
.compress_type
= ZIP_STORED
# Type of compression for the file
132 self
.comment
= "" # Comment for each file
133 self
.extra
= "" # ZIP extra data
134 self
.create_system
= 0 # System which created ZIP archive
135 self
.create_version
= 20 # Version which created ZIP archive
136 self
.extract_version
= 20 # Version needed to extract archive
137 self
.reserved
= 0 # Must be zero
138 self
.flag_bits
= 0 # ZIP flag bits
139 self
.volume
= 0 # Volume number of file header
140 self
.internal_attr
= 0 # Internal attributes
141 self
.external_attr
= 0 # External file attributes
142 # Other attributes are set by class ZipFile:
143 # header_offset Byte offset to the file header
144 # file_offset Byte offset to the start of the file data
145 # CRC CRC-32 of the uncompressed file
146 # compress_size Size of the compressed file
147 # file_size Size of the uncompressed file
149 def FileHeader(self
):
150 """Return the per-file header as a string."""
152 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
153 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
154 if self
.flag_bits
& 0x08:
155 # Set these to zero because we write them after the file data
156 CRC
= compress_size
= file_size
= 0
159 compress_size
= self
.compress_size
160 file_size
= self
.file_size
161 header
= struct
.pack(structFileHeader
, stringFileHeader
,
162 self
.extract_version
, self
.reserved
, self
.flag_bits
,
163 self
.compress_type
, dostime
, dosdate
, CRC
,
164 compress_size
, file_size
,
165 len(self
.filename
), len(self
.extra
))
166 return header
+ self
.filename
+ self
.extra
170 """ Class with methods to open, read, write, close, list zip files.
172 z = ZipFile(file, mode="r", compression=ZIP_STORED)
174 file: Either the path to the file, or a file-like object.
175 If it is a path, the file will be opened and closed by ZipFile.
176 mode: The mode can be either read "r", write "w" or append "a".
177 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
180 fp
= None # Set here since __del__ checks it
182 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
):
183 """Open the ZIP file with mode read "r", write "w" or append "a"."""
184 if compression
== ZIP_STORED
:
186 elif compression
== ZIP_DEFLATED
:
189 "Compression requires the (missing) zlib module"
191 raise RuntimeError, "That compression method is not supported"
192 self
.debug
= 0 # Level of printing: 0 through 3
193 self
.NameToInfo
= {} # Find file info given name
194 self
.filelist
= [] # List of ZipInfo instances for archive
195 self
.compression
= compression
# Method of compression
196 self
.mode
= key
= mode
[0]
198 # Check if we were passed a file-like object
199 if isinstance(file, basestring
):
202 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
203 self
.fp
= open(file, modeDict
[mode
])
207 self
.filename
= getattr(file, 'name', None)
214 try: # See if file is a zip file
215 self
._RealGetContents
()
216 # seek to start of directory and overwrite
217 self
.fp
.seek(self
.start_dir
, 0)
218 except BadZipfile
: # file is not a zip file, just append
221 if not self
._filePassed
:
224 raise RuntimeError, 'Mode must be "r", "w" or "a"'
226 def _GetContents(self
):
227 """Read the directory, making sure we close the file if the format
230 self
._RealGetContents
()
232 if not self
._filePassed
:
237 def _RealGetContents(self
):
238 """Read in the table of contents for the ZIP file."""
240 endrec
= _EndRecData(fp
)
242 raise BadZipfile
, "File is not a zip file"
245 size_cd
= endrec
[5] # bytes in central directory
246 offset_cd
= endrec
[6] # offset of central directory
247 self
.comment
= endrec
[8] # archive comment
248 # endrec[9] is the offset of the "End of Central Dir" record
249 x
= endrec
[9] - size_cd
250 # "concat" is zero, unless zip was concatenated to another file
251 concat
= x
- offset_cd
253 print "given, inferred, offset", offset_cd
, x
, concat
254 # self.start_dir: Position of start of central directory
255 self
.start_dir
= offset_cd
+ concat
256 fp
.seek(self
.start_dir
, 0)
258 while total
< size_cd
:
259 centdir
= fp
.read(46)
261 if centdir
[0:4] != stringCentralDir
:
262 raise BadZipfile
, "Bad magic number for central directory"
263 centdir
= struct
.unpack(structCentralDir
, centdir
)
266 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
267 # Create ZipInfo instance to store file information
268 x
= ZipInfo(filename
)
269 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
270 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
271 total
= (total
+ centdir
[_CD_FILENAME_LENGTH
]
272 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
273 + centdir
[_CD_COMMENT_LENGTH
])
274 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
] + concat
275 # file_offset must be computed below...
276 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
277 x
.flag_bits
, x
.compress_type
, t
, d
,
278 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
279 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
280 # Convert date/time code to (year, month, day, hour, min, sec)
281 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
282 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
283 self
.filelist
.append(x
)
284 self
.NameToInfo
[x
.filename
] = x
287 for data
in self
.filelist
:
288 fp
.seek(data
.header_offset
, 0)
289 fheader
= fp
.read(30)
290 if fheader
[0:4] != stringFileHeader
:
291 raise BadZipfile
, "Bad magic number for file header"
292 fheader
= struct
.unpack(structFileHeader
, fheader
)
293 # file_offset is computed here, since the extra field for
294 # the central directory and for the local file header
295 # refer to different fields, and they can have different
297 data
.file_offset
= (data
.header_offset
+ 30
298 + fheader
[_FH_FILENAME_LENGTH
]
299 + fheader
[_FH_EXTRA_FIELD_LENGTH
])
300 fname
= fp
.read(fheader
[_FH_FILENAME_LENGTH
])
301 if fname
!= data
.orig_filename
:
302 raise RuntimeError, \
303 'File name in directory "%s" and header "%s" differ.' % (
304 data
.orig_filename
, fname
)
307 """Return a list of file names in the archive."""
309 for data
in self
.filelist
:
310 l
.append(data
.filename
)
314 """Return a list of class ZipInfo instances for files in the
319 """Print a table of contents for the zip file."""
320 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
321 for zinfo
in self
.filelist
:
322 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
323 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
326 """Read all the files and check the CRC."""
327 for zinfo
in self
.filelist
:
329 self
.read(zinfo
.filename
) # Check CRC-32
331 return zinfo
.filename
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    'name' is looked up in self.NameToInfo, the mapping that is filled in
    with each member's ZipInfo keyed by its filename attribute.

    Raises KeyError when the archive has no member stored under 'name';
    the message names the member that was requested.
    """
    try:
        return self.NameToInfo[name]
    except KeyError:
        # Still a KeyError, so callers that already catch it keep working,
        # but report which member was missing instead of a bare dict key.
        raise KeyError("There is no item named %r in the archive" % (name,))
337 def read(self
, name
):
338 """Return file bytes (as a string) for name."""
339 if self
.mode
not in ("r", "a"):
340 raise RuntimeError, 'read() requires mode "r" or "a"'
342 raise RuntimeError, \
343 "Attempt to read ZIP archive that was already closed"
344 zinfo
= self
.getinfo(name
)
345 filepos
= self
.fp
.tell()
346 self
.fp
.seek(zinfo
.file_offset
, 0)
347 bytes
= self
.fp
.read(zinfo
.compress_size
)
348 self
.fp
.seek(filepos
, 0)
349 if zinfo
.compress_type
== ZIP_STORED
:
351 elif zinfo
.compress_type
== ZIP_DEFLATED
:
353 raise RuntimeError, \
354 "De-compression requires the (missing) zlib module"
355 # zlib compress/decompress code by Jeremy Hylton of CNRI
356 dc
= zlib
.decompressobj(-15)
357 bytes
= dc
.decompress(bytes
)
358 # need to feed in unused pad byte so that zlib won't choke
359 ex
= dc
.decompress('Z') + dc
.flush()
364 "Unsupported compression method %d for file %s" % \
365 (zinfo
.compress_type
, name
)
366 crc
= binascii
.crc32(bytes
)
368 raise BadZipfile
, "Bad CRC-32 for file %s" % name
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    zinfo is the ZipInfo describing the member about to be written; its
    filename and compress_type attributes are inspected.

    Raises RuntimeError when the archive mode is not "w" or "a", when the
    archive has already been closed, when ZIP_DEFLATED is requested while
    the module-level name zlib is false, or when the compression type is
    neither ZIP_STORED nor ZIP_DEFLATED.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Duplicate names are only reported (when debugging), not rejected.
        print("Duplicate name: %s" % (zinfo.filename,))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    # NOTE(review): the guard for this raise was not visible in the reviewed
    # extract; "not self.fp" is inferred from the error message and from the
    # class-level "fp = None" default -- confirm against the full file.
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
388 def write(self
, filename
, arcname
=None, compress_type
=None):
389 """Put the bytes from filename into the archive under the name
391 st
= os
.stat(filename
)
392 mtime
= time
.localtime(st
.st_mtime
)
393 date_time
= mtime
[0:6]
394 # Create ZipInfo instance to store file information
396 zinfo
= ZipInfo(filename
, date_time
)
398 zinfo
= ZipInfo(arcname
, date_time
)
399 zinfo
.external_attr
= st
[0] << 16L # Unix attributes
400 if compress_type
is None:
401 zinfo
.compress_type
= self
.compression
403 zinfo
.compress_type
= compress_type
404 self
._writecheck
(zinfo
)
405 fp
= open(filename
, "rb")
406 zinfo
.flag_bits
= 0x00
407 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
408 # Must overwrite CRC and sizes with correct data later
410 zinfo
.compress_size
= compress_size
= 0
411 zinfo
.file_size
= file_size
= 0
412 self
.fp
.write(zinfo
.FileHeader())
413 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
414 if zinfo
.compress_type
== ZIP_DEFLATED
:
415 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
420 buf
= fp
.read(1024 * 8)
423 file_size
= file_size
+ len(buf
)
424 CRC
= binascii
.crc32(buf
, CRC
)
426 buf
= cmpr
.compress(buf
)
427 compress_size
= compress_size
+ len(buf
)
432 compress_size
= compress_size
+ len(buf
)
434 zinfo
.compress_size
= compress_size
436 zinfo
.compress_size
= file_size
438 zinfo
.file_size
= file_size
439 # Seek backwards and write CRC and file sizes
440 position
= self
.fp
.tell() # Preserve current position in file
441 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
442 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
444 self
.fp
.seek(position
, 0)
445 self
.filelist
.append(zinfo
)
446 self
.NameToInfo
[zinfo
.filename
] = zinfo
448 def writestr(self
, zinfo_or_arcname
, bytes
):
449 """Write a file into the archive. The contents is the string
450 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
451 the name of the file in the archive."""
452 if not isinstance(zinfo_or_arcname
, ZipInfo
):
453 zinfo
= ZipInfo(filename
=zinfo_or_arcname
,
454 date_time
=time
.localtime(time
.time()))
455 zinfo
.compress_type
= self
.compression
457 zinfo
= zinfo_or_arcname
458 self
._writecheck
(zinfo
)
459 zinfo
.file_size
= len(bytes
) # Uncompressed size
460 zinfo
.CRC
= binascii
.crc32(bytes
) # CRC-32 checksum
461 if zinfo
.compress_type
== ZIP_DEFLATED
:
462 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
464 bytes
= co
.compress(bytes
) + co
.flush()
465 zinfo
.compress_size
= len(bytes
) # Compressed size
467 zinfo
.compress_size
= zinfo
.file_size
468 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
469 self
.fp
.write(zinfo
.FileHeader())
470 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
472 if zinfo
.flag_bits
& 0x08:
473 # Write CRC and file sizes after the file data
474 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
476 self
.filelist
.append(zinfo
)
477 self
.NameToInfo
[zinfo
.filename
] = zinfo
480 """Call the "close()" method in case the user forgot."""
484 """Close the file, and for mode "w" and "a" write the ending
488 if self
.mode
in ("w", "a"): # write ending records
490 pos1
= self
.fp
.tell()
491 for zinfo
in self
.filelist
: # write central directory
494 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
495 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
496 centdir
= struct
.pack(structCentralDir
,
497 stringCentralDir
, zinfo
.create_version
,
498 zinfo
.create_system
, zinfo
.extract_version
, zinfo
.reserved
,
499 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
500 zinfo
.CRC
, zinfo
.compress_size
, zinfo
.file_size
,
501 len(zinfo
.filename
), len(zinfo
.extra
), len(zinfo
.comment
),
502 0, zinfo
.internal_attr
, zinfo
.external_attr
,
504 self
.fp
.write(centdir
)
505 self
.fp
.write(zinfo
.filename
)
506 self
.fp
.write(zinfo
.extra
)
507 self
.fp
.write(zinfo
.comment
)
508 pos2
= self
.fp
.tell()
509 # Write end-of-zip-archive record
510 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
511 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
512 self
.fp
.write(endrec
)
514 if not self
._filePassed
:
519 class PyZipFile(ZipFile
):
520 """Class to create ZIP archives with Python library files and packages."""
522 def writepy(self
, pathname
, basename
= ""):
523 """Add all files from "pathname" to the ZIP archive.
525 If pathname is a package directory, search the directory and
526 all package subdirectories recursively for all *.py and enter
527 the modules into the archive. If pathname is a plain
528 directory, listdir *.py and enter all modules. Else, pathname
529 must be a Python *.py file and the module will be put into the
530 archive. Added modules are always module.pyo or module.pyc.
531 This method will compile the module.py into module.pyc if
534 dir, name
= os
.path
.split(pathname
)
535 if os
.path
.isdir(pathname
):
536 initname
= os
.path
.join(pathname
, "__init__.py")
537 if os
.path
.isfile(initname
):
538 # This is a package directory, add it
540 basename
= "%s/%s" % (basename
, name
)
544 print "Adding package in", pathname
, "as", basename
545 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
547 print "Adding", arcname
548 self
.write(fname
, arcname
)
549 dirlist
= os
.listdir(pathname
)
550 dirlist
.remove("__init__.py")
551 # Add all *.py files and package subdirectories
552 for filename
in dirlist
:
553 path
= os
.path
.join(pathname
, filename
)
554 root
, ext
= os
.path
.splitext(filename
)
555 if os
.path
.isdir(path
):
556 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
557 # This is a package directory, add it
558 self
.writepy(path
, basename
) # Recursive call
560 fname
, arcname
= self
._get
_codename
(path
[0:-3],
563 print "Adding", arcname
564 self
.write(fname
, arcname
)
566 # This is NOT a package directory, add its files at top level
568 print "Adding files from directory", pathname
569 for filename
in os
.listdir(pathname
):
570 path
= os
.path
.join(pathname
, filename
)
571 root
, ext
= os
.path
.splitext(filename
)
573 fname
, arcname
= self
._get
_codename
(path
[0:-3],
576 print "Adding", arcname
577 self
.write(fname
, arcname
)
579 if pathname
[-3:] != ".py":
580 raise RuntimeError, \
581 'Files added with writepy() must end with ".py"'
582 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
584 print "Adding file", arcname
585 self
.write(fname
, arcname
)
587 def _get_codename(self
, pathname
, basename
):
588 """Return (filename, archivename) for the path.
590 Given a module name path, return the correct file path and
591 archive name, compiling if necessary. For example, given
592 /python/lib/string, return (/python/lib/string.pyc, string).
594 file_py
= pathname
+ ".py"
595 file_pyc
= pathname
+ ".pyc"
596 file_pyo
= pathname
+ ".pyo"
597 if os
.path
.isfile(file_pyo
) and \
598 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
599 fname
= file_pyo
# Use .pyo file
600 elif not os
.path
.isfile(file_pyc
) or \
601 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
604 print "Compiling", file_py
606 py_compile
.compile(file_py
, file_pyc
, None, True)
607 except py_compile
.PyCompileError
,err
:
612 archivename
= os
.path
.split(fname
)[1]
614 archivename
= "%s/%s" % (basename
, archivename
)
615 return (fname
, archivename
)