"Read and write ZIP files."
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement
import binascii
import os
import struct
import time

try:
    import zlib  # We may need its compression method
except ImportError:
    zlib = None
# Public names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]
class BadZipfile(Exception):
    """Raised for a malformed archive.

    This module raises it for a bad magic number, a failed CRC-32 check
    or an unsupported compression method.
    """


error = BadZipfile      # The exception raised by this module
20 # constants for Zip file compression methods
23 # Other ZIP compression methods not supported
25 # Here are some struct module formats for reading headers
26 structEndArchive
= "<4s4H2lH" # 9 items, end of archive, 22 bytes
27 stringEndArchive
= "PK\005\006" # magic number for end of archive record
28 structCentralDir
= "<4s4B4H3l5H2l"# 19 items, central directory, 46 bytes
29 stringCentralDir
= "PK\001\002" # magic number for central directory
30 structFileHeader
= "<4s2B4H3l2H" # 12 items, file header record, 30 bytes
31 stringFileHeader
= "PK\003\004" # magic number for file header
33 # indexes of entries in the central directory structure
35 _CD_CREATE_VERSION
= 1
37 _CD_EXTRACT_VERSION
= 3
38 _CD_EXTRACT_SYSTEM
= 4 # is this meaningful?
44 _CD_COMPRESSED_SIZE
= 10
45 _CD_UNCOMPRESSED_SIZE
= 11
46 _CD_FILENAME_LENGTH
= 12
47 _CD_EXTRA_FIELD_LENGTH
= 13
48 _CD_COMMENT_LENGTH
= 14
49 _CD_DISK_NUMBER_START
= 15
50 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
51 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
52 _CD_LOCAL_HEADER_OFFSET
= 18
54 # indexes of entries in the local file header structure
56 _FH_EXTRACT_VERSION
= 1
57 _FH_EXTRACT_SYSTEM
= 2 # is this meaningful?
58 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
59 _FH_COMPRESSION_METHOD
= 4
63 _FH_COMPRESSED_SIZE
= 8
64 _FH_UNCOMPRESSED_SIZE
= 9
65 _FH_FILENAME_LENGTH
= 10
66 _FH_EXTRA_FIELD_LENGTH
= 11
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Will not accept a ZIP archive with an ending comment.

    Returns 1 when the last 22 bytes of the file start with the
    end-of-archive magic number and end with a zero comment length;
    returns None otherwise, including when the file cannot be opened
    or is shorter than 22 bytes.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Always release the handle, even when seek()/read() fails.
            fpin.close()
    except Exception:
        # Deliberately best-effort: any failure means "not a ZIP file".
        return None
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                        # file has correct magic number
    return None
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- name of the file in the archive
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.filename = filename        # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by
        the file name and the extra data.  When bit 0x08 of flag_bits is
        set, the CRC and both sizes are written as zero because they are
        written after the file data instead.
        """
        dt = self.date_time
        # Pack the date and time into the 16-bit DOS formats.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        # Seconds are stored in 2-second units; floor division keeps the
        # value an integer regardless of the division semantics in force.
        dostime = dt[3] << 11 | dt[4] << 5 | dt[5] // 2
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(self.extra))
        return header + self.filename + self.extra
class ZipFile:
    """Class with methods to open, read, write, close, list zip files."""

    fp = None                   # Set here since __del__ checks it

    def __init__(self, filename, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        compression is the default method for members added with write():
        ZIP_STORED or ZIP_DEFLATED (the latter requires zlib).
        Raises RuntimeError for an unsupported compression method or mode.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.filename = filename
        # Only the first character of mode is significant.
        self.mode = key = mode[0]
        if key == 'r':
            self.fp = open(filename, "rb")
            self._GetContents()
        elif key == 'w':
            self.fp = open(filename, "wb")
        elif key == 'a':
            fp = self.fp = open(filename, "r+b")
            fp.seek(-22, 2)             # Seek to end-of-file record
            endrec = fp.read()
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')

    def _GetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo from the central
        directory and sets self.start_dir.  Raises BadZipfile when a
        magic number is wrong or the archive ends with a comment.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile(
                  "File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        # Where the directory really starts, judging by the file size.
        inferred = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0               # Directory bytes consumed so far
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
                zinfo.extract_version, zinfo.reserved,
                zinfo.flag_bits, zinfo.compress_type, t, d,
                zinfo.CRC, zinfo.compress_size,
                zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
                zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that cannot be read, or
        None when every member reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except Exception:
                return zinfo.filename
        return None

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is closed, opened in the
        wrong mode, or zlib is needed but missing; raises BadZipfile for
        an unsupported compression method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        # Remember the current position so that reading a member does
        # not disturb a pending write in append mode.
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the archive's
        default compression.  The data is streamed in 8 KB chunks; the
        CRC and sizes are written after the data (flag bit 0x08).
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x08
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            CRC = 0
            compress_size = 0
            file_size = 0
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Release the source file even when reading or writing fails.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is a ZipInfo instance; its size, CRC and offset attributes
        are filled in here.
        """
        self._writecheck(zinfo)
        payload = bytes
        zinfo.file_size = len(payload)          # Uncompressed size
        zinfo.CRC = binascii.crc32(payload)     # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            payload = co.compress(payload) + co.flush()
            zinfo.compress_size = len(payload)  # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(payload)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Release the underlying file in case the user forgot to close.

        Only the file handle is closed here; the ending records are
        written by close().
        """
        if self.fp:
            self.fp.close()
            self.fp = None

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an already closed archive does nothing.
        """
        if self.fp is None:
            return
        if self.mode in ("w", "a"):             # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                # Floor division keeps the 2-second field an integer.
                dostime = dt[3] << 11 | dt[4] << 5 | dt[5] // 2
                centdir = struct.pack(structCentralDir,
                  stringCentralDir, zinfo.create_version,
                  zinfo.create_system, zinfo.extract_version, zinfo.reserved,
                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                  zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                  len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
                  0, zinfo.internal_attr, zinfo.external_attr,
                  zinfo.header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(zinfo.extra)
                self.fp.write(zinfo.comment)
            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            endrec = struct.pack(structEndArchive, stringEndArchive,
                     0, 0, count, count, pos2 - pos1, pos1, 0)
            self.fp.write(endrec)
        self.fp.close()
        self.fp = None
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        Raises RuntimeError when pathname is a file that does not end
        with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s"
                          % (pathname, basename))
                self._write_module(initname[0:-3], basename, "Adding")
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")   # Already written above
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path[0:-3], basename, "Adding")
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        self._write_module(path[0:-3], basename, "Adding")
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")

    def _write_module(self, modpath, basename, label):
        """Resolve modpath to its compiled file and write it to the archive.

        modpath is the module path without the ".py" extension; label is
        the prefix of the message printed when self.debug is set.
        """
        fname, arcname = self._get_codename(modpath, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        # Index 8 of the stat tuple is the modification time.
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
             os.stat(file_pyc)[8] < os.stat(file_py)[8]:
            # The .pyc is missing or older than the source: (re)compile.
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)