1 "Read and write ZIP files."
2 # Written by James C. Ahlstrom jim@interet.com
3 # All rights transferred to CNRI pursuant to the Python contribution agreement
5 import struct
, os
, time
9 import zlib
# We may need its compression method
class BadZipfile(Exception):
    """Raised when an archive is malformed or fails a CRC-32 check."""


# The exception raised by this module
error = BadZipfile
# Compression method codes as stored in ZIP headers.
# Other ZIP compression methods are not supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct module formats (little-endian, standard sizes) and the 4-byte
# magic numbers for the three record types read and written here.

# End-of-archive record: 8 items, 22 bytes
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 items, 46 bytes
structCentralDir = "<4s4B4H3l5H2l"
stringCentralDir = "PK\001\002"

# Per-file (local) header record: 12 items, 30 bytes
structFileHeader = "<4s2B4H3l2H"
stringFileHeader = "PK\003\004"
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Reads the last 22 bytes of 'filename' and checks that they start with
    the end-of-archive magic number and finish with a zero comment length.
    Will not accept a ZIP archive with an ending comment.

    Returns 1 when the trailer matches.  Returns None otherwise, which
    includes a file that cannot be opened or is shorter than 22 bytes.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Release the handle even when seek()/read() fails.
            fpin.close()
    except (IOError, OSError):
        # Unreadable, or too short to hold an end-of-archive record.
        return None
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                        # file has correct magic number
    return None
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named 'filename' stamped with 'date_time'.

        'date_time' is a (year, month, day, hour, min, sec) sequence.
        Every other header field starts from the fixed values below.
        """
        self.filename = filename        # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Per-file text and extra data start out empty
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        # Version and origin fields
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        # Remaining header fields
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte header record followed by the
        file name and the extra data.
        """
        stamp = self.date_time
        # Pack the date and time into the two 16-bit header fields;
        # seconds are stored halved.
        dos_date = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dos_time = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] >> 1)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            crc = packed_size = unpacked_size = 0
        else:
            crc = self.CRC
            packed_size = self.compress_size
            unpacked_size = self.file_size
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, dos_time, dos_date, crc,
            packed_size, unpacked_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
93 """Class with methods to open, read, write, close, list zip files."""
95 def __init__(self
, filename
, mode
="r", compression
=ZIP_STORED
):
96 """Open the ZIP file with mode read "r", write "w" or append "a"."""
97 if compression
== ZIP_STORED
:
99 elif compression
== ZIP_DEFLATED
:
102 "Compression requires the (missing) zlib module"
104 raise RuntimeError, "That compression method is not supported"
105 self
.debug
= 0 # Level of printing: 0 through 3
106 self
.NameToInfo
= {} # Find file info given name
107 self
.filelist
= [] # List of ZipInfo instances for archive
108 self
.compression
= compression
# Method of compression
109 self
.filename
= filename
110 self
.mode
= key
= mode
[0]
112 self
.fp
= open(filename
, "rb")
115 self
.fp
= open(filename
, "wb")
117 fp
= self
.fp
= open(filename
, "r+b")
118 fp
.seek(-22, 2) # Seek to end-of-file record
120 if endrec
[0:4] == stringEndArchive
and \
121 endrec
[-2:] == "\000\000":
122 self
._GetContents
() # file is a zip file
123 # seek to start of directory and overwrite
124 fp
.seek(self
.start_dir
, 0)
125 else: # file is not a zip file, just append
128 raise RuntimeError, 'Mode must be "r", "w" or "a"'
130 def _GetContents(self
):
131 """Read in the table of contents for the ZIP file."""
133 fp
.seek(-22, 2) # Start of end-of-archive record
134 filesize
= fp
.tell() + 22 # Get file size
135 endrec
= fp
.read(22) # Archive must not end with a comment!
136 if endrec
[0:4] != stringEndArchive
or endrec
[-2:] != "\000\000":
137 raise BadZipfile
, "File is not a zip file, or ends with a comment"
138 endrec
= struct
.unpack(structEndArchive
, endrec
)
141 size_cd
= endrec
[5] # bytes in central directory
142 offset_cd
= endrec
[6] # offset of central directory
143 x
= filesize
- 22 - size_cd
144 # "concat" is zero, unless zip was concatenated to another file
145 concat
= x
- offset_cd
147 print "given, inferred, offset", offset_cd
, x
, concat
148 # self.start_dir: Position of start of central directory
149 self
.start_dir
= offset_cd
+ concat
150 fp
.seek(self
.start_dir
, 0)
152 while total
< size_cd
:
153 centdir
= fp
.read(46)
155 if centdir
[0:4] != stringCentralDir
:
156 raise BadZipfile
, "Bad magic number for central directory"
157 centdir
= struct
.unpack(structCentralDir
, centdir
)
160 filename
= fp
.read(centdir
[12])
161 # Create ZipInfo instance to store file information
162 x
= ZipInfo(filename
)
163 x
.extra
= fp
.read(centdir
[13])
164 x
.comment
= fp
.read(centdir
[14])
165 total
= total
+ centdir
[12] + centdir
[13] + centdir
[14]
166 x
.header_offset
= centdir
[18] + concat
167 x
.file_offset
= x
.header_offset
+ 30 + centdir
[12] + centdir
[13]
168 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
169 x
.flag_bits
, x
.compress_type
, t
, d
,
170 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
171 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
172 # Convert date/time code to (year, month, day, hour, min, sec)
173 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
174 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
175 self
.filelist
.append(x
)
176 self
.NameToInfo
[x
.filename
] = x
179 for data
in self
.filelist
:
180 fp
.seek(data
.header_offset
, 0)
181 fheader
= fp
.read(30)
182 if fheader
[0:4] != stringFileHeader
:
183 raise BadZipfile
, "Bad magic number for file header"
184 fheader
= struct
.unpack(structFileHeader
, fheader
)
185 fname
= fp
.read(fheader
[10])
186 if fname
!= data
.filename
:
187 raise RuntimeError, \
188 'File name in directory "%s" and header "%s" differ.' % (
189 data
.filename
, fname
)
192 """Return a list of file names in the archive."""
194 for data
in self
.filelist
:
195 l
.append(data
.filename
)
199 """Return a list of class ZipInfo instances for files in the
204 """Print a table of contents for the zip file."""
205 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
206 for zinfo
in self
.filelist
:
207 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
208 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
211 """Read all the files and check the CRC."""
212 for zinfo
in self
.filelist
:
214 self
.read(zinfo
.filename
) # Check CRC-32
216 return zinfo
.filename
218 def getinfo(self
, name
):
219 """Return the instance of ZipInfo given 'name'."""
220 return self
.NameToInfo
[name
]
222 def read(self
, name
):
223 """Return file bytes (as a string) for name."""
224 if self
.mode
not in ("r", "a"):
225 raise RuntimeError, 'read() requires mode "r" or "a"'
227 raise RuntimeError, \
228 "Attempt to read ZIP archive that was already closed"
229 zinfo
= self
.getinfo(name
)
230 filepos
= self
.fp
.tell()
231 self
.fp
.seek(zinfo
.file_offset
, 0)
232 bytes
= self
.fp
.read(zinfo
.compress_size
)
233 self
.fp
.seek(filepos
, 0)
234 if zinfo
.compress_type
== ZIP_STORED
:
236 elif zinfo
.compress_type
== ZIP_DEFLATED
:
238 raise RuntimeError, \
239 "De-compression requires the (missing) zlib module"
240 # zlib compress/decompress code by Jeremy Hylton of CNRI
241 dc
= zlib
.decompressobj(-15)
242 bytes
= dc
.decompress(bytes
)
243 # need to feed in unused pad byte so that zlib won't choke
244 ex
= dc
.decompress('Z') + dc
.flush()
249 "Unsupported compression method %d for file %s" % \
250 (zinfo
.compress_type
, name
)
251 crc
= binascii
.crc32(bytes
)
253 raise BadZipfile
, "Bad CRC-32 for file %s" % name
256 def _writecheck(self
, zinfo
):
257 """Check for errors before writing a file to the archive."""
258 if self
.NameToInfo
.has_key(zinfo
.filename
):
259 if self
.debug
: # Warning for duplicate names
260 print "Duplicate name:", zinfo
.filename
261 if self
.mode
not in ("w", "a"):
262 raise RuntimeError, 'write() requires mode "w" or "a"'
264 raise RuntimeError, \
265 "Attempt to write ZIP archive that was already closed"
266 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
267 raise RuntimeError, \
268 "Compression requires the (missing) zlib module"
269 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
270 raise RuntimeError, \
271 "That compression method is not supported"
273 def write(self
, filename
, arcname
=None, compress_type
=None):
274 """Put the bytes from filename into the archive under the name
276 st
= os
.stat(filename
)
277 mtime
= time
.localtime(st
[8])
278 date_time
= mtime
[0:6]
279 # Create ZipInfo instance to store file information
281 zinfo
= ZipInfo(filename
, date_time
)
283 zinfo
= ZipInfo(arcname
, date_time
)
284 zinfo
.external_attr
= st
[0] << 16 # Unix attributes
285 if compress_type
is None:
286 zinfo
.compress_type
= self
.compression
288 zinfo
.compress_type
= compress_type
289 self
._writecheck
(zinfo
)
290 fp
= open(filename
, "rb")
291 zinfo
.flag_bits
= 0x08
292 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
293 self
.fp
.write(zinfo
.FileHeader())
294 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
298 if zinfo
.compress_type
== ZIP_DEFLATED
:
299 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
304 buf
= fp
.read(1024 * 8)
307 file_size
= file_size
+ len(buf
)
308 CRC
= binascii
.crc32(buf
, CRC
)
310 buf
= cmpr
.compress(buf
)
311 compress_size
= compress_size
+ len(buf
)
316 compress_size
= compress_size
+ len(buf
)
318 zinfo
.compress_size
= compress_size
320 zinfo
.compress_size
= file_size
322 zinfo
.file_size
= file_size
323 # Write CRC and file sizes after the file data
324 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
326 self
.filelist
.append(zinfo
)
327 self
.NameToInfo
[zinfo
.filename
] = zinfo
329 def writestr(self
, zinfo
, bytes
):
330 """Write a file into the archive. The contents is the string
332 self
._writecheck
(zinfo
)
333 zinfo
.file_size
= len(bytes
) # Uncompressed size
334 zinfo
.CRC
= binascii
.crc32(bytes
) # CRC-32 checksum
335 if zinfo
.compress_type
== ZIP_DEFLATED
:
336 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
338 bytes
= co
.compress(bytes
) + co
.flush()
339 zinfo
.compress_size
= len(bytes
) # Compressed size
341 zinfo
.compress_size
= zinfo
.file_size
342 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
343 self
.fp
.write(zinfo
.FileHeader())
344 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
346 if zinfo
.flag_bits
& 0x08:
347 # Write CRC and file sizes after the file data
348 self
.fp
.write(struct
.pack("<lll", zinfo
.CRC
, zinfo
.compress_size
,
350 self
.filelist
.append(zinfo
)
351 self
.NameToInfo
[zinfo
.filename
] = zinfo
354 """Call the "close()" method in case the user forgot."""
360 """Close the file, and for mode "w" and "a" write the ending
362 if self
.mode
in ("w", "a"): # write ending records
364 pos1
= self
.fp
.tell()
365 for zinfo
in self
.filelist
: # write central directory
368 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
369 dostime
= dt
[3] << 11 | dt
[4] << 5 | dt
[5] / 2
370 centdir
= struct
.pack(structCentralDir
,
371 stringCentralDir
, zinfo
.create_version
,
372 zinfo
.create_system
, zinfo
.extract_version
, zinfo
.reserved
,
373 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
374 zinfo
.CRC
, zinfo
.compress_size
, zinfo
.file_size
,
375 len(zinfo
.filename
), len(zinfo
.extra
), len(zinfo
.comment
),
376 0, zinfo
.internal_attr
, zinfo
.external_attr
,
378 self
.fp
.write(centdir
)
379 self
.fp
.write(zinfo
.filename
)
380 self
.fp
.write(zinfo
.extra
)
381 self
.fp
.write(zinfo
.comment
)
382 pos2
= self
.fp
.tell()
383 # Write end-of-zip-archive record
384 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
385 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
386 self
.fp
.write(endrec
)
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        A package directory (one holding __init__.py) is added together
        with its *.py modules and, recursively, its package
        subdirectories; archive names are prefixed with the package path.
        A plain directory has its *.py modules added without descending
        further.  Anything else must be a single *.py file, otherwise
        RuntimeError is raised.  What is stored is always the compiled
        module.pyo or module.pyc; module.py is compiled into module.pyc
        if necessary.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("%s %s" % ("Adding file", arcname))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("%s %s" % ("Adding files from directory", pathname))
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("%s %s" % ("Adding", arcname))
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("%s %s %s %s" % ("Adding package in", pathname,
                                   "as", basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("%s %s" % ("Adding", arcname))
        self.write(fname, arcname)
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3],
                                                    basename)
                if self.debug:
                    print("%s %s" % ("Adding", arcname))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without its extension.  An up-to-date
        .pyo file is used when there is one; otherwise the .pyc file is
        used, after compiling the .py file if the .pyc is missing or older.
        The archive name is the chosen file's base name, prefixed with
        'basename' and a slash when 'basename' is not empty.  For example,
        given /python/lib/string and an empty basename, return
        (/python/lib/string.pyc, string.pyc).
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
           os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo            # Use .pyo file
        else:
            fname = file_pyc
            if not os.path.isfile(file_pyc) or \
               os.stat(file_pyc)[8] < os.stat(file_py)[8]:
                import py_compile
                if self.debug:
                    print("%s %s" % ("Compiling", file_py))
                py_compile.compile(file_py, file_pyc)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)