Files for 2.1b1 distribution.
[python/dscho.git] / Lib / zipfile.py
blob39b1511667e7a61ab4e8dc8d0416afed4a027a28
1 "Read and write ZIP files."
2 # Written by James C. Ahlstrom jim@interet.com
3 # All rights transferred to CNRI pursuant to the Python contribution agreement
5 import struct, os, time
6 import binascii
# zlib is optional: without it only ZIP_STORED archives can be handled.
# Catch ImportError only, so that any other failure raised while
# importing zlib is reported instead of being mistaken for "not installed".
try:
    import zlib                 # We may need its compression method
except ImportError:
    zlib = None
# Names exported by "from zipfile import *"
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
]
class BadZipfile(Exception):
    """Exception raised by this module for a malformed ZIP file."""


# "error" is a second name for the same exception class
error = BadZipfile
# Compression methods understood by this module; every other ZIP
# compression method is unsupported.
ZIP_STORED, ZIP_DEFLATED = 0, 8

# struct module formats for the three fixed-size records
structEndArchive = "<4s4H2lH"       # 8 items, end of archive, 22 bytes
structCentralDir = "<4s4B4H3l5H2l"  # 19 items, central directory, 46 bytes
structFileHeader = "<4s2B4H3l2H"    # 12 items, file header record, 30 bytes

# Magic numbers found at the start of each of those records
stringEndArchive = "PK\005\006"     # end of archive record
stringCentralDir = "PK\001\002"     # central directory
stringFileHeader = "PK\003\004"     # file header
# Positions of the entries in a tuple unpacked with structCentralDir
(_CD_SIGNATURE,                 # 0
 _CD_CREATE_VERSION,            # 1
 _CD_CREATE_SYSTEM,             # 2
 _CD_EXTRACT_VERSION,           # 3
 _CD_EXTRACT_SYSTEM,            # 4   is this meaningful?
 _CD_FLAG_BITS,                 # 5
 _CD_COMPRESS_TYPE,             # 6
 _CD_TIME,                      # 7
 _CD_DATE,                      # 8
 _CD_CRC,                       # 9
 _CD_COMPRESSED_SIZE,           # 10
 _CD_UNCOMPRESSED_SIZE,         # 11
 _CD_FILENAME_LENGTH,           # 12
 _CD_EXTRA_FIELD_LENGTH,        # 13
 _CD_COMMENT_LENGTH,            # 14
 _CD_DISK_NUMBER_START,         # 15
 _CD_INTERNAL_FILE_ATTRIBUTES,  # 16
 _CD_EXTERNAL_FILE_ATTRIBUTES,  # 17
 _CD_LOCAL_HEADER_OFFSET,       # 18
 ) = range(19)

# Positions of the entries in a tuple unpacked with structFileHeader
(_FH_SIGNATURE,                 # 0
 _FH_EXTRACT_VERSION,           # 1
 _FH_EXTRACT_SYSTEM,            # 2   is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS, # 3
 _FH_COMPRESSION_METHOD,        # 4
 _FH_LAST_MOD_TIME,             # 5
 _FH_LAST_MOD_DATE,             # 6
 _FH_CRC,                       # 7
 _FH_COMPRESSED_SIZE,           # 8
 _FH_UNCOMPRESSED_SIZE,         # 9
 _FH_FILENAME_LENGTH,           # 10
 _FH_EXTRA_FIELD_LENGTH,        # 11
 ) = range(12)
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Will not accept a ZIP archive with an ending comment.

    Returns 1 if the last 22 bytes of the file start with the
    end-of-archive magic number and end with a zero comment length.
    Returns 0 otherwise, including when the file cannot be opened or
    is shorter than 22 bytes.
    """
    try:
        fpin = open(filename, "rb")
    except (IOError, OSError):
        return 0                # unreadable file: not a ZIP file
    try:
        try:
            fpin.seek(-22, 2)   # Seek to end-of-file record
            endrec = fpin.read()
        except (IOError, OSError):
            return 0            # e.g. file shorter than the record
    finally:
        # Always release the handle, even when the seek or read fails
        fpin.close()
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                # file has correct magic number
    return 0
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive.

    The attributes assigned by __init__() are documented there.  The
    following attributes are not set here; class ZipFile assigns them:

        header_offset   Byte offset to the file header
        file_offset     Byte offset to the start of the file data
        CRC             CRC-32 of the uncompressed file
        compress_size   Size of the compressed file
        file_size       Size of the uncompressed file
    """

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename   Name of the file in the archive
        date_time  Sequence (year, month, day, hour, min, sec)

        Every other attribute starts out with a standard value.
        """
        self.filename = filename        # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte file header record followed
        by the file name and the extra data.  Unless bit 0x08 of
        flag_bits is set, the CRC, compress_size and file_size
        attributes must already have been assigned.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        # The seconds are stored halved.  A shift is used rather than
        # "/" so the result stays an integer under true division too.
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] >> 1)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(self.extra))
        return header + self.filename + self.extra
class ZipFile:
    """Class with methods to open, read, write, close, list zip files.

    Instance attributes:
        debug        Level of printing: 0 through 3
        NameToInfo   Dictionary to find a ZipInfo instance given a name
        filelist     List of ZipInfo instances for the archive
        compression  Method of compression used by write() by default
        filename     The file name passed to the constructor
        mode         First character of the mode passed to the constructor
        fp           The open archive file, or None once it is closed
        start_dir    Position of the start of the central directory
                     (only set when an existing archive has been read)
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, filename, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        compression is ZIP_STORED or ZIP_DEFLATED.  RuntimeError is
        raised for any other compression method, for ZIP_DEFLATED when
        the zlib module is missing, and for an unknown mode.

        In mode "a" an existing ZIP archive is extended; a file that is
        not a ZIP archive (including one shorter than an end-of-archive
        record) simply has the archive appended to it.

        If reading the existing archive fails, the file is closed again
        before the exception is passed on.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0          # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.filename = filename
        self.mode = key = mode[0]
        if key == 'r':
            self.fp = open(filename, "rb")
        elif key == 'w':
            self.fp = open(filename, "wb")
        elif key == 'a':
            self.fp = open(filename, "r+b")
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
        try:
            if key == 'r':
                self._GetContents()
            elif key == 'a':
                self._seek_append_position()
        except:
            # Never keep a half-initialised archive open: close() (and
            # therefore __del__) would otherwise write ending records
            # at an arbitrary position of the existing file.
            self.fp.close()
            self.fp = None
            raise

    def _seek_append_position(self):
        """Place the file position where new members must be written.

        For an existing ZIP archive the contents are read and the
        position is the start of the central directory, which is then
        overwritten.  For any other file it is the end of the file.
        """
        fp = self.fp
        fp.seek(0, 2)
        if fp.tell() >= 22:
            fp.seek(-22, 2)     # Seek to end-of-file record
            endrec = fp.read()
        else:
            # Too short to hold an end-of-archive record; seeking 22
            # bytes back from the end would fail.
            endrec = ""
        if endrec[0:4] == stringEndArchive and \
                           endrec[-2:] == "\000\000":
            self._GetContents()     # file is a zip file
            # seek to start of directory and overwrite
            fp.seek(self.start_dir, 0)
        else:           # file is not a zip file, just append
            fp.seek(0, 2)

    def _GetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills filelist and NameToInfo and sets start_dir.  Raises
        BadZipfile when a magic number is wrong or the archive ends
        with a comment, and RuntimeError when a name in the central
        directory differs from the one in the file header.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        inferred_cd = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first file that cannot be read, or
        None when every file reads correctly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # A request to stop is not a damaged file
                raise
            except Exception:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError if there is no such name in the archive.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError if the archive is closed, was not opened
        with mode "r" or "a", or the file is deflated and zlib is
        missing.  Raises BadZipfile for an unsupported compression
        method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        # Remember the file position so that a pending write in mode
        # "a" continues where it left off.
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name only produces a warning, and only when debug
        is set.  Every other problem raises RuntimeError.
        """
        if self.NameToInfo.has_key(zinfo.filename):
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the
        compression given to the constructor.  The file is copied in
        8 KB pieces; its CRC and sizes are written after the data
        (flag bit 0x08), since they are not known beforehand.
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x08
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            CRC = 0
            compress_size = 0
            file_size = 0
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Release the source file even if reading or compressing fails
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is a ZipInfo instance; its file_size, CRC, compress_size,
        header_offset and file_offset attributes are filled in here.
        """
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() does nothing when the archive is already closed or
        # was never opened, so this is safe after a failed __init__.
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() again on a closed archive does nothing.  The
        file is closed even if writing the ending records fails.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):         # write ending records
                self._write_end_records()
        finally:
            self.fp.close()
            self.fp = None

    def _write_end_records(self):
        """Write the central directory and the end-of-archive record
        at the current file position."""
        pos1 = self.fp.tell()
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            # Seconds are stored halved; the shift keeps the value an
            # integer under true division as well.
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] >> 1)
            centdir = struct.pack(structCentralDir,
              stringCentralDir, zinfo.create_version,
              zinfo.create_system, zinfo.extract_version, zinfo.reserved,
              zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
              zinfo.CRC, zinfo.compress_size, zinfo.file_size,
              len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
              0, zinfo.internal_attr, zinfo.external_attr,
              zinfo.header_offset)
            self.fp.write(centdir)
            self.fp.write(zinfo.filename)
            self.fp.write(zinfo.extra)
            self.fp.write(zinfo.comment)
        pos2 = self.fp.tell()
        count = len(self.filelist)
        # Write end-of-zip-archive record
        endrec = struct.pack(structEndArchive, stringEndArchive,
                 0, 0, count, count, pos2 - pos1, pos1, 0)
        self.fp.write(endrec)
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename is the archive directory under which the modules are
        entered.  RuntimeError is raised when pathname is neither a
        directory nor a name ending in ".py".
        """
        # normpath() drops a trailing separator, so that "pkg/" still
        # yields the package name "pkg" rather than an empty name.
        name = os.path.split(os.path.normpath(pathname))[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._write_module(initname[0:-3], basename, "Adding")
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path[0:-3], basename, "Adding")
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        self._write_module(path[0:-3], basename, "Adding")
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")

    def _write_module(self, pathname, basename, message):
        """Enter the compiled module for the module path 'pathname'
        into the archive; 'message' starts the line printed when
        debug is set."""
        fname, arcname = self._get_codename(pathname, basename)
        if self.debug:
            print("%s %s" % (message, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        not empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        # Item 8 of the stat tuple is the modification time
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                            os.stat(file_pyc)[8] < os.stat(file_py)[8]:
            # The .pyc file is missing or older than the source
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)