This commit was manufactured by cvs2svn to create tag 'r221c2'.
[python/dscho.git] / Lib / zipfile.py
blob0efcad3a8a4f38fd90bbb5164b5e201df717d5c0
1 "Read and write ZIP files."
2 # Written by James C. Ahlstrom jim@interet.com
3 # All rights transferred to CNRI pursuant to the Python contribution agreement
5 import struct, os, time
6 import binascii
8 try:
9 import zlib # We may need its compression method
10 except ImportError:
11 zlib = None
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
]
class BadZipfile(Exception):
    """Raised when an archive is malformed or cannot be handled."""


# "error" is the historical alias for the exception raised by this module.
error = BadZipfile
# Compression methods understood by this module; every other ZIP
# compression method is unsupported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct module formats and magic numbers for the three record types.

# End-of-archive record: 9 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4H3l5H2l"
stringCentralDir = "PK\001\002"

# Local file header: 12 items, 30 bytes.
structFileHeader = "<4s2B4H3l2H"
stringFileHeader = "PK\003\004"
# Positions of the fields in an unpacked central directory record
# (the tuple produced by struct.unpack(structCentralDir, ...)).
(_CD_SIGNATURE,                 # 0
 _CD_CREATE_VERSION,            # 1
 _CD_CREATE_SYSTEM,             # 2
 _CD_EXTRACT_VERSION,           # 3
 _CD_EXTRACT_SYSTEM,            # 4  is this meaningful?
 _CD_FLAG_BITS,                 # 5
 _CD_COMPRESS_TYPE,             # 6
 _CD_TIME,                      # 7
 _CD_DATE,                      # 8
 _CD_CRC,                       # 9
 _CD_COMPRESSED_SIZE,           # 10
 _CD_UNCOMPRESSED_SIZE,         # 11
 _CD_FILENAME_LENGTH,           # 12
 _CD_EXTRA_FIELD_LENGTH,        # 13
 _CD_COMMENT_LENGTH,            # 14
 _CD_DISK_NUMBER_START,         # 15
 _CD_INTERNAL_FILE_ATTRIBUTES,  # 16
 _CD_EXTERNAL_FILE_ATTRIBUTES,  # 17
 _CD_LOCAL_HEADER_OFFSET,       # 18
 ) = range(19)
# Positions of the fields in an unpacked local file header record
# (the tuple produced by struct.unpack(structFileHeader, ...)).
(_FH_SIGNATURE,                 # 0
 _FH_EXTRACT_VERSION,           # 1
 _FH_EXTRACT_SYSTEM,            # 2  is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS, # 3
 _FH_COMPRESSION_METHOD,        # 4
 _FH_LAST_MOD_TIME,             # 5
 _FH_LAST_MOD_DATE,             # 6
 _FH_CRC,                       # 7
 _FH_COMPRESSED_SIZE,           # 8
 _FH_UNCOMPRESSED_SIZE,         # 9
 _FH_FILENAME_LENGTH,           # 10
 _FH_EXTRA_FIELD_LENGTH,        # 11
 ) = range(12)
# Types that mark the "file" argument of ZipFile as a path name rather
# than an already-open file object.  UnicodeType is only added when the
# types module provides it.
import types
_STRING_TYPES = (types.StringType,)
if hasattr(types, "UnicodeType"):
    _STRING_TYPES += (types.UnicodeType,)
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    The last 22 bytes of the file must be an end-of-archive record with
    a zero-length comment, so a ZIP archive with an ending comment is
    not accepted.

    filename: path of the file to inspect.

    Returns 1 if the end-of-archive record is found, otherwise 0.  A
    file that cannot be opened or is shorter than 22 bytes (IOError)
    also yields 0.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Release the handle even when seek()/read() fails, e.g. on
            # a file shorter than the 22-byte record.
            fpin.close()
    except IOError:
        return 0
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                        # file has correct magic number
    return 0
class ZipInfo:
    """Record of the attributes describing one file in a ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for 'filename' stamped with 'date_time'.

        filename:  name of the member; it is passed through _normpath().
        date_time: (year, month, day, hour, min, sec) sequence.
        """
        # Identity of the member
        self.filename = _normpath(filename) # Name of the file in the archive
        self.date_time = date_time          # year, month, day, hour, min, sec

        # Standard values
        self.compress_type = ZIP_STORED     # Type of compression for the file
        self.comment = ""                   # Comment for each file
        self.extra = ""                     # ZIP extra data
        self.create_system = 0              # System which created ZIP archive
        self.create_version = 20            # Version which created ZIP archive
        self.extract_version = 20           # Version needed to extract archive
        self.reserved = 0                   # Must be zero
        self.flag_bits = 0                  # ZIP flag bits
        self.volume = 0                     # Volume number of file header
        self.internal_attr = 0              # Internal attributes
        self.external_attr = 0              # External file attributes

        # Other attributes are set by class ZipFile:
        #   header_offset   Byte offset to the file header
        #   file_offset     Byte offset to the start of the file data
        #   CRC             CRC-32 of the uncompressed file
        #   compress_size   Size of the compressed file
        #   file_size       Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as a string.

        The packed header is followed by the file name and the extra
        data.  When bit 0x08 of flag_bits is set, the CRC and both sizes
        are written as zero.
        """
        stamp = self.date_time
        # Pack the date and time into the two 16-bit DOS fields.
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if not (self.flag_bits & 0x08):
            crc = self.CRC
            packed_size = self.compress_size
            plain_size = self.file_size
        else:
            # Set these to zero because we write them after the file data
            crc = packed_size = plain_size = 0
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, dostime, dosdate, crc,
            packed_size, plain_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
def _normpath(path):
    """Return 'path' with os.sep replaced by forward slashes.

    Paths in generated ZIP files must always use "/" as the directory
    separator, as required by the ZIP format specification.  When the
    platform separator already is "/", the path is returned unchanged.
    """
    if os.sep != "/":
        return path.replace(os.sep, "/")
    return path
147 class ZipFile:
148 """ Class with methods to open, read, write, close, list zip files.
150 z = ZipFile(file, mode="r", compression=ZIP_STORED)
152 file: Either the path to the file, or a file-like object.
153 If it is a path, the file will be opened and closed by ZipFile.
154 mode: The mode can be either read "r", write "w" or append "a".
155 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
158 fp = None # Set here since __del__ checks it
160 def __init__(self, file, mode="r", compression=ZIP_STORED):
161 """Open the ZIP file with mode read "r", write "w" or append "a"."""
162 if compression == ZIP_STORED:
163 pass
164 elif compression == ZIP_DEFLATED:
165 if not zlib:
166 raise RuntimeError,\
167 "Compression requires the (missing) zlib module"
168 else:
169 raise RuntimeError, "That compression method is not supported"
170 self.debug = 0 # Level of printing: 0 through 3
171 self.NameToInfo = {} # Find file info given name
172 self.filelist = [] # List of ZipInfo instances for archive
173 self.compression = compression # Method of compression
174 self.mode = key = mode[0]
176 # Check if we were passed a file-like object
177 if type(file) in _STRING_TYPES:
178 self._filePassed = 0
179 self.filename = file
180 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
181 self.fp = open(file, modeDict[mode])
182 else:
183 self._filePassed = 1
184 self.fp = file
185 self.filename = getattr(file, 'name', None)
187 if key == 'r':
188 self._GetContents()
189 elif key == 'w':
190 pass
191 elif key == 'a':
192 fp = self.fp
193 fp.seek(-22, 2) # Seek to end-of-file record
194 endrec = fp.read()
195 if endrec[0:4] == stringEndArchive and \
196 endrec[-2:] == "\000\000":
197 self._GetContents() # file is a zip file
198 # seek to start of directory and overwrite
199 fp.seek(self.start_dir, 0)
200 else: # file is not a zip file, just append
201 fp.seek(0, 2)
202 else:
203 if not self._filePassed:
204 self.fp.close()
205 self.fp = None
206 raise RuntimeError, 'Mode must be "r", "w" or "a"'
208 def _GetContents(self):
209 """Read the directory, making sure we close the file if the format
210 is bad."""
211 try:
212 self._RealGetContents()
213 except BadZipfile:
214 if not self._filePassed:
215 self.fp.close()
216 self.fp = None
217 raise
219 def _RealGetContents(self):
220 """Read in the table of contents for the ZIP file."""
221 fp = self.fp
222 fp.seek(-22, 2) # Start of end-of-archive record
223 filesize = fp.tell() + 22 # Get file size
224 endrec = fp.read(22) # Archive must not end with a comment!
225 if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
226 raise BadZipfile, "File is not a zip file, or ends with a comment"
227 endrec = struct.unpack(structEndArchive, endrec)
228 if self.debug > 1:
229 print endrec
230 size_cd = endrec[5] # bytes in central directory
231 offset_cd = endrec[6] # offset of central directory
232 x = filesize - 22 - size_cd
233 # "concat" is zero, unless zip was concatenated to another file
234 concat = x - offset_cd
235 if self.debug > 2:
236 print "given, inferred, offset", offset_cd, x, concat
237 # self.start_dir: Position of start of central directory
238 self.start_dir = offset_cd + concat
239 fp.seek(self.start_dir, 0)
240 total = 0
241 while total < size_cd:
242 centdir = fp.read(46)
243 total = total + 46
244 if centdir[0:4] != stringCentralDir:
245 raise BadZipfile, "Bad magic number for central directory"
246 centdir = struct.unpack(structCentralDir, centdir)
247 if self.debug > 2:
248 print centdir
249 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
250 # Create ZipInfo instance to store file information
251 x = ZipInfo(filename)
252 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
253 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
254 total = (total + centdir[_CD_FILENAME_LENGTH]
255 + centdir[_CD_EXTRA_FIELD_LENGTH]
256 + centdir[_CD_COMMENT_LENGTH])
257 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
258 # file_offset must be computed below...
259 (x.create_version, x.create_system, x.extract_version, x.reserved,
260 x.flag_bits, x.compress_type, t, d,
261 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
262 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
263 # Convert date/time code to (year, month, day, hour, min, sec)
264 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
265 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
266 self.filelist.append(x)
267 self.NameToInfo[x.filename] = x
268 if self.debug > 2:
269 print "total", total
270 for data in self.filelist:
271 fp.seek(data.header_offset, 0)
272 fheader = fp.read(30)
273 if fheader[0:4] != stringFileHeader:
274 raise BadZipfile, "Bad magic number for file header"
275 fheader = struct.unpack(structFileHeader, fheader)
276 # file_offset is computed here, since the extra field for
277 # the central directory and for the local file header
278 # refer to different fields, and they can have different
279 # lengths
280 data.file_offset = (data.header_offset + 30
281 + fheader[_FH_FILENAME_LENGTH]
282 + fheader[_FH_EXTRA_FIELD_LENGTH])
283 fname = fp.read(fheader[_FH_FILENAME_LENGTH])
284 if fname != data.filename:
285 raise RuntimeError, \
286 'File name in directory "%s" and header "%s" differ.' % (
287 data.filename, fname)
289 def namelist(self):
290 """Return a list of file names in the archive."""
291 l = []
292 for data in self.filelist:
293 l.append(data.filename)
294 return l
296 def infolist(self):
297 """Return a list of class ZipInfo instances for files in the
298 archive."""
299 return self.filelist
301 def printdir(self):
302 """Print a table of contents for the zip file."""
303 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
304 for zinfo in self.filelist:
305 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
306 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
308 def testzip(self):
309 """Read all the files and check the CRC."""
310 for zinfo in self.filelist:
311 try:
312 self.read(zinfo.filename) # Check CRC-32
313 except:
314 return zinfo.filename
316 def getinfo(self, name):
317 """Return the instance of ZipInfo given 'name'."""
318 return self.NameToInfo[name]
320 def read(self, name):
321 """Return file bytes (as a string) for name."""
322 if self.mode not in ("r", "a"):
323 raise RuntimeError, 'read() requires mode "r" or "a"'
324 if not self.fp:
325 raise RuntimeError, \
326 "Attempt to read ZIP archive that was already closed"
327 zinfo = self.getinfo(name)
328 filepos = self.fp.tell()
329 self.fp.seek(zinfo.file_offset, 0)
330 bytes = self.fp.read(zinfo.compress_size)
331 self.fp.seek(filepos, 0)
332 if zinfo.compress_type == ZIP_STORED:
333 pass
334 elif zinfo.compress_type == ZIP_DEFLATED:
335 if not zlib:
336 raise RuntimeError, \
337 "De-compression requires the (missing) zlib module"
338 # zlib compress/decompress code by Jeremy Hylton of CNRI
339 dc = zlib.decompressobj(-15)
340 bytes = dc.decompress(bytes)
341 # need to feed in unused pad byte so that zlib won't choke
342 ex = dc.decompress('Z') + dc.flush()
343 if ex:
344 bytes = bytes + ex
345 else:
346 raise BadZipfile, \
347 "Unsupported compression method %d for file %s" % \
348 (zinfo.compress_type, name)
349 crc = binascii.crc32(bytes)
350 if crc != zinfo.CRC:
351 raise BadZipfile, "Bad CRC-32 for file %s" % name
352 return bytes
354 def _writecheck(self, zinfo):
355 """Check for errors before writing a file to the archive."""
356 if self.NameToInfo.has_key(zinfo.filename):
357 if self.debug: # Warning for duplicate names
358 print "Duplicate name:", zinfo.filename
359 if self.mode not in ("w", "a"):
360 raise RuntimeError, 'write() requires mode "w" or "a"'
361 if not self.fp:
362 raise RuntimeError, \
363 "Attempt to write ZIP archive that was already closed"
364 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
365 raise RuntimeError, \
366 "Compression requires the (missing) zlib module"
367 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
368 raise RuntimeError, \
369 "That compression method is not supported"
371 def write(self, filename, arcname=None, compress_type=None):
372 """Put the bytes from filename into the archive under the name
373 arcname."""
374 st = os.stat(filename)
375 mtime = time.localtime(st[8])
376 date_time = mtime[0:6]
377 # Create ZipInfo instance to store file information
378 if arcname is None:
379 zinfo = ZipInfo(filename, date_time)
380 else:
381 zinfo = ZipInfo(arcname, date_time)
382 zinfo.external_attr = st[0] << 16 # Unix attributes
383 if compress_type is None:
384 zinfo.compress_type = self.compression
385 else:
386 zinfo.compress_type = compress_type
387 self._writecheck(zinfo)
388 fp = open(filename, "rb")
389 zinfo.flag_bits = 0x00
390 zinfo.header_offset = self.fp.tell() # Start of header bytes
391 # Must overwrite CRC and sizes with correct data later
392 zinfo.CRC = CRC = 0
393 zinfo.compress_size = compress_size = 0
394 zinfo.file_size = file_size = 0
395 self.fp.write(zinfo.FileHeader())
396 zinfo.file_offset = self.fp.tell() # Start of file bytes
397 if zinfo.compress_type == ZIP_DEFLATED:
398 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
399 zlib.DEFLATED, -15)
400 else:
401 cmpr = None
402 while 1:
403 buf = fp.read(1024 * 8)
404 if not buf:
405 break
406 file_size = file_size + len(buf)
407 CRC = binascii.crc32(buf, CRC)
408 if cmpr:
409 buf = cmpr.compress(buf)
410 compress_size = compress_size + len(buf)
411 self.fp.write(buf)
412 fp.close()
413 if cmpr:
414 buf = cmpr.flush()
415 compress_size = compress_size + len(buf)
416 self.fp.write(buf)
417 zinfo.compress_size = compress_size
418 else:
419 zinfo.compress_size = file_size
420 zinfo.CRC = CRC
421 zinfo.file_size = file_size
422 # Seek backwards and write CRC and file sizes
423 position = self.fp.tell() # Preserve current position in file
424 self.fp.seek(zinfo.header_offset + 14, 0)
425 self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
426 zinfo.file_size))
427 self.fp.seek(position, 0)
428 self.filelist.append(zinfo)
429 self.NameToInfo[zinfo.filename] = zinfo
431 def writestr(self, zinfo, bytes):
432 """Write a file into the archive. The contents is the string
433 'bytes'."""
434 self._writecheck(zinfo)
435 zinfo.file_size = len(bytes) # Uncompressed size
436 zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
437 if zinfo.compress_type == ZIP_DEFLATED:
438 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
439 zlib.DEFLATED, -15)
440 bytes = co.compress(bytes) + co.flush()
441 zinfo.compress_size = len(bytes) # Compressed size
442 else:
443 zinfo.compress_size = zinfo.file_size
444 zinfo.header_offset = self.fp.tell() # Start of header bytes
445 self.fp.write(zinfo.FileHeader())
446 zinfo.file_offset = self.fp.tell() # Start of file bytes
447 self.fp.write(bytes)
448 if zinfo.flag_bits & 0x08:
449 # Write CRC and file sizes after the file data
450 self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
451 zinfo.file_size))
452 self.filelist.append(zinfo)
453 self.NameToInfo[zinfo.filename] = zinfo
455 def __del__(self):
456 """Call the "close()" method in case the user forgot."""
457 self.close()
459 def close(self):
460 """Close the file, and for mode "w" and "a" write the ending
461 records."""
462 if self.fp is None:
463 return
464 if self.mode in ("w", "a"): # write ending records
465 count = 0
466 pos1 = self.fp.tell()
467 for zinfo in self.filelist: # write central directory
468 count = count + 1
469 dt = zinfo.date_time
470 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
471 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
472 centdir = struct.pack(structCentralDir,
473 stringCentralDir, zinfo.create_version,
474 zinfo.create_system, zinfo.extract_version, zinfo.reserved,
475 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
476 zinfo.CRC, zinfo.compress_size, zinfo.file_size,
477 len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
478 0, zinfo.internal_attr, zinfo.external_attr,
479 zinfo.header_offset)
480 self.fp.write(centdir)
481 self.fp.write(zinfo.filename)
482 self.fp.write(zinfo.extra)
483 self.fp.write(zinfo.comment)
484 pos2 = self.fp.tell()
485 # Write end-of-zip-archive record
486 endrec = struct.pack(structEndArchive, stringEndArchive,
487 0, 0, count, count, pos2 - pos1, pos1, 0)
488 self.fp.write(endrec)
489 self.fp.flush()
490 if not self._filePassed:
491 self.fp.close()
492 self.fp = None
495 class PyZipFile(ZipFile):
496 """Class to create ZIP archives with Python library files and packages."""
498 def writepy(self, pathname, basename = ""):
499 """Add all files from "pathname" to the ZIP archive.
501 If pathname is a package directory, search the directory and
502 all package subdirectories recursively for all *.py and enter
503 the modules into the archive. If pathname is a plain
504 directory, listdir *.py and enter all modules. Else, pathname
505 must be a Python *.py file and the module will be put into the
506 archive. Added modules are always module.pyo or module.pyc.
507 This method will compile the module.py into module.pyc if
508 necessary.
510 dir, name = os.path.split(pathname)
511 if os.path.isdir(pathname):
512 initname = os.path.join(pathname, "__init__.py")
513 if os.path.isfile(initname):
514 # This is a package directory, add it
515 if basename:
516 basename = "%s/%s" % (basename, name)
517 else:
518 basename = name
519 if self.debug:
520 print "Adding package in", pathname, "as", basename
521 fname, arcname = self._get_codename(initname[0:-3], basename)
522 if self.debug:
523 print "Adding", arcname
524 self.write(fname, arcname)
525 dirlist = os.listdir(pathname)
526 dirlist.remove("__init__.py")
527 # Add all *.py files and package subdirectories
528 for filename in dirlist:
529 path = os.path.join(pathname, filename)
530 root, ext = os.path.splitext(filename)
531 if os.path.isdir(path):
532 if os.path.isfile(os.path.join(path, "__init__.py")):
533 # This is a package directory, add it
534 self.writepy(path, basename) # Recursive call
535 elif ext == ".py":
536 fname, arcname = self._get_codename(path[0:-3],
537 basename)
538 if self.debug:
539 print "Adding", arcname
540 self.write(fname, arcname)
541 else:
542 # This is NOT a package directory, add its files at top level
543 if self.debug:
544 print "Adding files from directory", pathname
545 for filename in os.listdir(pathname):
546 path = os.path.join(pathname, filename)
547 root, ext = os.path.splitext(filename)
548 if ext == ".py":
549 fname, arcname = self._get_codename(path[0:-3],
550 basename)
551 if self.debug:
552 print "Adding", arcname
553 self.write(fname, arcname)
554 else:
555 if pathname[-3:] != ".py":
556 raise RuntimeError, \
557 'Files added with writepy() must end with ".py"'
558 fname, arcname = self._get_codename(pathname[0:-3], basename)
559 if self.debug:
560 print "Adding file", arcname
561 self.write(fname, arcname)
563 def _get_codename(self, pathname, basename):
564 """Return (filename, archivename) for the path.
566 Given a module name path, return the correct file path and
567 archive name, compiling if necessary. For example, given
568 /python/lib/string, return (/python/lib/string.pyc, string).
570 file_py = pathname + ".py"
571 file_pyc = pathname + ".pyc"
572 file_pyo = pathname + ".pyo"
573 if os.path.isfile(file_pyo) and \
574 os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
575 fname = file_pyo # Use .pyo file
576 elif not os.path.isfile(file_pyc) or \
577 os.stat(file_pyc)[8] < os.stat(file_py)[8]:
578 import py_compile
579 if self.debug:
580 print "Compiling", file_py
581 py_compile.compile(file_py, file_pyc)
582 fname = file_pyc
583 else:
584 fname = file_pyc
585 archivename = os.path.split(fname)[1]
586 if basename:
587 archivename = "%s/%s" % (basename, archivename)
588 return (fname, archivename)