# Implements a Python file-like object (GzipFile) that reads and writes a
# gzipped file.  The user of the file doesn't have to worry about the
# compression, but random access is not allowed.
# Based on Andrew Kuchling's minigzip.py, distributed with the zlib module.
# Bit masks for the flag byte of a gzip member header (FLG in RFC 1952).
# Each name is one bit, so they can be combined with | and tested with &.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
16 def write32(output
, value
):
17 t
= divmod(value
, 256)
29 buf
= b1
+ b2
+ b3
+ b4
36 v
= v
+ (ord(buf
[1]) << 8)
37 v
= v
+ (ord(buf
[2]) << 16)
38 v
= v
+ (ord(buf
[3]) << 24)
def open(filename, mode="r", compresslevel=9):
    """Return a GzipFile for filename.

    Shorthand for GzipFile(filename, mode, compresslevel).

    filename      -- name of the gzip file, passed through to GzipFile
    mode          -- mode string passed through to GzipFile (default "r")
    compresslevel -- compression level passed through to GzipFile
                     (default 9)

    Note: inside this module the name shadows the builtin open(); code
    here that needs the builtin reaches it as __builtin__.open.
    """
    return GzipFile(filename, mode, compresslevel)
48 def __init__(self
, filename
=None, mode
=None,
49 compresslevel
=9, fileobj
=None):
51 fileobj
= self
.myfileobj
= __builtin__
.open(filename
, mode
or 'r')
53 if hasattr(fileobj
, 'name'): filename
= fileobj
.name
56 if hasattr(fileobj
, 'mode'): mode
= fileobj
.mode
62 self
.filename
= filename
63 self
.decompress
= zlib
.decompressobj(-zlib
.MAX_WBITS
)
65 elif mode
[0:1] == 'w':
67 self
._init
_write
(filename
)
68 self
.compress
= zlib
.compressobj(compresslevel
,
74 raise ValueError, "Mode " + mode
+ " not supported"
76 self
.fileobj
= fileobj
78 if self
.mode
== WRITE
:
79 self
._write
_gzip
_header
()
80 elif self
.mode
== READ
:
81 self
._read
_gzip
_header
()
84 s
= repr(self
.fileobj
)
85 return '<gzip ' + s
[1:-1] + ' ' + hex(id(self
)) + '>'
87 def _init_write(self
, filename
):
88 if filename
[-3:] != '.gz':
89 filename
= filename
+ '.gz'
90 self
.filename
= filename
91 self
.crc
= zlib
.crc32("")
96 def _write_gzip_header(self
):
97 self
.fileobj
.write('\037\213') # magic header
98 self
.fileobj
.write('\010') # compression method
99 fname
= self
.filename
[:-3]
103 self
.fileobj
.write(chr(flags
))
104 write32(self
.fileobj
, int(time
.time()))
105 self
.fileobj
.write('\002')
106 self
.fileobj
.write('\377')
108 self
.fileobj
.write(fname
+ '\000')
110 def _init_read(self
):
111 self
.crc
= zlib
.crc32("")
116 def _read_gzip_header(self
):
117 magic
= self
.fileobj
.read(2)
118 if magic
!= '\037\213':
119 raise RuntimeError, 'Not a gzipped file'
120 method
= ord( self
.fileobj
.read(1) )
122 raise RuntimeError, 'Unknown compression method'
123 flag
= ord( self
.fileobj
.read(1) )
124 # modtime = self.fileobj.read(4)
125 # extraflag = self.fileobj.read(1)
126 # os = self.fileobj.read(1)
130 # Read & discard the extra field, if present
131 xlen
=ord(self
.fileobj
.read(1))
132 xlen
=xlen
+256*ord(self
.fileobj
.read(1))
133 self
.fileobj
.read(xlen
)
135 # Read and discard a null-terminated string containing the filename
137 s
=self
.fileobj
.read(1)
138 if not s
or s
=='\000': break
140 # Read and discard a null-terminated string containing a comment
142 s
=self
.fileobj
.read(1)
143 if not s
or s
=='\000': break
145 self
.fileobj
.read(2) # Read & discard the 16-bit header CRC
148 def write(self
,data
):
149 if self
.fileobj
is None:
150 raise ValueError, "write() on closed GzipFile object"
152 self
.size
= self
.size
+ len(data
)
153 self
.crc
= zlib
.crc32(data
, self
.crc
)
154 self
.fileobj
.write( self
.compress
.compress(data
) )
def writelines(self, lines):
    """Write the strings in lines to the file, back to back.

    lines -- sequence of strings; they are concatenated in order and
             handed to self.write() in a single call.

    Nothing is inserted between the items.  The join uses an explicit
    empty separator because string.join() defaults to a single space,
    which would add a space between every pair of items.
    """
    self.write("".join(lines))
159 def read(self
, size
=None):
160 if self
.extrasize
<= 0 and self
.fileobj
is None:
164 if not size
: # get the whole thing
168 readsize
= readsize
* 2
170 size
= self
.extrasize
171 else: # just get some more of it
173 while size
> self
.extrasize
:
175 readsize
= readsize
* 2
177 if size
> self
.extrasize
:
178 size
= self
.extrasize
180 chunk
= self
.extrabuf
[:size
]
181 self
.extrabuf
= self
.extrabuf
[size
:]
182 self
.extrasize
= self
.extrasize
- size
186 def _unread(self
, buf
):
187 self
.extrabuf
= buf
+ self
.extrabuf
188 self
.extrasize
= len(buf
) + self
.extrasize
190 def _read(self
, size
=1024):
192 buf
= self
.fileobj
.read(size
)
193 except AttributeError:
194 raise EOFError, "Reached EOF"
196 uncompress
= self
.decompress
.flush()
200 raise EOFError, 'Reached EOF'
202 uncompress
= self
.decompress
.decompress(buf
)
203 self
.crc
= zlib
.crc32(uncompress
, self
.crc
)
204 self
.extrabuf
= self
.extrabuf
+ uncompress
205 self
.extrasize
= self
.extrasize
+ len(uncompress
)
206 self
.size
= self
.size
+ len(uncompress
)
210 ## We've read to the end of the file, so we have to rewind in order
211 ## to reread the 8 bytes containing the CRC and the file size. The
212 ## decompressor is smart and knows when to stop, so feeding it
213 ## extra data is harmless.
214 self
.fileobj
.seek(-8, 2)
215 crc32
= read32(self
.fileobj
)
216 isize
= read32(self
.fileobj
)
217 if crc32
!= self
.crc
:
218 self
.error
= "CRC check failed"
219 elif isize
!= self
.size
:
220 self
.error
= "Incorrect length of data produced"
223 if self
.mode
== WRITE
:
224 self
.fileobj
.write(self
.compress
.flush())
225 write32(self
.fileobj
, self
.crc
)
226 write32(self
.fileobj
, self
.size
)
228 elif self
.mode
== READ
:
231 self
.myfileobj
.close()
232 self
.myfileobj
= None
238 raise IOError, 'Random access not allowed in gzip files'
241 raise IOError, 'I won\'t tell() you for gzip files'
250 c
= self
.read(readsize
)
251 i
= string
.find(c
, '\n')
252 if i
>= 0 or c
== '':
254 self
._unread
(c
[i
+1:])
255 return string
.join(bufs
, '')
257 readsize
= readsize
* 2
261 lines
= string
.split(buf
, '\n')
262 for i
in range(len(lines
)-1):
263 lines
[i
] = lines
[i
] + '\n'
264 if lines
and not lines
[-1]:
268 def writelines(self
, L
):
274 # Act like gzip; with -d, act like gunzip.
275 # The input file is not deleted, however, nor are any other gzip
276 # options or features supported.
279 decompress
= args
and args
[0] == "-d"
287 f
= GzipFile(filename
="", mode
="rb", fileobj
=sys
.stdin
)
290 if arg
[-3:] != ".gz":
291 print "filename doesn't end in .gz:", `arg`
294 g
= __builtin__
.open(arg
[:-3], "wb")
298 g
= GzipFile(filename
="", mode
="wb", fileobj
=sys
.stdout
)
300 f
= __builtin__
.open(arg
, "rb")
301 g
= open(arg
+ ".gz", "wb")
307 if g
is not sys
.stdout
:
309 if f
is not sys
.stdin
:
312 if __name__
== '__main__':