This commit was manufactured by cvs2svn to create tag 'cnrisync'.
[python/dscho.git] / Lib / binhex.py
blob2f97a33d52d3b3558c53d5bef4fb1d76010d0325
1 """binhex - Macintosh binhex compression/decompression
2 easy interface:
3 binhex(inputfilename, outputfilename)
4 hexbin(inputfilename, outputfilename)
5 """
8 # Jack Jansen, CWI, August 1995.
10 # The module is supposed to be as compatible as possible. Especially the
11 # easy interface should work "as expected" on any platform.
12 # XXXX Note: currently, textfiles appear in mac-form on all platforms.
13 # We seem to lack a simple character-translate in python.
14 # (we should probably use ISO-Latin-1 on all but the mac platform).
# XXXX The simple routines are too simple: they expect to hold the complete
# files in-core. Should be fixed.
17 # XXXX It would be nice to handle AppleDouble format on unix (for servers serving
18 # macs).
19 # XXXX I don't understand what happens when you get 0x90 times the same byte on
20 # input. The resulting code (xx 90 90) would appear to be interpreted as an
21 # escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
23 import sys
24 import os
25 import struct
26 import string
27 import binascii
29 DEBUG=0
30 if DEBUG:
31 testf=open('@binhex.dbg.out', 'w')
33 Error = 'binhex.Error'
35 # States (what have we written)
36 [_DID_HEADER, _DID_DATA, _DID_RSRC] = range(3)
38 # Various constants
39 REASONABLY_LARGE=32768 # Minimal amount we pass the rle-coder
40 LINELEN=48 # What we pass to hqx-coder at once
41 # *NOTE* Must be divisible by 3!
42 RUNCHAR=chr(0x90) # run-length introducer
# The code is currently byte-order dependent: header fields and CRCs are
# packed and unpacked with struct in the machine's native layout.  Refuse to
# load when a native int holding 0177 is not the four bytes 00 00 00 7f.
if struct.pack('i', 0177) != '\0\0\0\177':
    raise ImportError, 'Module binhex is big-endian only'
50 # Workarounds for non-mac machines.
51 if os.name == 'mac':
52 import macfs
54 def FInfo():
55 return macfs.FInfo()
57 def getfileinfo(name):
58 finfo = macfs.FSSpec(name).GetFInfo()
59 dir, file = os.path.split(name)
60 # XXXX Get resource/data sizes
61 fp = open(name, 'rb')
62 fp.seek(0, 2)
63 dlen = fp.tell()
64 fp = open(name, '*rb')
65 fp.seek(0, 2)
66 rlen = fp.tell()
67 return file, finfo, dlen, rlen
69 def openrsrc(name, *mode):
70 if mode:
71 mode = mode[0]
72 else:
73 mode = 'rb'
74 mode = '*' + mode
75 return open(name, mode)
77 else:
79 # Glue code for non-macintosh useage
81 import regsub
83 class FInfo:
84 def __init__(self):
85 self.Type = '????'
86 self.Creator = '????'
87 self.Flags = 0
89 def getfileinfo(name):
90 finfo = FInfo()
91 # Quick check for textfile
92 fp = open(name)
93 data = open(name).read(256)
94 for c in data:
95 if not c in string.whitespace and (c<' ' or ord(c) > 0177):
96 break
97 else:
98 finfo.Type = 'TEXT'
99 fp.seek(0, 2)
100 dsize = fp.tell()
101 fp.close()
102 dir, file = os.path.split(name)
103 file = regsub.sub(':', '-', file)
104 return file, finfo, dsize, 0
106 class openrsrc:
107 def __init__(self, *args):
108 pass
110 def read(self, *args):
111 return ''
113 def write(self, *args):
114 pass
116 def close(self):
117 pass
class _Hqxcoderengine:
    """Write data to the coder in 3-byte chunks

    Data handed to write() is hqx-encoded LINELEN bytes at a time and written
    to ofp, one line per chunk.  Between 1 and LINELEN bytes are held back
    once anything has been written, so that close() can emit the last
    (possibly short) chunk followed by the terminating ':'.
    """

    def __init__(self, ofp):
        self.ofp = ofp      # object the encoded text is written to
        self.data = ''      # bytes accepted but not yet encoded

    def write(self, data):
        """Buffer data and write every full line that is now available."""
        pending = self.data + data
        start = 0
        # Step through the buffer with an offset instead of re-slicing the
        # remainder after every line, which copied the whole tail each time.
        # Strictly more than LINELEN bytes must be left to emit a line: the
        # final chunk belongs to close().
        while len(pending) - start > LINELEN:
            stop = start + LINELEN
            self.ofp.write(binascii.b2a_hqx(pending[start:stop]) + '\n')
            start = stop
        self.data = pending[start:]

    def close(self):
        """Encode the held-back bytes, write the final ':' and close ofp."""
        if self.data:
            self.ofp.write(binascii.b2a_hqx(self.data))
        self.ofp.write(':\n')
        self.ofp.close()
class _Rlecoderengine:
    """Write data to the RLE-coder in suitably large chunks

    Input is collected until at least REASONABLY_LARGE bytes are available
    and is then run-length coded in one go and passed on to ofp.
    """

    def __init__(self, ofp):
        self.ofp = ofp
        self.data = ''

    def _emit(self):
        # Run-length code everything collected so far and pass it on
        self.ofp.write(binascii.rlecode_hqx(self.data))

    def write(self, data):
        """Collect data; pass it on once enough has accumulated."""
        if DEBUG:
            testf.write(data) # XXXX
        collected = self.data + data
        self.data = collected
        if len(collected) >= REASONABLY_LARGE:
            self._emit()
            self.data = ''

    def close(self):
        """Pass on whatever is still collected and close ofp."""
        if self.data:
            self._emit()
        self.ofp.close()
162 class BinHex:
163 def __init__(self, (name, finfo, dlen, rlen), ofp):
164 if type(ofp) == type(''):
165 ofname = ofp
166 ofp = open(ofname, 'w')
167 if os.name == 'mac':
168 fss = macfs.FSSpec(ofname)
169 fss.SetCreatorType('BnHq', 'TEXT')
170 ofp.write('(This file may be decompressed with BinHex 4.0)\n\n:')
171 hqxer = _Hqxcoderengine(ofp)
172 self.ofp = _Rlecoderengine(hqxer)
173 self.crc = 0
174 if finfo == None:
175 finfo = FInfo()
176 self.dlen = dlen
177 self.rlen = rlen
178 self._writeinfo(name, finfo)
179 self.state = _DID_HEADER
181 def _writeinfo(self, name, finfo):
182 if DEBUG:
183 print 'binhex info:', name, finfo.Type, finfo.Creator, self.dlen, self.rlen
184 name = name
185 nl = len(name)
186 if nl > 63:
187 raise Error, 'Filename too long'
188 d = chr(nl) + name + '\0'
189 d2 = finfo.Type + finfo.Creator
190 d3 = struct.pack('h', finfo.Flags)
191 d4 = struct.pack('ii', self.dlen, self.rlen)
192 info = d + d2 + d3 + d4
193 self._write(info)
194 self._writecrc()
196 def _write(self, data):
197 self.crc = binascii.crc_hqx(data, self.crc)
198 self.ofp.write(data)
200 def _writecrc(self):
201 ## self.crc = binascii.crc_hqx('\0\0', self.crc) # XXXX Should this be here??
202 self.ofp.write(struct.pack('h', self.crc))
203 self.crc = 0
205 def write(self, data):
206 if self.state != _DID_HEADER:
207 raise Error, 'Writing data at the wrong time'
208 self.dlen = self.dlen - len(data)
209 self._write(data)
211 def close_data(self):
212 if self.dlen <> 0:
213 raise Error, 'Incorrect data size, diff='+`self.rlen`
214 self._writecrc()
215 self.state = _DID_DATA
217 def write_rsrc(self, data):
218 if self.state < _DID_DATA:
219 self.close_data()
220 if self.state != _DID_DATA:
221 raise Error, 'Writing resource data at the wrong time'
222 self.rlen = self.rlen - len(data)
223 self._write(data)
225 def close(self):
226 if self.state < _DID_DATA:
227 self.close_data()
228 if self.state != _DID_DATA:
229 raise Error, 'Close at the wrong time'
230 if self.rlen <> 0:
231 raise Error, "Incorrect resource-datasize, diff="+`self.rlen`
232 self._writecrc()
233 self.ofp.close()
234 self.state = None
def binhex(inp, out):
    """(infilename, outfilename) - Create binhex-encoded copy of a file"""
    encoder = BinHex(getfileinfo(inp), out)

    # Data fork first; it is read into memory in one piece.
    datafork = open(inp, 'rb')
    # XXXX Do textfile translation on non-mac systems
    contents = datafork.read()
    encoder.write(contents)
    encoder.close_data()
    datafork.close()

    # Then the resource fork, in the same way.
    rsrcfork = openrsrc(inp, 'rb')
    contents = rsrcfork.read()
    encoder.write_rsrc(contents)
    encoder.close()
    rsrcfork.close()
254 class _Hqxdecoderengine:
255 """Read data via the decoder in 4-byte chunks"""
257 def __init__(self, ifp):
258 self.ifp = ifp
259 self.eof = 0
261 def read(self, totalwtd):
262 """Read at least wtd bytes (or until EOF)"""
263 decdata = ''
264 wtd = totalwtd
266 # The loop here is convoluted, since we don't really now how much
267 # to decode: there may be newlines in the incoming data.
268 while wtd > 0:
269 if self.eof: return decdata
270 wtd = ((wtd+2)/3)*4
271 data = self.ifp.read(wtd)
273 # Next problem: there may not be a complete number of bytes in what we
274 # pass to a2b. Solve by yet another loop.
276 while 1:
277 try:
278 decdatacur, self.eof = binascii.a2b_hqx(data)
279 if self.eof: print 'EOF'
280 break
281 except binascii.Incomplete:
282 pass
283 newdata = self.ifp.read(1)
284 if not newdata:
285 raise Error, 'Premature EOF on binhex file'
286 data = data + newdata
287 decdata = decdata + decdatacur
288 wtd = totalwtd - len(decdata)
289 if not decdata and not self.eof:
290 raise Error, 'Premature EOF on binhex file'
291 return decdata
293 def close(self):
294 self.ifp.close()
296 class _Rledecoderengine:
297 """Read data via the RLE-coder"""
299 def __init__(self, ifp):
300 self.ifp = ifp
301 self.pre_buffer = ''
302 self.post_buffer = ''
303 self.eof = 0
305 def read(self, wtd):
306 if wtd > len(self.post_buffer):
307 self._fill(wtd-len(self.post_buffer))
308 rv = self.post_buffer[:wtd]
309 self.post_buffer = self.post_buffer[wtd:]
310 print 'WTD', wtd, 'GOT', len(rv)
311 return rv
313 def _fill(self, wtd):
315 # Obfuscated code ahead. We keep at least one byte in the pre_buffer,
316 # so we don't stumble over an orphaned RUNCHAR later on. If the
317 # last or second-last char is a RUNCHAR we keep more bytes.
319 self.pre_buffer = self.pre_buffer + self.ifp.read(wtd+2)
320 if self.ifp.eof:
321 self.post_buffer = self.post_buffer + \
322 binascii.rledecode_hqx(self.pre_buffer)
323 self.pre_buffer = ''
324 return
326 lastrle = string.rfind(self.pre_buffer, RUNCHAR)
327 if lastrle > 0 and lastrle == len(self.pre_buffer)-1:
328 # Last byte is an RLE, keep two bytes
329 mark = len(self.pre_buffer)-2
330 elif lastrle > 0 and lastrle == len(self.pre_buffer)-2:
331 # second-last byte is an RLE. Decode all.
332 mark = len(self.pre_buffer)
333 else:
334 mark = len(self.pre_buffer)-1
335 self.post_buffer = self.post_buffer + \
336 binascii.rledecode_hqx(self.pre_buffer[:mark])
337 self.pre_buffer = self.pre_buffer[mark:]
339 def close(self):
340 self.ifp.close()
342 class HexBin:
343 def __init__(self, ifp):
344 if type(ifp) == type(''):
345 ifp = open(ifp)
347 # Find initial colon.
349 while 1:
350 ch = ifp.read(1)
351 if not ch:
352 raise Error, "No binhex data found"
353 if ch == ':':
354 break
355 if ch != '\n':
356 dummy = ifp.readline()
357 if DEBUG:
358 print 'SKIP:', ch+dummy
360 hqxifp = _Hqxdecoderengine(ifp)
361 self.ifp = _Rledecoderengine(hqxifp)
362 self.crc = 0
363 self._readheader()
365 def _read(self, len):
366 data = self.ifp.read(len)
367 self.crc = binascii.crc_hqx(data, self.crc)
368 return data
370 def _checkcrc(self):
371 filecrc = struct.unpack('h', self.ifp.read(2))[0] & 0xffff
372 ## self.crc = binascii.crc_hqx('\0\0', self.crc) # XXXX Is this needed??
373 self.crc = self.crc & 0xffff
374 if DEBUG:
375 print 'DBG CRC %x %x'%(self.crc, filecrc)
376 if filecrc != self.crc:
377 raise Error, 'CRC error, computed %x, read %x'%(self.crc, filecrc)
378 self.crc = 0
380 def _readheader(self):
381 len = self._read(1)
382 fname = self._read(ord(len))
383 rest = self._read(1+4+4+2+4+4)
384 self._checkcrc()
386 type = rest[1:5]
387 creator = rest[5:9]
388 flags = struct.unpack('h', rest[9:11])[0]
389 self.dlen = struct.unpack('l', rest[11:15])[0]
390 self.rlen = struct.unpack('l', rest[15:19])[0]
392 if DEBUG:
393 print 'DATA, RLEN', self.dlen, self.rlen
395 self.FName = fname
396 self.FInfo = FInfo()
397 self.FInfo.Creator = creator
398 self.FInfo.Type = type
399 self.FInfo.Flags = flags
401 self.state = _DID_HEADER
403 def read(self, *n):
404 if self.state != _DID_HEADER:
405 raise Error, 'Read data at wrong time'
406 if n:
407 n = n[0]
408 n = min(n, self.dlen)
409 else:
410 n = self.dlen
411 self.dlen = self.dlen - n
412 return self._read(n)
414 def close_data(self):
415 if self.state != _DID_HEADER:
416 raise Error, 'close_data at wrong time'
417 if self.dlen:
418 dummy = self._read(self.dlen)
419 self._checkcrc()
420 self.state = _DID_DATA
422 def read_rsrc(self, *n):
423 if self.state == _DID_HEADER:
424 self.close_data()
425 if self.state != _DID_DATA:
426 raise Error, 'Read resource data at wrong time'
427 if n:
428 n = n[0]
429 n = min(n, self.rlen)
430 else:
431 n = self.rlen
432 self.rlen = self.rlen - n
433 return self._read(n)
435 def close(self):
436 if self.rlen:
437 dummy = self.read_rsrc(self.rlen)
438 self._checkcrc()
439 self.state = _DID_RSRC
440 self.ifp.close()
def hexbin(inp, out):
    """(infilename, outfilename) - Decode binhexed file"""
    decoder = HexBin(inp)
    finfo = decoder.FInfo
    # Fall back on the filename stored in the binhex header
    if not out:
        out = decoder.FName
    on_mac = (os.name == 'mac')
    if on_mac:
        ofss = macfs.FSSpec(out)
        out = ofss.as_pathname()

    # Data fork
    datafork = open(out, 'wb')
    # XXXX Do translation on non-mac systems
    contents = decoder.read()
    datafork.write(contents)
    datafork.close()
    decoder.close_data()

    # Resource fork, only written when there is one
    contents = decoder.read_rsrc()
    if contents:
        rsrcfork = openrsrc(out, 'wb')
        rsrcfork.write(contents)
        rsrcfork.close()

    # Copy creator, type and flags from the header to the new file
    if on_mac:
        nfinfo = ofss.GetFInfo()
        nfinfo.Creator = finfo.Creator
        nfinfo.Type = finfo.Type
        nfinfo.Flags = finfo.Flags
        ofss.SetFInfo(nfinfo)

    decoder.close()
def _test():
    """Decode one file into <name>.unpacked, then exit with status 1.

    The file is taken from a file dialog on the mac and from the first
    command line argument elsewhere.
    """
    if os.name != 'mac':
        fname = sys.argv[1]
    else:
        fss, ok = macfs.PromptGetFile('File to convert:')
        if not ok:
            sys.exit(0)
        fname = fss.as_pathname()
    # Alternative round-trip test, currently disabled:
    #binhex(fname, fname+'.hqx')
    #hexbin(fname+'.hqx', fname+'.viahqx')
    hexbin(fname, fname+'.unpacked')
    sys.exit(1)

if __name__ == '__main__':
    _test()