updated on Thu Jan 26 12:02:26 UTC 2012
[aur-mirror.git] / daggerfall / unpk.py
bloba6445324c01f2cefae56f1467734f57206834892
1 import sys, os, struct, itertools, array
# class PKStream is based on information from Ben Rudiak-Gould:
# http://groups.google.com/group/comp.compression/msg/48ea9de6d71a575b
# and on the implementation by Douglas Kane:
# http://groups.google.com/group/comp.compression/msg/aa014556d706c525
# Archive extraction code was heavily influenced by Gavin Clayton's Daggerfall Jukebox:
# http://www.dfworkshop.net/?page_id=61
def file_bytestream(f):
    """Yield the contents of the binary file object *f* one byte at a time.

    Each yielded value is an int in the range 0..255.  Exactly one byte is
    read from *f* per yielded value, so the position of *f* always sits just
    past the last byte handed out.  Do not replace this with chunked reads:
    unpack_file() keeps seeking in the same file object after a stream has
    been decoded and relies on that position.

    The generator simply ends when *f* is exhausted.
    """
    while True:
        temp = f.read(1)
        if not temp:
            # A plain return ends the generator.  An explicit
            # "raise StopIteration" inside a generator body is turned into
            # RuntimeError by PEP 479 (Python 3.7+).
            return
        yield struct.unpack('B', temp)[0]
class PKStream(object):
    """Bit-level decoder for one compressed stream.

    The stream starts with two header bytes: a flag telling whether literal
    bytes are stored with a variable-length prefix code (1) or as plain
    8-bit values (0), and a dictionary size selector (4, 5 or 6).  After the
    header comes a sequence of tokens, each either a literal byte or a
    (length, offset) copy out of a circular dictionary of the most recent
    output.  A copy token whose length decodes to 519 ends the stream.

    NOTE(review): this is presumably the format produced by the PKWARE Data
    Compression Library ("implode"), as described in the newsgroup posts
    credited at the top of the file -- confirm before relying on it.

    Attributes set by the constructor:
      bytestream        iterator yielding the input bytes as ints
      bits_read         number of bits of ``last_byte`` that are still
                        unconsumed (despite the name)
      last_byte         the input byte currently being consumed
      prefixed_literals first header byte (0 or 1)
      dict_bytes        second header byte (4, 5 or 6)
      dict_size         2 ** (dict_bytes + 6), the dictionary length
      dictionary        circular buffer holding the most recent output
      current_key       index in ``dictionary`` of the next byte to write
    """

    def __init__(self, source):
        """Attach to *source* and read the two header bytes.

        *source* may be a file-like object (anything with a ``read``
        method, consumed one byte at a time through file_bytestream()), an
        iterator of ints, or an iterable of ints.

        Raises TypeError for any other kind of object and ValueError when
        the header bytes are not valid.
        """
        if hasattr(source, "read"):
            self.bytestream = file_bytestream(source)
        elif hasattr(source, "next") or hasattr(source, "__next__"):
            # Already an iterator (Python 2 spells the method "next",
            # Python 3 "__next__").
            self.bytestream = source
        elif hasattr(source, "__iter__"):
            self.bytestream = iter(source)
        else:
            raise TypeError("Expected file or iterable object, got %s" % type(source))
        self.bits_read = 0
        self.prefixed_literals = self.read(8)
        # Explicit checks instead of assert: asserts vanish under "python -O".
        if self.prefixed_literals not in (0, 1):
            raise ValueError("Invalid literal coding flag in stream header: %s"
                             % self.prefixed_literals)
        self.dict_bytes = self.read(8)
        if self.dict_bytes not in (4, 5, 6):
            raise ValueError("Invalid dictionary size selector in stream header: %s"
                             % self.dict_bytes)
        self.dict_size = 2**(self.dict_bytes+6)
        self.dictionary = array.array('B', itertools.repeat(0, self.dict_size))
        self.current_key = 0

    def read_byte(self):
        """Fetch the next input byte and mark all 8 of its bits as unread.

        StopIteration from the underlying iterator propagates to the caller
        when the input is exhausted.
        """
        self.bits_read = 8
        self.last_byte = next(self.bytestream)

    def read(self, n):
        """Return the next *n* bits of the stream as an int.

        Bits are consumed from the least significant end of each byte, and
        the first bit consumed becomes the least significant bit of the
        result.
        """
        if self.bits_read == 0:
            self.read_byte()
        if n <= self.bits_read:
            # Everything requested is still available in the current byte.
            temp = (self.last_byte >> (8-self.bits_read)) & (0xff >> (8-n))
            self.bits_read -= n
            return temp
        else:
            # The request spans a byte boundary.  "res" collects the result
            # as a list of 8-bit groups, least significant group first.
            shift = 8-self.bits_read            # bits of last_byte already used
            res = [self.last_byte >> shift]     # what is left of the current byte
            n -= self.bits_read
            self.read_byte()
            while n > 8:
                # Top up the current group from the new byte, then start the
                # next group with the bits of that byte that did not fit.
                res[-1] = res[-1] | ((self.last_byte << (8-shift)) & 0xff)
                res.append(self.last_byte >> shift)
                self.read_byte()
                n -= 8
            self.bits_read = (8-n)
            if n > shift:
                # The remaining bits fill the current group and spill over
                # into one more.
                res[-1] = res[-1] | ((self.last_byte << (8-shift)) & 0xff)
                res.append((self.last_byte >> shift) & (0xff >> (8-n+shift)))
            else:
                # The remaining bits fit into the current group.
                res[-1] = res[-1] | ((self.last_byte << (8-shift)) & (0xff >> (shift-n)))
            temp = 0
            for i, v in enumerate(res):
                temp += v << (i*8)
            return temp

    def read_rev(self, bits):
        """Read *bits* bits with the first bit consumed as the MOST significant."""
        value = self.read(bits)
        temp = 0
        for i in range(bits):
            temp = (temp << 1) | (value & 0x0001)
            value = value >> 1
        return temp

    def decode_literal(self):
        """Decode one literal byte and return its value.

        Without coded literals this is simply the next 8 bits.  With coded
        literals the byte is identified by a variable-length prefix code,
        decoded here by an explicit decision tree.  The trailing comments
        give the code bits in the order they arrive in the stream; an "x"
        marks a bit whose value is subtracted from the base shown.
        """
        if self.prefixed_literals:
            # Fixed: the original read "temp - self.read_rev(4)", which
            # raised an error instead of assigning the first four code bits.
            temp = self.read_rev(4)
            if temp==0xf: #1111
                return 0x20
            if temp==0xe: #1110
                if self.read(1): #11101
                    return 0x45
                #11100
                return 0x61
            if temp==0xd: #1101
                if self.read(1): #11011
                    return 0x65
                #11010
                return 0x69
            if temp==0xc: #1100
                if self.read(1): #11001
                    return 0x6c
                #11000
                return 0x6e
            if temp==0xb: #1011
                if self.read(1): #10111
                    return 0x6f
                #10110
                return 0x72
            if temp==0xa: #1010x
                return 0x74-self.read(1)
            if temp==0x9: #1001
                if self.read(1): #10011
                    return 0x75
                if self.read(1): #100101
                    return 0x2d
                #100100
                return 0x31
            if temp==0x8: #1000
                temp = self.read_rev(2)
                if temp==0x3: #100011
                    return 0x41
                if temp==0x2: #100010
                    return 0x43
                if temp==0x1: #100001
                    return 0x44
                #100000
                return 0x49
            if temp==0x7: #0111
                temp = self.read_rev(2)
                if temp==0x3: #011111
                    return 0x4c
                if temp==0x2: #011110
                    return 0x4e
                if temp==0x1: #011101
                    return 0x4f
                #011100
                return 0x52
            if temp==0x6: #0110
                if self.read(1): #01101x
                    return 0x54-self.read(1)
                #01100x
                return 0x63-self.read(1)
            if temp==0x5: #0101
                temp = self.read_rev(2)
                if temp==0x3: #010111
                    return 0x64
                #0101xx
                return 0x68-temp
            if temp==0x4: #0100
                if self.read(1): #01001
                    if self.read(1): #010011
                        return 0x6d
                    #010010
                    return 0x70
                #01000
                if self.read(1): #010001
                    if self.read(1): #0100011
                        return 0x0a
                    #0100010
                    return 0x0d
                #010000x
                return 0x29-self.read(1)
            if temp==0x3: #0011
                temp = self.read_rev(3)
                if temp==0x7: #0011111
                    return 0x2c
                if temp==0x6: #0011110
                    return 0x2e
                if temp==0x5: #0011101
                    return 0x30
                if temp==0x0: #0011000
                    return 0x37
                #0011xxx
                return 0x36-temp
            if temp==0x2: #0010
                temp = self.read_rev(3)
                if temp==0x7: #0010111
                    return 0x38
                if temp==0x6: #0010110
                    return 0x3d
                if temp==0x5: #0010101
                    return 0x42
                if temp==0x4: #0010100
                    return 0x46
                if temp==0x3: #0010011
                    return 0x4d
                if temp==0x2: #0010010
                    return 0x50
                if temp==0x1: #0010001
                    return 0x55
                #0010000
                return 0x6b
            if temp==0x1: #0001
                temp = self.read_rev(3)
                if temp==0x7: #0001111
                    return 0x77
                if temp==0x6: #0001110
                    if self.read(1): #00011101
                        return 0x09
                    #00011100
                    return 0x22
                if temp==0x5: #0001101
                    if self.read(1): #00011011
                        return 0x27
                    #00011010
                    return 0x2a
                if temp==0x4: #0001100
                    if self.read(1): #00011001
                        return 0x2f
                    #00011000
                    return 0x36
                if temp==0x3: #0001011x
                    return 0x3a-self.read(1)
                if temp==0x2: #0001010x
                    return 0x48-self.read(1)
                if temp==0x1: #0001001
                    if self.read(1): #00010011
                        return 0x57
                    #00010010
                    return 0x5b
                if self.read(1): #00010001
                    return 0x5f
                #00010000
                return 0x76
            #0000
            temp = self.read_rev(3)
            if temp==0x7: #0000111x
                return 0x79-self.read(1)
            if temp==0x6: #0000110
                temp = self.read_rev(2)
                if temp==0x3: #000011011
                    return 0x2b
                if temp==0x2: #000011010
                    return 0x3e
                if temp==0x1: #000011001
                    return 0x4b
                #000011000
                return 0x56
            if temp==0x5: #0000101
                temp = self.read_rev(2)
                if temp==0x3: #000010111
                    return 0x58
                if temp==0x2: #000010110
                    return 0x59
                if temp==0x1: #000010101
                    return 0x5d
                if self.read(1): #0000101001
                    return 0x21
                #0000101000
                return 0x24
            if temp==0x4: #0000100
                if self.read(1): #00001001
                    temp = self.read_rev(2)
                    if temp==0x3: #0000100111
                        return 0x26
                    if temp==0x2: #0000100110
                        return 0x71
                    if temp==0x1: #0000100101
                        return 0x7a
                    if self.read(1): #00001001001
                        return 0x00
                    #00001001000
                    return 0x3c
                temp = self.read_rev(3)
                if temp==0x7: #00001000111
                    return 0x3f
                if temp==0x6: #00001000110
                    return 0x4a
                if temp==0x5: #00001000101
                    return 0x51
                if temp==0x4: #00001000100
                    return 0x5a
                if temp==0x3: #00001000011
                    return 0x5c
                if temp==0x2: #00001000010
                    return 0x6a
                if temp==0x1: #00001000001
                    return 0x7b
                #00001000000
                return 0x7c
            if temp==0x3: #0000011
                temp = self.read_rev(5)
                if temp>=0x18:
                    return 0x20-temp
                if temp>=0x16:
                    return 0x22-temp
                if temp>=0x0a:
                    return 0x23-temp
                if temp>=0x05:
                    return 0x24-temp
                if temp==0x04:
                    return 0x23
                if temp==0x03:
                    return 0x25
                if temp==0x02:
                    return 0x3b
                if temp==0x01:
                    return 0x40
                return 0x5e
            if temp==0x2: #0000010
                temp = self.read_rev(5)
                if temp==0x1f:
                    return 0x60
                if temp>=0x1c:
                    return 0x9b-temp
                return 0xcb-temp
            if temp==0x1: #0000001
                temp = self.read_rev(5)
                if temp>=0x0c:
                    return 0xeb-temp
                if temp==0x0b:
                    return 0xe1
                if temp==0x0a:
                    return 0xe5
                if temp==0x09:
                    return 0xe9
                if temp==0x08:
                    return 0xee
                if temp>=0x05:
                    return 0xf9-temp
                if temp==0x04:
                    if self.read(1):
                        return 0x1a
                    return 0x80
                if temp==0x03:
                    return 0x82-self.read(1)
                if temp==0x02:
                    return 0x84-self.read(1)
                if temp==0x01:
                    return 0x86-self.read(1)
                return 0x88-self.read(1)
            #0000000
            temp = self.read_rev(6)
            if temp>=0x19:
                return 0xc8-temp
            if temp==0x18:
                return 0xe0
            if temp>=0x15:
                return 0xf9-temp
            if temp>=0x12:
                return 0xfa-temp
            if temp>=0x0e:
                return 0xfb-temp
            if temp>=0x0b:
                return 0xfc-temp
            return 0xff-temp
        return self.read(8)

    def decode_copy_length(self):
        """Decode and return the length of a copy token (2..519).

        The length is a prefix code followed, for lengths of 10 and above,
        by extra bits added to a base value.  519 is the end-of-stream
        marker recognised by get_next_token().
        """
        temp = self.read_rev(2)
        if temp==0x1: #01
            if self.read(1): #011
                return 5
            #010
            if self.read(1): #0101
                return 6
            #0100
            return 7
        if temp==0x2: #10
            if self.read(1): #101
                return 2
            #100
            return 4
        if temp==0x3: #11
            return 3
        #00
        if self.read(1): #001
            if self.read(1): #0011
                return 8
            #0010
            if self.read(1): #00101
                return 9
            #00100x
            return 10 + self.read(1)
        #000
        if self.read(1): #0001
            if self.read(1): #00011xx
                return 12+self.read(2)
            #00010xxx
            return 16+self.read(3)
        #0000
        temp = self.read_rev(2)
        if temp==0x3: #000011xxxx
            return 24+self.read(4)
        if temp==0x2: #000010xxxxx
            return 40+self.read(5)
        if temp==0x1: #000001xxxxxx
            return 72+self.read(6)
        #000000
        if self.read(1): #0000001xxxxxxx
            return 136+self.read(7)
        #0000000
        return 264+self.read(8)

    def calc_offset(self, high, low):
        """Combine the decoded part *high* with *low* further bits read raw."""
        return (high << low) | self.read(low)

    def decode_copy_offset(self, low):
        """Decode and return the offset of a copy token.

        The upper part of the offset (0x00..0x3f) is a prefix code; the
        lower *low* bits follow uncoded and are appended by calc_offset().
        Note that the first two bits are taken with read(), not read_rev(),
        so ``temp`` holds them with the first bit as the low bit; the
        trailing comments still list the bits in arrival order.
        """
        temp = self.read(2)
        if temp==0x3: #11
            return self.calc_offset(0x00, low)
        if temp==0x1: #10
            if self.read(1): #101
                if self.read(1): #1011
                    return self.calc_offset(0x01, low)
                #1010
                return self.calc_offset(0x02, low)
            #100
            return self.calc_offset(0x06-self.read_rev(2), low)
        if temp==0x2: #01
            temp = self.read_rev(4)
            if temp: #01xxxx
                return self.calc_offset(0x16-temp, low)
            #010000
            return self.calc_offset(0x17-self.read(1), low)
        #00
        if self.read(1): #001
            return self.calc_offset(0x27-self.read_rev(4), low)
        #000
        if self.read(1): #0001
            return self.calc_offset(0x2f-self.read_rev(3), low)
        #0000
        return self.calc_offset(0x3f-self.read_rev(4), low)

    def get_next_token(self):
        """Decode the next token as a tuple (type, literal, length, offset).

        type 0  : literal byte, value in ``literal``
        type 1  : copy of ``length`` bytes starting ``offset`` + 1 bytes back
        type -1 : end of stream
        Fields that do not apply to the token type are 0.
        """
        temp = self.read(1)
        if temp==0:
            return (0, self.decode_literal(), 0, 0)
        length = self.decode_copy_length()
        if length==519: # end of stream
            return (-1, 0, 0, 0)
        # Copies of length 2 always carry 2 raw offset bits; every other
        # length carries dict_bytes raw offset bits.
        if length==2:
            low = 2
        else:
            low = self.dict_bytes
        return (1, 0, length, self.decode_copy_offset(low))

    def decode(self):
        """Decode tokens up to the end-of-stream marker.

        Returns the decompressed data as an ``array.array('B')``.  Every
        output byte is also written into the circular dictionary so that
        later copy tokens can refer to it.  StopIteration propagates if the
        input runs out before the end-of-stream marker is seen.
        """
        tktype = 0
        apBuffer = array.array('B')
        while tktype>=0:
            (tktype, literal, length, offset) = self.get_next_token()
            if tktype==0:
                apBuffer.append(literal)
                self.dictionary[self.current_key] = literal
                self.current_key += 1
                if self.current_key == self.dict_size:
                    self.current_key = 0
            elif tktype==1:
                # An offset of 0 refers to the most recently written byte.
                start = (self.current_key-1-offset)%self.dict_size
                ind = start
                nexti = self.current_key
                copies = 0
                while copies<length:
                    copies += 1
                    apBuffer.append(self.dictionary[ind])
                    self.dictionary[nexti] = self.dictionary[ind]
                    nexti += 1
                    ind += 1
                    # A copy longer than its distance repeats the copied
                    # span: restart at the beginning of the span once the
                    # read index reaches where this copy started writing.
                    if ind==self.current_key:
                        ind = start
                    # Both indices wrap around the circular dictionary.
                    if ind==self.dict_size:
                        ind = 0
                    if nexti == self.dict_size:
                        nexti = 0
                self.current_key = nexti
        return apBuffer
def unpack_file(f, out, length):
    """Decompress one archived file from *f* into the file named *out*.

    *f* must be positioned at the start of the file's data.  The data is a
    sequence of chunks; for each chunk 36 bytes are skipped (not interpreted
    here -- presumably a per-chunk header) and the compressed stream that
    follows is decoded with PKStream.  Chunks are processed until at least
    *length* decompressed bytes have been produced.  A progress percentage
    is printed after every chunk.
    """
    lenout = 0
    # "with" closes the output file even if decoding fails half way through.
    with open(out, "wb") as g:
        while lenout < length:
            f.seek(36, 1)
            # PKStream consumes f one byte at a time, so afterwards f is
            # positioned right behind the stream, ready for the next chunk.
            stream = PKStream(f).decode()
            lenout += len(stream)
            # Single-argument print(...) behaves identically as a Python 2
            # print statement and as the Python 3 print function.
            print("%s %%\x0d" % (lenout * 100 // length))
            # array.write() is deprecated and gone in Python 3; tofile() is
            # its replacement.
            stream.tofile(g)
def unpack_header(f, at, names, offset):
    """Read one file-table record and extract the file it describes.

    The record is located at ``offset + at`` in *f* and consists of, in
    order: the uncompressed length (unsigned int), an index into *names*
    selecting the target directory (unsigned int), a 13-byte NUL-padded
    file name, and the start position of the file data relative to
    *offset* (unsigned int).  The data is then handed to unpack_file().

    NOTE(review): the integers are unpacked with the native 'I' format; if
    the archive is little-endian on disk (likely, but not shown here) this
    is only correct on little-endian hosts -- confirm and consider '<I'.
    NOTE(review): the file name from the archive is joined to the target
    directory unchecked; verify inputs are trusted.
    """
    f.seek(offset + at)
    length, = struct.unpack('I', f.read(4))
    to = names[struct.unpack('I', f.read(4))[0]]
    name = f.read(13).strip(b'\x00')
    if not isinstance(name, str):
        # Python 3 reads bytes; turn them into text so the name can be
        # joined with the (text) directory path.  latin-1 maps every byte
        # to a character and therefore cannot fail.
        name = name.decode("latin-1")
    start, = struct.unpack('I', f.read(4))
    out = os.path.join(to, name)
    f.seek(offset + start)
    # Single-argument print(...) works as a Python 2 statement and as the
    # Python 3 function alike.
    print("\"%s\" (length: %s bytes)" % (out, length))
    unpack_file(f, out, length)
    print("File \"%s\" unpacked\n" % out)
def unpack_archive(archive, directory, offset=0):
    """Extract every file of the archive *archive* below *directory*.

    *offset* is the position inside the file named *archive* at which the
    archive data begins.  As read by this function the archive starts with
    two unsigned ints, ``start`` and ``end`` (both relative to *offset*):
    the file table of 25-byte records lies between them, and a table of
    60-byte NUL-padded directory names begins at ``end``.  The directory
    named "." stands for *directory* itself.  Missing directories are
    created; each file is then extracted through unpack_header().
    """
    # "with" makes sure the archive is closed even when extraction fails.
    with open(archive, "rb") as f:
        f.seek(offset)
        start, end = struct.unpack('II', f.read(8))
        # Floor division keeps the count an int on Python 3 as well.
        nfiles = (end - start) // 25
        if not os.path.exists(directory):
            os.makedirs(directory)

        # The number of directory names is not stored explicitly: take the
        # highest directory index used by any file record (second field of
        # each record, hence the "+4").
        ndirs = 0
        f.seek(offset + start + 4)
        for i in range(nfiles):
            ndirs = max(ndirs, struct.unpack('I', f.read(4))[0])
            f.seek(21, 1)   # skip the remaining 21 bytes of the record

        names = []
        f.seek(offset + end)
        for i in range(ndirs + 1):
            name = f.read(60).strip(b'\x00')
            if not isinstance(name, str):
                # Python 3 reads bytes; latin-1 decoding cannot fail.
                name = name.decode("latin-1")
            name = name.replace("\\", os.path.sep)
            if name != ".":
                to = os.path.join(directory, name)
            else:
                to = directory
            if not os.path.exists(to):
                # makedirs also copes with a nested name whose parent has
                # not been created yet.
                os.makedirs(to)
            names.append(to)

        print("Found %s files in archive %s at offset %s.\n" % (nfiles, archive, offset))
        for i in range(nfiles):
            # No newline: unpack_header() continues this line.  The trailing
            # space is the one a Python 2 "print ...," would have inserted
            # before the next printed item.
            sys.stdout.write("Extracting file %s of %s, " % (i + 1, nfiles))
            unpack_header(f, start + i * 25, names, offset)
if __name__ == "__main__":
    # Command line: <archive> <target directory> [offset]
    if len(sys.argv) in (3, 4):
        # "~" and environment variables are expanded in both path arguments.
        archive, directory = [os.path.expandvars(os.path.expanduser(arg))
                              for arg in sys.argv[1:3]]
        if len(sys.argv) == 4:
            unpack_archive(archive, directory, offset=int(sys.argv[3]))
        else:
            unpack_archive(archive, directory)
    else:
        print("usage: python2 unpk.py <archive> <target directory> [offset]")