1 import sys
, os
, struct
, itertools
, array
3 # class PKStream is based on information from Ben Rudiak-Gould:
4 # http://groups.google.com/group/comp.compression/msg/48ea9de6d71a575b
5 # and implementation of Douglas Kane:
6 # http://groups.google.com/group/comp.compression/msg/aa014556d706c525
7 # Archive extraction code was highly influenced by Gavin Claytons Daggerfall Jukebox:
8 # http://www.dfworkshop.net/?page_id=61
def file_bytestream(f):
    """Yield the contents of a binary file one byte at a time as integers.

    f -- file-like object opened in binary mode; ``read(1)`` is called on
         it repeatedly until it returns an empty result (end of file).

    Each yielded value is the unsigned integer (0..255) obtained by
    unpacking a single byte with ``struct.unpack('B', ...)``.
    """
    while True:
        temp = f.read(1)
        if not temp:
            # End the generator with a plain return.  Raising StopIteration
            # by hand inside a generator body is turned into a RuntimeError
            # by PEP 479 (Python 3.7+); return behaves the same on Python 2.
            return
        yield struct.unpack('B', temp)[0]
16 class PKStream(object):
17 def __init__(self
, source
):
18 if isinstance(source
, file):
19 self
.bytestream
= file_bytestream(source
)
20 elif hasattr(source
, "next"):
21 self
.bytestream
= source
22 elif hasattr(source
, "__iter__"):
23 self
.bytestream
= source
.__iter
__()
25 raise TypeError("Expected file or iterable object, got %s" % type(source
))
27 self
.prefixed_literals
= self
.read(8)
28 assert(self
.prefixed_literals
==0 or self
.prefixed_literals
==1)
29 self
.dict_bytes
= self
.read(8)
30 assert(self
.dict_bytes
==4 or self
.dict_bytes
==5 or self
.dict_bytes
==6)
31 self
.dict_size
= 2**(self
.dict_bytes
+6)
32 self
.dictionary
= array
.array('B',itertools
.repeat(0,self
.dict_size
))
37 self
.last_byte
= self
.bytestream
.next()
43 temp
= (self
.last_byte
>> (8-self
.bits_read
))&(0xff>>(8-n
))
47 shift
= 8-self
.bits_read
48 res
= [self
.last_byte
>>shift
]
52 res
[-1] = res
[-1] |
((self
.last_byte
<< (8-shift
)) & 0xff)
53 res
.append(self
.last_byte
>> shift
)
56 self
.bits_read
= (8-n
)
58 res
[-1] = res
[-1] |
((self
.last_byte
<< (8-shift
)) & 0xff)
59 res
.append((self
.last_byte
>>shift
)&(0xff>>(8-n
+shift
)))
61 res
[-1] = res
[-1] |
(self
.last_byte
<< (8-shift
)) & (0xff >> (shift
-n
))
63 for i
,v
in enumerate(res
):
67 def read_rev(self
,bits
):
68 value
= self
.read(bits
)
70 for i
in xrange(bits
):
72 temp
= temp | value
& 0x0001
76 def decode_literal(self
):
77 if self
.prefixed_literals
:
78 temp
- self
.read_rev(4)
82 if self
.read(1): #11101
87 if self
.read(1): #11011
92 if self
.read(1): #11001
97 if self
.read(1): #10111
102 return 0x74-self
.read(1)
104 if self
.read(1): #10011
106 if self
.read(1): #100101
111 temp
= self
.read_rev(2)
112 if temp
==0x3: #100011
114 if temp
==0x2: #100010
116 if temp
==0x1: #100001
121 temp
= self
.read_rev(2)
122 if temp
==0x3: #011111
124 if temp
==0x2: #011110
126 if temp
==0x1: #011101
131 if self
.read(1): #01101x
132 return 0x54-self
.read(1)
134 return 0x63-self
.read(1)
136 temp
= self
.read_rev(2)
137 if temp
==0x3: #010111
142 if self
.read(1): #01001
143 if self
.read(1): #010011
148 if self
.read(1): #010001
149 if self
.read(1): #0100011
154 return 0x29-self
.read(1)
156 temp
= self
.read_rev(3)
157 if temp
==0x7: #0011111
159 if temp
==0x6: #0011110
161 if temp
==0x5: #0011101
163 if temp
==0x0: #0011000
168 temp
= self
.read_rev(3)
169 if temp
==0x7: #0010111
171 if temp
==0x6: #0010110
173 if temp
==0x5: #0010101
175 if temp
==0x4: #0010100
177 if temp
==0x3: #0010011
179 if temp
==0x2: #0010010
181 if temp
==0x1: #0010001
186 temp
= self
.read_rev(3)
187 if temp
==0x7: #0001111
189 if temp
==0x6: #0001110
190 if self
.read(1): #00011100
194 if temp
==0x5: #0001101
195 if self
.read(1): #00011011
199 if temp
==0x4: #0001100
200 if self
.read(1): #00011001
204 if temp
==0x3: #0001011x
205 return 0x3a-self
.read(1)
206 if temp
==0x2: #0001010x
207 return 0x48-self
.read(1)
208 if temp
==0x1: #0001001
209 if self
.read(1): #00010011
213 if self
.read(1): #00010001
218 temp
= self
.read_rev(3)
219 if temp
==0x7: #0000111x
220 return 0x79-self
.read(1)
221 if temp
==0x6: #0000110
222 temp
= self
.read_rev(2)
223 if temp
==0x3: #000011011
225 if temp
==0x2: #000011010
227 if temp
==0x1: #000011001
231 if temp
==0x5: #0000101
232 temp
= self
.read_rev(2)
233 if temp
==0x3: #000010111
235 if temp
==0x2: #000010110
237 if temp
==0x1: #000010101
239 if self
.read(1): #0000101001
243 if temp
==0x4: #0000100
244 if self
.read(1): #00001001
245 temp
= self
.read_rev(2)
246 if temp
==0x3: #0000100111
248 if temp
==0x2: #0000100110
250 if temp
==0x1: #0000100101
252 if self
.read(1): #00001001001
256 temp
= self
.read_rev(3)
257 if temp
==0x7: #00001000111
259 if temp
==0x6: #00001000110
261 if temp
==0x5: #00001000101
263 if temp
==0x4: #00001000100
265 if temp
==0x3: #00001000011
267 if temp
==0x2: #00001000010
269 if temp
==0x1: #00001000001
273 if temp
==0x3: #0000011
274 temp
= self
.read_rev(5)
292 if temp
==0x2: #0000010
293 temp
= self
.read_rev(5)
299 if temp
==0x1: #0000001
300 temp
= self
.read_rev(5)
318 return 0x82-self
.read(1)
320 return 0x84-self
.read(1)
322 return 0x86-self
.read(1)
323 return 0x88-self
.read(1)
325 temp
= self
.read_rev(6)
340 def decode_copy_length(self
):
341 temp
= self
.read_rev(2)
343 if self
.read(1): #011
346 if self
.read(1): #0101
351 if self
.read(1): #101
358 if self
.read(1): #001
359 if self
.read(1): #0011
362 if self
.read(1): #00101
365 return 10 + self
.read(1)
367 if self
.read(1): #0001
368 if self
.read(1): #00011xx
369 return 12+self
.read(2)
371 return 16+self
.read(3)
373 temp
= self
.read_rev(2)
374 if temp
==0x3: #000011xxxx
375 return 24+self
.read(4)
376 if temp
==0x2: #000010xxxxx
377 return 40+self
.read(5)
378 if temp
==0x1: #000001xxxxxx
379 return 72+self
.read(6)
381 if self
.read(1): #0000001xxxxxxx
382 return 136+self
.read(7)
384 return 264+self
.read(8)
def calc_offset(self, high, low):
    """Combine an already-decoded high part with freshly read low bits.

    high -- upper part of the offset, supplied by the caller
    low  -- number of low-order bits to fetch with ``self.read``

    Returns ``high`` shifted left by ``low`` bits, OR-ed with the value
    returned by ``self.read(low)``.
    """
    upper = high << low
    lower = self.read(low)
    return upper | lower
389 def decode_copy_offset(self
, low
):
392 return self
.calc_offset(0x00, low
)
394 if self
.read(1): # 101
395 if self
.read(1): #1011
396 return self
.calc_offset(0x01, low
)
398 return self
.calc_offset(0x02, low
)
400 return self
.calc_offset(0x06-self
.read_rev(2), low
)
402 temp
= self
.read_rev(4)
404 return self
.calc_offset(0x16-temp
, low
)
406 return self
.calc_offset(0x17-self
.read(1), low
)
409 return self
.calc_offset(0x27-self
.read_rev(4), low
)
412 return self
.calc_offset(0x2f-self
.read_rev(3), low
)
414 return self
.calc_offset(0x3f-self
.read_rev(4), low
)
416 def get_next_token(self
):
419 return (0, self
.decode_literal(),0,0)
420 length
= self
.decode_copy_length()
421 if length
==519: # end of stream
426 low
= self
.dict_bytes
427 return (1, 0, length
, self
.decode_copy_offset(low
))
431 apBuffer
= array
.array('B')
433 (tktype
, literal
, length
, offset
) = self
.get_next_token()
435 apBuffer
.append(literal
)
436 self
.dictionary
[self
.current_key
] = literal
437 self
.current_key
+= 1
438 if self
.current_key
== self
.dict_size
:
441 start
= (self
.current_key
-1-offset
)%self
.dict_size
443 nexti
= self
.current_key
447 apBuffer
.append(self
.dictionary
[ind
])
448 self
.dictionary
[nexti
] = self
.dictionary
[ind
]
451 if ind
==self
.current_key
:
453 if ind
==self
.dict_size
:
455 if nexti
== self
.dict_size
:
457 self
.current_key
= nexti
460 def unpack_file(f
, out
, length
):
465 stream
= PKStream(f
).decode()
466 lenout
+= len(stream
)
467 print (lenout
*100/length
),"%\x0d"
471 def unpack_header(f
, at
, names
, offset
):
473 length
, = struct
.unpack('I', f
.read(4))
474 to
= names
[struct
.unpack('I',f
.read(4))[0]]
475 name
= f
.read(13).strip('\x00')
476 start
, = struct
.unpack('I', f
.read(4))
477 out
= os
.path
.join(to
, name
)
479 print "\"%s\" (length: %s bytes)"%(out
,length
)
480 unpack_file(f
, out
, length
)
481 print "File \"%s\" unpacked\n"%out
483 def unpack_archive(archive
, directory
, offset
=0):
484 f
= open(archive
, "rb")
486 start
,end
= struct
.unpack('II', f
.read(8))
487 nfiles
= (end
-start
)/25
488 if not os
.path
.exists(directory
):
491 f
.seek(offset
+start
+4)
492 for i
in xrange(nfiles
):
493 ndirs
=max(ndirs
, struct
.unpack('I',f
.read(4))[0])
497 for i
in xrange(ndirs
+1):
498 name
= f
.read(60).strip('\x00').replace("\\",os
.path
.sep
)
500 to
= os
.path
.join(directory
,name
)
503 if not os
.path
.exists(to
):
506 print "Found %s files in archive %s at offset %s.\n"%(nfiles
,archive
,offset
)
507 for i
in xrange(nfiles
):
508 print "Extracting file %s of %s,"%(i
+1,nfiles
),
509 unpack_header(f
, start
+i
*25, names
, offset
)
512 if __name__
== "__main__":
514 unpack_archive(*map(os
.path
.expandvars
,map(os
.path
.expanduser
,sys
.argv
[1:])))
515 elif len(sys
.argv
)==4:
516 unpack_archive(*map(os
.path
.expandvars
,map(os
.path
.expanduser
,sys
.argv
[1:3])),offset
=int(sys
.argv
[3]))
518 print "usage: python2 unpk.py <archive> <target directory> [offset]"