1 /* $NetBSD: bzip2recover.c,v 1.6 2008/03/18 14:47:07 christos Exp $ */
4 /*-----------------------------------------------------------*/
5 /*--- Block recoverer program for bzip2 ---*/
6 /*--- bzip2recover.c ---*/
7 /*-----------------------------------------------------------*/
9 /* ------------------------------------------------------------------
10 This file is part of bzip2/libbzip2, a program and library for
11 lossless, block-sorting data compression.
13 bzip2/libbzip2 version 1.0.5 of 10 December 2007
14 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
16 Please read the WARNING, DISCLAIMER and PATENTS sections in the
19 This program is released under the terms of the license contained
21 ------------------------------------------------------------------ */
23 /* This program is a complete hack and should be rewritten properly.
24 It isn't very complicated. */
/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
/* MaybeUInt64: the widest unsigned integer available for counting bit
   positions, plus the matching printf conversion.  GNU-compatible
   compilers and MSVC get a 64-bit type; anything else falls back to
   `unsigned int', which limits recoverable input to 2^32 bits (512MB).

   The GNU format is "%llu": `L' is only defined for floating-point
   conversions, `ll' is the length modifier for unsigned long long. */
#if defined(__GNUC__)
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int  MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
/* Small aliases used throughout this file.  UInt32 is treated as exactly
   32 bits wide by the shift-register code in main (it shifts by 31). */
typedef  unsigned int   UInt32;
typedef  unsigned char  UChar;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)
63 #define BZ_MAX_FILENAME 2000
65 Char inFileName
[BZ_MAX_FILENAME
];
66 Char outFileName
[BZ_MAX_FILENAME
];
67 Char progName
[BZ_MAX_FILENAME
];
69 MaybeUInt64 bytesOut
= 0;
70 MaybeUInt64 bytesIn
= 0;
72 /*---------------------------------------------------*/
73 /*--- Bit stream I/O ---*/
74 /*---------------------------------------------------*/
85 static void readError ( void );
86 static void writeError ( void );
87 static void mallocFail ( Int32 n
);
88 static BitStream
* bsOpenReadStream ( FILE* stream
);
89 static BitStream
* bsOpenWriteStream ( FILE* stream
);
90 static void bsPutBit ( BitStream
* bs
, Int32 bit
);
91 static Int32
bsGetBit ( BitStream
* bs
);
92 static void bsClose ( BitStream
* bs
);
93 static void bsPutUChar ( BitStream
* bs
, UChar c
);
94 static void bsPutUInt32 ( BitStream
* bs
, UInt32 c
);
95 static Bool
endsInBz2 ( Char
* name
);
96 static void tooManyBlocks ( Int32 max_handled_blocks
);
99 /*---------------------------------------------------*/
100 /*--- Header bytes ---*/
101 /*---------------------------------------------------*/
/* The four bytes that open a .bz2 stream: "BZh" followed by a digit.
   BZ_HDR_0 is the ASCII digit base; main writes BZ_HDR_0 + 9. */
#define BZ_HDR_B 0x42   /* 'B' */
#define BZ_HDR_Z 0x5a   /* 'Z' */
#define BZ_HDR_h 0x68   /* 'h' */
#define BZ_HDR_0 0x30   /* '0' */
109 /*---------------------------------------------------*/
110 /*--- I/O errors ---*/
111 /*---------------------------------------------------*/
113 /*---------------------------------------------*/
114 static void readError ( void )
117 "%s: I/O error reading `%s', possible reason follows.\n",
118 progName
, inFileName
);
120 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
126 /*---------------------------------------------*/
127 static void writeError ( void )
130 "%s: I/O error reading `%s', possible reason follows.\n",
131 progName
, inFileName
);
133 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
139 /*---------------------------------------------*/
140 static void mallocFail ( Int32 n
)
143 "%s: malloc failed on request for %d bytes.\n",
145 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
151 /*---------------------------------------------*/
152 static void tooManyBlocks ( Int32 max_handled_blocks
)
155 "%s: `%s' appears to contain more than %d blocks\n",
156 progName
, inFileName
, max_handled_blocks
);
158 "%s: and cannot be handled. To fix, increase\n",
161 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
168 /*---------------------------------------------*/
169 static BitStream
* bsOpenReadStream ( FILE* stream
)
171 BitStream
*bs
= malloc ( sizeof(BitStream
) );
172 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
181 /*---------------------------------------------*/
182 static BitStream
* bsOpenWriteStream ( FILE* stream
)
184 BitStream
*bs
= malloc ( sizeof(BitStream
) );
185 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
194 /*---------------------------------------------*/
195 static void bsPutBit ( BitStream
* bs
, Int32 bit
)
197 if (bs
->buffLive
== 8) {
198 Int32 retVal
= putc ( (UChar
) bs
->buffer
, bs
->handle
);
199 if (retVal
== EOF
) writeError();
202 bs
->buffer
= bit
& 0x1;
204 bs
->buffer
= ( (bs
->buffer
<< 1) | (bit
& 0x1) );
210 /*---------------------------------------------*/
212 Returns 0 or 1, or 2 to indicate EOF.
214 static Int32
bsGetBit ( BitStream
* bs
)
216 if (bs
->buffLive
> 0) {
218 return ( ((bs
->buffer
) >> (bs
->buffLive
)) & 0x1 );
220 Int32 retVal
= getc ( bs
->handle
);
221 if ( retVal
== EOF
) {
222 if (errno
!= 0) readError();
227 return ( ((bs
->buffer
) >> 7) & 0x1 );
232 /*---------------------------------------------*/
233 static void bsClose ( BitStream
* bs
)
237 if ( bs
->mode
== 'w' ) {
238 while ( bs
->buffLive
< 8 ) {
242 retVal
= putc ( (UChar
) (bs
->buffer
), bs
->handle
);
243 if (retVal
== EOF
) writeError();
245 retVal
= fflush ( bs
->handle
);
246 if (retVal
== EOF
) writeError();
248 retVal
= fclose ( bs
->handle
);
250 if (bs
->mode
== 'w') writeError(); else readError();
256 /*---------------------------------------------*/
257 static void bsPutUChar ( BitStream
* bs
, UChar c
)
260 for (i
= 7; i
>= 0; i
--)
261 bsPutBit ( bs
, (((UInt32
) c
) >> i
) & 0x1 );
265 /*---------------------------------------------*/
266 static void bsPutUInt32 ( BitStream
* bs
, UInt32 c
)
270 for (i
= 31; i
>= 0; i
--)
271 bsPutBit ( bs
, (c
>> i
) & 0x1 );
275 /*---------------------------------------------*/
276 static Bool
endsInBz2 ( Char
* name
)
278 Int32 n
= strlen ( name
);
279 if (n
<= 4) return False
;
288 /*---------------------------------------------------*/
290 /*---------------------------------------------------*/
/* Character that separates directory components in a path; main uses it
   to find where the base name of the input file starts.
   This logic isn't really right when it comes to Cygwin. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif
/* The two 48-bit markers main searches for, each split into its upper
   16 bits (_HI) and lower 32 bits (_LO) to match the buffHi/buffLo
   shift register: 0x314159265359 starts a block and 0x177245385090 is
   the end-of-stream mark.  These are the same byte sequences main
   writes at the start and end of every recovered file. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL
305 /* Increase if necessary. However, a .bz2 file with > 50000 blocks
306 would have an uncompressed size of at least 40GB, so the chances
307 are low you'll need to up this.
309 #define BZ_MAX_HANDLED_BLOCKS 50000
311 MaybeUInt64 bStart
[BZ_MAX_HANDLED_BLOCKS
];
312 MaybeUInt64 bEnd
[BZ_MAX_HANDLED_BLOCKS
];
313 MaybeUInt64 rbStart
[BZ_MAX_HANDLED_BLOCKS
];
314 MaybeUInt64 rbEnd
[BZ_MAX_HANDLED_BLOCKS
];
316 Int32
main ( Int32 argc
, Char
** argv
)
320 BitStream
* bsIn
, *bsWr
;
321 Int32 b
, wrBlock
, currBlock
, rbCtr
;
322 MaybeUInt64 bitsRead
;
324 UInt32 buffHi
, buffLo
, blockCRC
;
327 strcpy ( progName
, argv
[0] );
328 inFileName
[0] = outFileName
[0] = 0;
331 "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );
334 fprintf ( stderr
, "%s: usage is `%s damaged_file_name'.\n",
335 progName
, progName
);
336 switch (sizeof(MaybeUInt64
)) {
339 "\trestrictions on size of recovered file: None\n");
343 "\trestrictions on size of recovered file: 512 MB\n");
345 "\tto circumvent, recompile with MaybeUInt64 as an\n"
346 "\tunsigned 64-bit int.\n");
350 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
351 "configuration error.\n");
357 if (strlen(argv
[1]) >= BZ_MAX_FILENAME
-20) {
359 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
360 progName
, (int)strlen(argv
[1]) );
364 strcpy ( inFileName
, argv
[1] );
366 inFile
= fopen ( inFileName
, "rb" );
367 if (inFile
== NULL
) {
368 fprintf ( stderr
, "%s: can't read `%s'\n", progName
, inFileName
);
372 bsIn
= bsOpenReadStream ( inFile
);
373 fprintf ( stderr
, "%s: searching for block boundaries ...\n", progName
);
378 bStart
[currBlock
] = 0;
383 b
= bsGetBit ( bsIn
);
386 if (bitsRead
>= bStart
[currBlock
] &&
387 (bitsRead
- bStart
[currBlock
]) >= 40) {
388 bEnd
[currBlock
] = bitsRead
-1;
390 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
391 " to " MaybeUInt64_FMT
" (incomplete)\n",
392 currBlock
, bStart
[currBlock
], bEnd
[currBlock
] );
397 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
398 buffLo
= (buffLo
<< 1) | (b
& 1);
399 if ( ( (buffHi
& 0x0000ffff) == BLOCK_HEADER_HI
400 && buffLo
== BLOCK_HEADER_LO
)
402 ( (buffHi
& 0x0000ffff) == BLOCK_ENDMARK_HI
403 && buffLo
== BLOCK_ENDMARK_LO
)
406 bEnd
[currBlock
] = bitsRead
-49;
411 (bEnd
[currBlock
] - bStart
[currBlock
]) >= 130) {
412 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
413 " to " MaybeUInt64_FMT
"\n",
414 rbCtr
+1, bStart
[currBlock
], bEnd
[currBlock
] );
415 rbStart
[rbCtr
] = bStart
[currBlock
];
416 rbEnd
[rbCtr
] = bEnd
[currBlock
];
419 if (currBlock
>= BZ_MAX_HANDLED_BLOCKS
)
420 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS
);
423 bStart
[currBlock
] = bitsRead
;
429 /*-- identified blocks run from 1 to rbCtr inclusive. --*/
433 "%s: sorry, I couldn't find any block boundaries.\n",
438 fprintf ( stderr
, "%s: splitting into blocks\n", progName
);
440 inFile
= fopen ( inFileName
, "rb" );
441 if (inFile
== NULL
) {
442 fprintf ( stderr
, "%s: can't open `%s'\n", progName
, inFileName
);
445 bsIn
= bsOpenReadStream ( inFile
);
447 /*-- placate gcc's dataflow analyser --*/
448 blockCRC
= 0; bsWr
= 0;
456 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
457 buffLo
= (buffLo
<< 1) | (b
& 1);
458 if (bitsRead
== 47+rbStart
[wrBlock
])
459 blockCRC
= (buffHi
<< 16) | (buffLo
>> 16);
461 if (outFile
!= NULL
&& bitsRead
>= rbStart
[wrBlock
]
462 && bitsRead
<= rbEnd
[wrBlock
]) {
463 bsPutBit ( bsWr
, b
);
468 if (bitsRead
== rbEnd
[wrBlock
]+1) {
469 if (outFile
!= NULL
) {
470 bsPutUChar ( bsWr
, 0x17 ); bsPutUChar ( bsWr
, 0x72 );
471 bsPutUChar ( bsWr
, 0x45 ); bsPutUChar ( bsWr
, 0x38 );
472 bsPutUChar ( bsWr
, 0x50 ); bsPutUChar ( bsWr
, 0x90 );
473 bsPutUInt32 ( bsWr
, blockCRC
);
476 if (wrBlock
>= rbCtr
) break;
479 if (bitsRead
== rbStart
[wrBlock
]) {
480 /* Create the output file name, correctly handling leading paths.
481 (31.10.2001 by Sergey E. Kusikov) */
484 for (k
= 0; k
< BZ_MAX_FILENAME
; k
++)
486 strcpy (outFileName
, inFileName
);
487 split
= strrchr (outFileName
, BZ_SPLIT_SYM
);
493 /* Now split points to the start of the basename. */
494 ofs
= split
- outFileName
;
495 sprintf (split
, "rec%5d", wrBlock
+1);
496 for (p
= split
; *p
!= 0; p
++) if (*p
== ' ') *p
= '0';
497 strcat (outFileName
, inFileName
+ ofs
);
499 if ( !endsInBz2(outFileName
)) strcat ( outFileName
, ".bz2" );
501 fprintf ( stderr
, " writing block %d to `%s' ...\n",
502 wrBlock
+1, outFileName
);
504 outFile
= fopen ( outFileName
, "wb" );
505 if (outFile
== NULL
) {
506 fprintf ( stderr
, "%s: can't write `%s'\n",
507 progName
, outFileName
);
510 bsWr
= bsOpenWriteStream ( outFile
);
511 bsPutUChar ( bsWr
, BZ_HDR_B
);
512 bsPutUChar ( bsWr
, BZ_HDR_Z
);
513 bsPutUChar ( bsWr
, BZ_HDR_h
);
514 bsPutUChar ( bsWr
, BZ_HDR_0
+ 9 );
515 bsPutUChar ( bsWr
, 0x31 ); bsPutUChar ( bsWr
, 0x41 );
516 bsPutUChar ( bsWr
, 0x59 ); bsPutUChar ( bsWr
, 0x26 );
517 bsPutUChar ( bsWr
, 0x53 ); bsPutUChar ( bsWr
, 0x59 );
521 fprintf ( stderr
, "%s: finished\n", progName
);
527 /*-----------------------------------------------------------*/
528 /*--- end bzip2recover.c ---*/
529 /*-----------------------------------------------------------*/