/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/
/*--
  This program is bzip2recover, a program to attempt data
  salvage from damaged files created by the accompanying
  bzip2-1.0.3 program.

  Copyright (C) 1996-2005 Julian R Seward. All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

  2. The origin of this software must not be misrepresented; you must
     not claim that you wrote the original software. If you use this
     software in a product, an acknowledgment in the product
     documentation would be appreciated but is not required.

  3. Altered source versions must be plainly marked as such, and must
     not be misrepresented as being the original software.

  4. The name of the author may not be used to endorse or promote
     products derived from this software without specific prior written
     permission.

  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  Julian Seward, Cambridge, UK.
  bzip2/libbzip2 version 1.0.3 of 15 February 2005
--*/
/*--
  This program is a complete hack and should be rewritten
  properly. It isn't very complicated.
--*/
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem. Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
/* MaybeUInt64 is the widest unsigned integer the compiler offers for
   counting bit offsets, and MaybeUInt64_FMT is the printf conversion
   that matches it.  The three definitions are alternatives, so exactly
   one must be selected by the preprocessor.
   NOTE(review): the selecting directives were not present in the text
   this was restored from; __GNUC__ / _MSC_VER follow the "GNU ... Ditto
   MSVC" wording of the comment above -- confirm. */
#if defined(__GNUC__)
   typedef  unsigned long long int  MaybeUInt64;
   /* "ll" is the length modifier for (unsigned) long long.  "L" is
      only defined for floating conversions, so "%Lu" is not portable. */
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* No 64-bit type known: bit offsets wrap at 2^32 (512MB files). */
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
/* Scalar aliases used throughout this file. */
typedef  unsigned int   UInt32;
/* NOTE(review): Int32 and Char are used by nearly every function in
   this file but no typedef for them was present in the text this was
   restored from; defined here as the plain signed counterparts --
   confirm. */
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)
92 #define BZ_MAX_FILENAME 2000
94 Char inFileName
[BZ_MAX_FILENAME
];
95 Char outFileName
[BZ_MAX_FILENAME
];
96 Char progName
[BZ_MAX_FILENAME
];
98 MaybeUInt64 bytesOut
= 0;
99 MaybeUInt64 bytesIn
= 0;
/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

/* The four bytes that open every output file written by main():
   'B', 'Z', 'h', then BZ_HDR_0 plus a digit. */
#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */
112 /*---------------------------------------------------*/
113 /*--- I/O errors ---*/
114 /*---------------------------------------------------*/
116 /*---------------------------------------------*/
117 void readError ( void )
120 "%s: I/O error reading `%s', possible reason follows.\n",
121 progName
, inFileName
);
123 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
129 /*---------------------------------------------*/
130 void writeError ( void )
133 "%s: I/O error reading `%s', possible reason follows.\n",
134 progName
, inFileName
);
136 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
142 /*---------------------------------------------*/
143 void mallocFail ( Int32 n
)
146 "%s: malloc failed on request for %d bytes.\n",
148 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
154 /*---------------------------------------------*/
155 void tooManyBlocks ( Int32 max_handled_blocks
)
158 "%s: `%s' appears to contain more than %d blocks\n",
159 progName
, inFileName
, max_handled_blocks
);
161 "%s: and cannot be handled. To fix, increase\n",
164 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
171 /*---------------------------------------------------*/
172 /*--- Bit stream I/O ---*/
173 /*---------------------------------------------------*/
185 /*---------------------------------------------*/
186 BitStream
* bsOpenReadStream ( FILE* stream
)
188 BitStream
*bs
= malloc ( sizeof(BitStream
) );
189 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
198 /*---------------------------------------------*/
199 BitStream
* bsOpenWriteStream ( FILE* stream
)
201 BitStream
*bs
= malloc ( sizeof(BitStream
) );
202 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
211 /*---------------------------------------------*/
212 void bsPutBit ( BitStream
* bs
, Int32 bit
)
214 if (bs
->buffLive
== 8) {
215 Int32 retVal
= putc ( (UChar
) bs
->buffer
, bs
->handle
);
216 if (retVal
== EOF
) writeError();
219 bs
->buffer
= bit
& 0x1;
221 bs
->buffer
= ( (bs
->buffer
<< 1) | (bit
& 0x1) );
227 /*---------------------------------------------*/
229 Returns 0 or 1, or 2 to indicate EOF.
231 Int32
bsGetBit ( BitStream
* bs
)
233 if (bs
->buffLive
> 0) {
235 return ( ((bs
->buffer
) >> (bs
->buffLive
)) & 0x1 );
237 Int32 retVal
= getc ( bs
->handle
);
238 if ( retVal
== EOF
) {
239 if (errno
!= 0) readError();
244 return ( ((bs
->buffer
) >> 7) & 0x1 );
249 /*---------------------------------------------*/
250 void bsClose ( BitStream
* bs
)
254 if ( bs
->mode
== 'w' ) {
255 while ( bs
->buffLive
< 8 ) {
259 retVal
= putc ( (UChar
) (bs
->buffer
), bs
->handle
);
260 if (retVal
== EOF
) writeError();
262 retVal
= fflush ( bs
->handle
);
263 if (retVal
== EOF
) writeError();
265 retVal
= fclose ( bs
->handle
);
267 if (bs
->mode
== 'w') writeError(); else readError();
273 /*---------------------------------------------*/
274 void bsPutUChar ( BitStream
* bs
, UChar c
)
277 for (i
= 7; i
>= 0; i
--)
278 bsPutBit ( bs
, (((UInt32
) c
) >> i
) & 0x1 );
282 /*---------------------------------------------*/
283 void bsPutUInt32 ( BitStream
* bs
, UInt32 c
)
287 for (i
= 31; i
>= 0; i
--)
288 bsPutBit ( bs
, (c
>> i
) & 0x1 );
292 /*---------------------------------------------*/
293 Bool
endsInBz2 ( Char
* name
)
295 Int32 n
= strlen ( name
);
296 if (n
<= 4) return False
;
/*---------------------------------------------------*/
/*---                                             ---*/
/*---------------------------------------------------*/

/* Character that separates directory components in a path; main()
   searches for its last occurrence to find the file's base name.
   This logic isn't really right when it comes to Cygwin.
   NOTE(review): the selecting directive was not present in the text
   this was restored from; _WIN32 is assumed from the "Windows
   platform" remark -- confirm. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif
/* 48-bit marker values, split into a 16-bit high part and a 32-bit low
   part, that main() looks for while scanning the input bit by bit.
   They are the same byte sequences main() writes at the start
   (31 41 59 26 53 59) and end (17 72 45 38 50 90) of each output. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL
/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
   would have an uncompressed size of at least 40GB, so the chances
   are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000
328 MaybeUInt64 bStart
[BZ_MAX_HANDLED_BLOCKS
];
329 MaybeUInt64 bEnd
[BZ_MAX_HANDLED_BLOCKS
];
330 MaybeUInt64 rbStart
[BZ_MAX_HANDLED_BLOCKS
];
331 MaybeUInt64 rbEnd
[BZ_MAX_HANDLED_BLOCKS
];
333 Int32
main ( Int32 argc
, Char
** argv
)
337 BitStream
* bsIn
, *bsWr
;
338 Int32 b
, wrBlock
, currBlock
, rbCtr
;
339 MaybeUInt64 bitsRead
;
341 UInt32 buffHi
, buffLo
, blockCRC
;
344 strcpy ( progName
, argv
[0] );
345 inFileName
[0] = outFileName
[0] = 0;
348 "bzip2recover 1.0.3: extracts blocks from damaged .bz2 files.\n" );
351 fprintf ( stderr
, "%s: usage is `%s damaged_file_name'.\n",
352 progName
, progName
);
353 switch (sizeof(MaybeUInt64
)) {
356 "\trestrictions on size of recovered file: None\n");
360 "\trestrictions on size of recovered file: 512 MB\n");
362 "\tto circumvent, recompile with MaybeUInt64 as an\n"
363 "\tunsigned 64-bit int.\n");
367 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
368 "configuration error.\n");
374 if (strlen(argv
[1]) >= BZ_MAX_FILENAME
-20) {
376 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
377 progName
, (int)strlen(argv
[1]) );
381 strcpy ( inFileName
, argv
[1] );
383 inFile
= fopen ( inFileName
, "rb" );
384 if (inFile
== NULL
) {
385 fprintf ( stderr
, "%s: can't read `%s'\n", progName
, inFileName
);
389 bsIn
= bsOpenReadStream ( inFile
);
390 fprintf ( stderr
, "%s: searching for block boundaries ...\n", progName
);
395 bStart
[currBlock
] = 0;
400 b
= bsGetBit ( bsIn
);
403 if (bitsRead
>= bStart
[currBlock
] &&
404 (bitsRead
- bStart
[currBlock
]) >= 40) {
405 bEnd
[currBlock
] = bitsRead
-1;
407 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
408 " to " MaybeUInt64_FMT
" (incomplete)\n",
409 currBlock
, bStart
[currBlock
], bEnd
[currBlock
] );
414 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
415 buffLo
= (buffLo
<< 1) | (b
& 1);
416 if ( ( (buffHi
& 0x0000ffff) == BLOCK_HEADER_HI
417 && buffLo
== BLOCK_HEADER_LO
)
419 ( (buffHi
& 0x0000ffff) == BLOCK_ENDMARK_HI
420 && buffLo
== BLOCK_ENDMARK_LO
)
423 bEnd
[currBlock
] = bitsRead
-49;
428 (bEnd
[currBlock
] - bStart
[currBlock
]) >= 130) {
429 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
430 " to " MaybeUInt64_FMT
"\n",
431 rbCtr
+1, bStart
[currBlock
], bEnd
[currBlock
] );
432 rbStart
[rbCtr
] = bStart
[currBlock
];
433 rbEnd
[rbCtr
] = bEnd
[currBlock
];
436 if (currBlock
>= BZ_MAX_HANDLED_BLOCKS
)
437 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS
);
440 bStart
[currBlock
] = bitsRead
;
446 /*-- identified blocks run from 1 to rbCtr inclusive. --*/
450 "%s: sorry, I couldn't find any block boundaries.\n",
455 fprintf ( stderr
, "%s: splitting into blocks\n", progName
);
457 inFile
= fopen ( inFileName
, "rb" );
458 if (inFile
== NULL
) {
459 fprintf ( stderr
, "%s: can't open `%s'\n", progName
, inFileName
);
462 bsIn
= bsOpenReadStream ( inFile
);
464 /*-- placate gcc's dataflow analyser --*/
465 blockCRC
= 0; bsWr
= 0;
473 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
474 buffLo
= (buffLo
<< 1) | (b
& 1);
475 if (bitsRead
== 47+rbStart
[wrBlock
])
476 blockCRC
= (buffHi
<< 16) | (buffLo
>> 16);
478 if (outFile
!= NULL
&& bitsRead
>= rbStart
[wrBlock
]
479 && bitsRead
<= rbEnd
[wrBlock
]) {
480 bsPutBit ( bsWr
, b
);
485 if (bitsRead
== rbEnd
[wrBlock
]+1) {
486 if (outFile
!= NULL
) {
487 bsPutUChar ( bsWr
, 0x17 ); bsPutUChar ( bsWr
, 0x72 );
488 bsPutUChar ( bsWr
, 0x45 ); bsPutUChar ( bsWr
, 0x38 );
489 bsPutUChar ( bsWr
, 0x50 ); bsPutUChar ( bsWr
, 0x90 );
490 bsPutUInt32 ( bsWr
, blockCRC
);
493 if (wrBlock
>= rbCtr
) break;
496 if (bitsRead
== rbStart
[wrBlock
]) {
497 /* Create the output file name, correctly handling leading paths.
498 (31.10.2001 by Sergey E. Kusikov) */
501 for (k
= 0; k
< BZ_MAX_FILENAME
; k
++)
503 strcpy (outFileName
, inFileName
);
504 split
= strrchr (outFileName
, BZ_SPLIT_SYM
);
510 /* Now split points to the start of the basename. */
511 ofs
= split
- outFileName
;
512 sprintf (split
, "rec%5d", wrBlock
+1);
513 for (p
= split
; *p
!= 0; p
++) if (*p
== ' ') *p
= '0';
514 strcat (outFileName
, inFileName
+ ofs
);
516 if ( !endsInBz2(outFileName
)) strcat ( outFileName
, ".bz2" );
518 fprintf ( stderr
, " writing block %d to `%s' ...\n",
519 wrBlock
+1, outFileName
);
521 outFile
= fopen ( outFileName
, "wb" );
522 if (outFile
== NULL
) {
523 fprintf ( stderr
, "%s: can't write `%s'\n",
524 progName
, outFileName
);
527 bsWr
= bsOpenWriteStream ( outFile
);
528 bsPutUChar ( bsWr
, BZ_HDR_B
);
529 bsPutUChar ( bsWr
, BZ_HDR_Z
);
530 bsPutUChar ( bsWr
, BZ_HDR_h
);
531 bsPutUChar ( bsWr
, BZ_HDR_0
+ 9 );
532 bsPutUChar ( bsWr
, 0x31 ); bsPutUChar ( bsWr
, 0x41 );
533 bsPutUChar ( bsWr
, 0x59 ); bsPutUChar ( bsWr
, 0x26 );
534 bsPutUChar ( bsWr
, 0x53 ); bsPutUChar ( bsWr
, 0x59 );
538 fprintf ( stderr
, "%s: finished\n", progName
);
/*-----------------------------------------------------------*/
/*--- end                                  bzip2recover.c ---*/
/*-----------------------------------------------------------*/