1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
// Declares this module's registration function, de_module_cpio (defined at
// the end of this file), using the project's DE_DECLARE_MODULE macro.
9 DE_DECLARE_MODULE(de_module_cpio
);
// Identifiers for the cpio sub-formats this module distinguishes.
// identify_cpio_internal() stores one of these in its 'subfmt' output
// (0 means the format was not identified):
//   SUBFMT_BINARY_LE      - first two bytes are 0xc7 0x71
//   SUBFMT_BINARY_BE      - first two bytes are 0x71 0xc7
//   SUBFMT_ASCII_PORTABLE - signature "070707"
//   SUBFMT_ASCII_NEW      - signature "070701"
//   SUBFMT_ASCII_NEWCRC   - signature "070702"
11 #define SUBFMT_BINARY_LE 1
12 #define SUBFMT_BINARY_BE 2
13 #define SUBFMT_ASCII_PORTABLE 3
14 #define SUBFMT_ASCII_NEW 4
15 #define SUBFMT_ASCII_NEWCRC 5
// Per-member state (members of struct member_data).
// Size of the fixed part of the header, computed by the header decoders as
// (position after the last fixed field) - md->startpos.
21 i64 fixed_header_size
; // Not including the filename
// Checksum read from the c_check header field; only read when the member's
// sub-format is SUBFMT_ASCII_NEWCRC.
27 u32 checksum_reported
;
// Member name as read by read_member_name(); released with
// de_destroy_stringreaderdata() in read_member().
28 struct de_stringreaderdata
*filename_srd
;
// Running sum of the bytes written to the output file, accumulated by
// our_writelistener_cb().
30 u32 checksum_calculated
;
// Module-level context structure.
// NOTE(review): presumably this is the type referred to as 'lctx' elsewhere in
// this file (allocated in de_run_cpio(), which uses its input_encoding,
// first_subfmt and trailer_found fields) - confirm against the full definition.
33 typedef struct localctx_struct
{
39 // Returns a value suitable for format identification.
40 // If format is unidentified, subfmt=0
//
// Reads sizeof(b) bytes at 'pos' and compares them with each known signature:
//   "070707"        -> SUBFMT_ASCII_PORTABLE
//   "070701"        -> SUBFMT_ASCII_NEW
//   "070702"        -> SUBFMT_ASCII_NEWCRC
//   bytes 0xc7 0x71 -> SUBFMT_BINARY_LE
//   bytes 0x71 0xc7 -> SUBFMT_BINARY_BE
// The detected sub-format is stored in *subfmt.
41 static int identify_cpio_internal(deark
*c
, i64 pos
, int *subfmt
)
46 de_read(b
, pos
, sizeof(b
));
// ASCII formats: a 6-character signature.
48 if(!de_memcmp(b
, "070707", 6)) {
49 *subfmt
= SUBFMT_ASCII_PORTABLE
;
52 if(!de_memcmp(b
, "070701", 6)) {
53 *subfmt
= SUBFMT_ASCII_NEW
;
56 if(!de_memcmp(b
, "070702", 6)) {
57 *subfmt
= SUBFMT_ASCII_NEWCRC
;
// Binary formats: the same two magic bytes, in either byte order.
60 if(b
[0]==0xc7 && b
[1]==0x71) {
61 *subfmt
= SUBFMT_BINARY_LE
;
64 if(b
[0]==0x71 && b
[1]==0xc7) {
65 *subfmt
= SUBFMT_BINARY_BE
;
72 // Header decoders are responsible for setting:
73 // - md->fixed_header_size
75 // - md->namesize_padded
77 // - md->filesize_padded
78 // (among other things)
//
// Decoder for the SUBFMT_ASCII_PORTABLE header. Each field is parsed with
// dbuf_read_ascii_number(); the literal arguments (6, 8) and (11, 8) are
// presumably the field width in characters and the numeric base (octal) -
// confirm against dbuf_read_ascii_number(). This format uses no padding:
// the padded name and data sizes equal the unpadded ones.
80 static int read_header_ascii_portable(deark
*c
, lctx
*d
, struct member_data
*md
)
87 char timestamp_buf
[64];
94 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 6, 8, &n
);
96 de_dbg(c
, "c_ino: %d", (int)n
);
99 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 6, 8, &md
->mode
);
101 de_dbg(c
, "c_mode: octal(%06o)", (unsigned int)md
->mode
);
// Modification time: parsed as a Unix time value, stored in the member's
// finfo, and formatted only for the debug message.
109 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 11, 8, &modtime_unix
);
111 de_unix_time_to_timestamp(modtime_unix
, &md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], 0x1);
112 de_timestamp_to_string(&md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], timestamp_buf
, sizeof(timestamp_buf
), 0);
113 de_dbg(c
, "c_mtime: %d (%s)", (int)modtime_unix
, timestamp_buf
);
116 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 6, 8, &md
->namesize
);
118 de_dbg(c
, "c_namesize: %d", (int)md
->namesize
);
121 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 11, 8, &md
->filesize
);
123 de_dbg(c
, "c_filesize: %d", (int)md
->filesize
);
// 'pos' is now just past the last fixed field.
126 md
->fixed_header_size
= pos
- md
->startpos
;
// No alignment padding in this sub-format.
127 md
->namesize_padded
= md
->namesize
;
128 md
->filesize_padded
= md
->filesize
;
// Decoder for the SUBFMT_ASCII_NEW and SUBFMT_ASCII_NEWCRC headers. Each
// field is parsed with dbuf_read_ascii_number(); the literal arguments (8, 16)
// are presumably the field width in characters and the numeric base
// (hexadecimal) - confirm against dbuf_read_ascii_number().
// Sets md->fixed_header_size, md->namesize_padded and md->filesize_padded;
// header+name and the file data are each padded to a multiple of 4 bytes.
136 static int read_header_ascii_new(deark
*c
, lctx
*d
, struct member_data
*md
)
142 i64 header_and_namesize_padded
;
144 char timestamp_buf
[64];
150 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 8, 16, &n
);
152 de_dbg(c
, "c_ino: %d", (int)n
);
155 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 8, 16, &md
->mode
);
157 de_dbg(c
, "c_mode: octal(%06o)", (unsigned int)md
->mode
);
// Modification time: parsed as a Unix time value, stored in the member's
// finfo, and formatted only for the debug message.
164 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 8, 16, &modtime_unix
);
166 de_unix_time_to_timestamp(modtime_unix
, &md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], 0x1);
167 de_timestamp_to_string(&md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], timestamp_buf
, sizeof(timestamp_buf
), 0);
168 de_dbg(c
, "c_mtime: %d (%s)", (int)modtime_unix
, timestamp_buf
);
171 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 8, 16, &md
->filesize
);
173 de_dbg(c
, "c_filesize: %d", (int)md
->filesize
);
// The four device-number fields are skipped without being decoded.
176 pos
+= 8; // c_devmajor
177 pos
+= 8; // c_devminor
178 pos
+= 8; // c_rdevmajor
179 pos
+= 8; // c_rdevminor
181 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 8, 16, &md
->namesize
);
183 de_dbg(c
, "c_namesize: %d", (int)md
->namesize
);
// The checksum field is only decoded for the "New-CRC" sub-format.
186 if(md
->subfmt
==SUBFMT_ASCII_NEWCRC
) {
187 ret
= dbuf_read_ascii_number(c
->infile
, pos
, 8, 16, &n
);
189 md
->checksum_reported
= (u32
)n
;
190 de_dbg(c
, "c_check: %u", (unsigned int)md
->checksum_reported
);
// 'pos' is now just past the last fixed field.
194 md
->fixed_header_size
= pos
- md
->startpos
;
// The padding applies to the header and name together, so the padded name
// size is derived from the padded combined size.
196 header_and_namesize_padded
= de_pad_to_4(md
->fixed_header_size
+ md
->namesize
);
197 md
->namesize_padded
= header_and_namesize_padded
- md
->fixed_header_size
;
199 md
->filesize_padded
= de_pad_to_4(md
->filesize
);
// Decoder for the binary headers (SUBFMT_BINARY_LE and SUBFMT_BINARY_BE).
// Fields are 16-bit integers read with dbuf_getu16x(), using md->is_le to
// select the byte order. The two 32-bit values (modification time and file
// size) are each stored as two 16-bit words, most significant word first.
// Sets md->fixed_header_size, md->namesize_padded and md->filesize_padded;
// the name and the file data are each padded to a multiple of 2 bytes.
207 static int read_header_binary(deark
*c
, lctx
*d
, struct member_data
*md
)
211 i64 modtime_msw
, modtime_lsw
;
213 i64 filesize_msw
, filesize_lsw
;
215 char timestamp_buf
[64];
222 n
= dbuf_getu16x(c
->infile
, pos
, md
->is_le
);
223 de_dbg(c
, "c_ino: %d", (int)n
);
226 md
->mode
= dbuf_getu16x(c
->infile
, pos
, md
->is_le
);
227 de_dbg(c
, "c_mode: octal(%06o)", (unsigned int)md
->mode
);
// Modification time: the word at pos is the high half, the word at pos+2
// the low half.
235 modtime_msw
= dbuf_getu16x(c
->infile
, pos
, md
->is_le
);
236 modtime_lsw
= dbuf_getu16x(c
->infile
, pos
+2, md
->is_le
);
237 modtime_unix
= (modtime_msw
<<16) | modtime_lsw
;
238 de_unix_time_to_timestamp(modtime_unix
, &md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], 0x1);
239 de_timestamp_to_string(&md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], timestamp_buf
, sizeof(timestamp_buf
), 0);
240 de_dbg(c
, "c_mtime: %d (%s)", (int)modtime_unix
, timestamp_buf
);
243 md
->namesize
= dbuf_getu16x(c
->infile
, pos
, md
->is_le
);
244 de_dbg(c
, "c_namesize: %d", (int)md
->namesize
);
// File size: same two-word layout as the modification time.
247 filesize_msw
= dbuf_getu16x(c
->infile
, pos
, md
->is_le
);
248 filesize_lsw
= dbuf_getu16x(c
->infile
, pos
+2, md
->is_le
);
249 md
->filesize
= (filesize_msw
<<16) | filesize_lsw
;
250 de_dbg(c
, "c_filesize: %d", (int)md
->filesize
);
// 'pos' is now just past the last fixed field.
253 md
->fixed_header_size
= pos
- md
->startpos
;
254 md
->namesize_padded
= de_pad_to_2(md
->namesize
);
255 md
->filesize_padded
= de_pad_to_2(md
->filesize
);
// Reads the member's name, which starts immediately after the fixed header
// (at md->startpos + md->fixed_header_size), using d->input_encoding, and
// prints it as a debug message.
261 // Always allocates md->filename_srd.
262 static void read_member_name(deark
*c
, lctx
*d
, struct member_data
*md
)
264 i64 namesize_adjusted
;
266 // Filenames end with a NUL byte, which is included in the namesize field.
267 namesize_adjusted
= md
->namesize
- 1;
// Clamp the length to the range [0, DE_DBG_MAX_STRLEN] before reading.
268 if(namesize_adjusted
<0) namesize_adjusted
=0;
269 if(namesize_adjusted
>DE_DBG_MAX_STRLEN
) namesize_adjusted
=DE_DBG_MAX_STRLEN
;
271 md
->filename_srd
= dbuf_read_string(c
->infile
, md
->startpos
+ md
->fixed_header_size
,
272 namesize_adjusted
, namesize_adjusted
, 0, d
->input_encoding
);
274 de_dbg(c
, "name: \"%s\"", ucstring_getpsz(md
->filename_srd
->str
));
// Write-listener callback, installed on the output file by read_member() for
// SUBFMT_ASCII_NEWCRC members. Adds the value of every byte in
// buf[0..buf_len-1] to md->checksum_calculated, where md is the
// struct member_data passed as 'userdata'.
277 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
280 struct member_data
*md
= (struct member_data
*)userdata
;
282 for(k
=0; k
<buf_len
; k
++) {
283 // The 32-bit unsigned integer overflow is by design.
284 md
->checksum_calculated
+= (u32
)buf
[k
];
// Processes one archive member:
//  - identifies the member's sub-format and decodes its fixed header with the
//    matching decoder;
//  - reads the member name;
//  - classifies the member from its mode bits as directory, regular file,
//    special file, or the archive trailer;
//  - for regular files and directories, creates an output file and copies
//    md->filesize bytes of member data to it;
//  - for SUBFMT_ASCII_NEWCRC members, sums the bytes written and warns if the
//    sum differs from the checksum stored in the header.
// *bytes_consumed_member receives the combined size of the fixed header, the
// padded name and the padded data.
288 static int read_member(deark
*c
, lctx
*d
, i64 pos1
,
289 i64
*bytes_consumed_member
)
292 struct member_data
*md
= NULL
;
294 unsigned int unix_filetype
;
295 enum { CPIOFT_SPECIAL
=0, CPIOFT_REGULAR
,
296 CPIOFT_DIR
, CPIOFT_TRAILER
} cpio_filetype
;
298 unsigned int snflags
;
299 int saved_indent_level
;
// The debug indentation level is saved here and restored at the end of the
// function.
301 de_dbg_indent_save(c
, &saved_indent_level
);
303 de_dbg(c
, "member at %d", (int)pos
);
306 de_dbg(c
, "fixed header at %d", (int)pos
);
309 md
= de_malloc(c
, sizeof(struct member_data
));
311 md
->fi
= de_finfo_create(c
);
312 md
->fi
->detect_root_dot_dir
= 1;
// Each member is identified individually from its own signature.
314 identify_cpio_internal(c
, md
->startpos
, &md
->subfmt
);
316 de_err(c
, "Unknown cpio format at %d", (int)md
->startpos
);
// Dispatch to the header decoder for the detected sub-format. Both binary
// sub-formats share one decoder.
320 if(md
->subfmt
==SUBFMT_ASCII_PORTABLE
) {
321 read_header_ascii_portable(c
, d
, md
);
323 else if(md
->subfmt
==SUBFMT_ASCII_NEW
|| md
->subfmt
==SUBFMT_ASCII_NEWCRC
) {
324 read_header_ascii_new(c
, d
, md
);
326 else if(md
->subfmt
==SUBFMT_BINARY_LE
) {
328 read_header_binary(c
, d
, md
);
330 else if(md
->subfmt
==SUBFMT_BINARY_BE
) {
331 read_header_binary(c
, d
, md
);
334 de_err(c
, "Unsupported cpio format at %d", (int)md
->startpos
);
338 de_dbg_indent(c
, -1);
339 de_dbg(c
, "member name at %d", (int)(md
->startpos
+ md
->fixed_header_size
));
341 read_member_name(c
, d
, md
);
// The member data starts after the fixed header and the padded name.
343 pos
= md
->startpos
+ md
->fixed_header_size
+ md
->namesize_padded
;
344 de_dbg_indent(c
, -1);
346 de_dbg(c
, "member data at %d, len=%d", (int)pos
, (int)md
->filesize
);
// Check whether the member data would extend past the end of the input file.
349 if(pos
+ md
->filesize
> c
->infile
->len
) {
// Classify the member using the bits of the mode selected by mask 0170000.
355 unix_filetype
= (unsigned int)md
->mode
& 0170000;
357 if(unix_filetype
==040000) {
358 cpio_filetype
= CPIOFT_DIR
;
360 else if(unix_filetype
==0100000) {
361 cpio_filetype
= CPIOFT_REGULAR
;
364 cpio_filetype
= CPIOFT_SPECIAL
;
// The trailer is recognized as a member with mode 0 whose name is
// "TRAILER!!!" (namesize 11 = 10 characters plus the terminating NUL).
365 if(md
->mode
==0 && md
->namesize
==11) {
366 if(!de_strcmp(md
->filename_srd
->sz
, "TRAILER!!!")) {
367 cpio_filetype
= CPIOFT_TRAILER
;
368 de_dbg(c
, "[Trailer. Not extracting.]");
369 d
->trailer_found
= 1;
373 if(cpio_filetype
==CPIOFT_SPECIAL
) {
374 de_dbg(c
, "[Not a regular file. Skipping.]");
378 if(cpio_filetype
!=CPIOFT_REGULAR
&& cpio_filetype
!=CPIOFT_DIR
) {
379 goto done
; // Not extracting this member
382 snflags
= DE_SNFLAG_FULLPATH
;
383 if(cpio_filetype
==CPIOFT_DIR
) {
384 md
->fi
->is_directory
= 1;
385 // Directory members might or might not end in a slash.
386 snflags
|= DE_SNFLAG_STRIPTRAILINGSLASH
;
// For non-directories, record whether any of the mode bits 0111 are set.
388 else if((md
->mode
& 0111) != 0) {
389 md
->fi
->mode_flags
|= DE_MODEFLAG_EXE
;
392 md
->fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
395 de_finfo_set_name_from_ucstring(c
, md
->fi
, md
->filename_srd
->str
, snflags
);
396 md
->fi
->original_filename_flag
= 1;
398 outf
= dbuf_create_output_file(c
, NULL
, md
->fi
, 0);
400 if(md
->subfmt
==SUBFMT_ASCII_NEWCRC
) {
401 // Use a callback function to calculate the checksum.
402 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)md
);
403 md
->checksum_calculated
= 0;
// Copy the (unpadded) member data to the output file.
406 dbuf_copy(c
->infile
, pos
, md
->filesize
, outf
);
// Compare the sum accumulated during the copy with the header's value.
408 if(md
->subfmt
==SUBFMT_ASCII_NEWCRC
) {
409 de_dbg(c
, "checksum (calculated): %u", (unsigned int)md
->checksum_calculated
);
410 if(md
->checksum_calculated
!= md
->checksum_reported
) {
411 de_warn(c
, "Checksum failed for file %s: Expected %u, got %u",
412 ucstring_getpsz_d(md
->filename_srd
->str
),
413 (unsigned int)md
->checksum_reported
, (unsigned int)md
->checksum_calculated
);
// Total size of this member, including padding, for the caller to advance by.
420 *bytes_consumed_member
= md
->fixed_header_size
+ md
->namesize_padded
+ md
->filesize_padded
;
// Release the per-member name and file-info objects.
423 de_destroy_stringreaderdata(c
, md
->filename_srd
);
424 de_finfo_destroy(c
, md
->fi
);
427 de_dbg_indent_restore(c
, saved_indent_level
);
// Module "run" function (installed as mi->run_fn by de_module_cpio()).
// Identifies the sub-format of the first member, reports it with
// de_declare_fmt(), then reads members one after another with read_member().
// The loop stops once the trailer has been found, the position reaches the
// end of the input file, or a member reports fewer than 1 byte consumed.
431 static void de_run_cpio(deark
*c
, de_module_params
*mparams
)
438 d
= de_malloc(c
, sizeof(lctx
));
// NOTE(review): DE_ENCODING_UTF8 is presumably the default used when no
// encoding was requested - confirm against de_get_input_encoding().
440 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_UTF8
);
// A return value of 0 from the identifier is treated as "not cpio".
444 if(identify_cpio_internal(c
, pos
, &d
->first_subfmt
)==0) {
445 de_err(c
, "Not a cpio file, or unknown cpio format");
// Report the format name that corresponds to the first member's sub-format.
449 switch(d
->first_subfmt
) {
450 case SUBFMT_BINARY_LE
:
451 de_declare_fmt(c
, "cpio Binary little-endian");
453 case SUBFMT_BINARY_BE
:
454 de_declare_fmt(c
, "cpio Binary big-endian");
456 case SUBFMT_ASCII_PORTABLE
:
457 de_declare_fmt(c
, "cpio ASCII Portable");
459 case SUBFMT_ASCII_NEW
:
460 de_declare_fmt(c
, "cpio ASCII New");
462 case SUBFMT_ASCII_NEWCRC
:
463 de_declare_fmt(c
, "cpio ASCII New-CRC");
// Member loop: termination checks, then read one member and advance.
468 if(d
->trailer_found
) break;
469 if(pos
>= c
->infile
->len
) break;
471 ret
= read_member(c
, d
, pos
, &bytes_consumed
);
// A member that consumed no bytes would never advance 'pos'.
473 if(bytes_consumed
<1) break;
474 pos
+= bytes_consumed
;
// Module "identify" function (installed as mi->identify_fn by
// de_module_cpio()). Checks the signature at offset 0 of the input and returns
// the result of identify_cpio_internal(); the detected sub-format itself is
// written to a local variable.
481 static int de_identify_cpio(deark
*c
)
484 return identify_cpio_internal(c
, 0, &subfmt
);
// Module registration function: fills in the module-info structure 'mi' with
// this module's description and its run and identify callbacks.
487 void de_module_cpio(deark
*c
, struct deark_module_info
*mi
)
490 mi
->desc
= "cpio archive";
491 mi
->run_fn
= de_run_cpio
;
492 mi
->identify_fn
= de_identify_cpio
;