riff: Basic support for extracting RDIB images
[deark.git] / modules / cpio.c
blob049fb7325c39977ba44139daaea95c050e4ea116
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // cpio archive format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_cpio);
// Codes for the cpio variants this module distinguishes.
// 0 is reserved to mean "not identified".
#define SUBFMT_BINARY_LE       1  // Binary header; magic bytes 0xc7 0x71
#define SUBFMT_BINARY_BE       2  // Binary header; magic bytes 0x71 0xc7
#define SUBFMT_ASCII_PORTABLE  3  // ASCII header; magic "070707"
#define SUBFMT_ASCII_NEW       4  // ASCII header; magic "070701"
#define SUBFMT_ASCII_NEWCRC    5  // ASCII header; magic "070702"
17 struct member_data {
18 int subfmt;
19 int is_le;
20 i64 startpos;
21 i64 fixed_header_size; // Not including the filename
22 i64 namesize;
23 i64 namesize_padded;
24 i64 filesize;
25 i64 filesize_padded;
26 i64 mode;
27 u32 checksum_reported;
28 struct de_stringreaderdata *filename_srd;
29 de_finfo *fi;
30 u32 checksum_calculated;
// Module-wide state for one run of the cpio module.
typedef struct localctx_struct {
	// Variant detected at the start of the file (a SUBFMT_* code).
	int first_subfmt;

	// Set to 1 once the "TRAILER!!!" member has been encountered.
	int trailer_found;

	// Encoding used when reading member names.
	int input_encoding;
} lctx;
39 // Returns a value suitable for format identification.
40 // If format is unidentified, subfmt=0
41 static int identify_cpio_internal(deark *c, i64 pos, int *subfmt)
43 u8 b[6];
45 *subfmt = 0;
46 de_read(b, pos, sizeof(b));
48 if(!de_memcmp(b, "070707", 6)) {
49 *subfmt = SUBFMT_ASCII_PORTABLE;
50 return 100;
52 if(!de_memcmp(b, "070701", 6)) {
53 *subfmt = SUBFMT_ASCII_NEW;
54 return 100;
56 if(!de_memcmp(b, "070702", 6)) {
57 *subfmt = SUBFMT_ASCII_NEWCRC;
58 return 100;
60 if(b[0]==0xc7 && b[1]==0x71) {
61 *subfmt = SUBFMT_BINARY_LE;
62 return 70;
64 if(b[0]==0x71 && b[1]==0xc7) {
65 *subfmt = SUBFMT_BINARY_BE;
66 return 70;
69 return 0;
72 // Header decoders are responsible for setting:
73 // - md->fixed_header_size
74 // - md->namesize
75 // - md->namesize_padded
76 // - md->filesize
77 // - md->filesize_padded
78 // (among other things)
80 static int read_header_ascii_portable(deark *c, lctx *d, struct member_data *md)
82 i64 pos;
83 int ret;
84 i64 n;
85 i64 modtime_unix;
86 int retval = 0;
87 char timestamp_buf[64];
89 pos = md->startpos;
91 pos += 6; // c_magic
92 pos += 6; // c_dev
94 ret = dbuf_read_ascii_number(c->infile, pos, 6, 8, &n);
95 if(!ret) goto done;
96 de_dbg(c, "c_ino: %d", (int)n);
97 pos += 6;
99 ret = dbuf_read_ascii_number(c->infile, pos, 6, 8, &md->mode);
100 if(!ret) goto done;
101 de_dbg(c, "c_mode: octal(%06o)", (unsigned int)md->mode);
102 pos += 6;
104 pos += 6; // c_uid
105 pos += 6; // c_gid
106 pos += 6; // c_nlink
107 pos += 6; // c_rdev
109 ret = dbuf_read_ascii_number(c->infile, pos, 11, 8, &modtime_unix);
110 if(!ret) goto done;
111 de_unix_time_to_timestamp(modtime_unix, &md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], 0x1);
112 de_timestamp_to_string(&md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], timestamp_buf, sizeof(timestamp_buf), 0);
113 de_dbg(c, "c_mtime: %d (%s)", (int)modtime_unix, timestamp_buf);
114 pos += 11;
116 ret = dbuf_read_ascii_number(c->infile, pos, 6, 8, &md->namesize);
117 if(!ret) goto done;
118 de_dbg(c, "c_namesize: %d", (int)md->namesize);
119 pos += 6;
121 ret = dbuf_read_ascii_number(c->infile, pos, 11, 8, &md->filesize);
122 if(!ret) goto done;
123 de_dbg(c, "c_filesize: %d", (int)md->filesize);
124 pos += 11;
126 md->fixed_header_size = pos - md->startpos;
127 md->namesize_padded = md->namesize;
128 md->filesize_padded = md->filesize;
130 retval = 1;
132 done:
133 return retval;
136 static int read_header_ascii_new(deark *c, lctx *d, struct member_data *md)
138 i64 pos;
139 int ret;
140 i64 n;
141 i64 modtime_unix;
142 i64 header_and_namesize_padded;
143 int retval = 0;
144 char timestamp_buf[64];
146 pos = md->startpos;
148 pos += 6; // c_magic
150 ret = dbuf_read_ascii_number(c->infile, pos, 8, 16, &n);
151 if(!ret) goto done;
152 de_dbg(c, "c_ino: %d", (int)n);
153 pos += 8;
155 ret = dbuf_read_ascii_number(c->infile, pos, 8, 16, &md->mode);
156 if(!ret) goto done;
157 de_dbg(c, "c_mode: octal(%06o)", (unsigned int)md->mode);
158 pos += 8;
160 pos += 8; // c_uid
161 pos += 8; // c_gid
162 pos += 8; // c_nlink
164 ret = dbuf_read_ascii_number(c->infile, pos, 8, 16, &modtime_unix);
165 if(!ret) goto done;
166 de_unix_time_to_timestamp(modtime_unix, &md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], 0x1);
167 de_timestamp_to_string(&md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], timestamp_buf, sizeof(timestamp_buf), 0);
168 de_dbg(c, "c_mtime: %d (%s)", (int)modtime_unix, timestamp_buf);
169 pos += 8;
171 ret = dbuf_read_ascii_number(c->infile, pos, 8, 16, &md->filesize);
172 if(!ret) goto done;
173 de_dbg(c, "c_filesize: %d", (int)md->filesize);
174 pos += 8;
176 pos += 8; // c_devmajor
177 pos += 8; // c_devminor
178 pos += 8; // c_rdevmajor
179 pos += 8; // c_rdevminor
181 ret = dbuf_read_ascii_number(c->infile, pos, 8, 16, &md->namesize);
182 if(!ret) goto done;
183 de_dbg(c, "c_namesize: %d", (int)md->namesize);
184 pos += 8;
186 if(md->subfmt==SUBFMT_ASCII_NEWCRC) {
187 ret = dbuf_read_ascii_number(c->infile, pos, 8, 16, &n);
188 if(!ret) goto done;
189 md->checksum_reported = (u32)n;
190 de_dbg(c, "c_check: %u", (unsigned int)md->checksum_reported);
192 pos += 8; // c_check
194 md->fixed_header_size = pos - md->startpos;
196 header_and_namesize_padded = de_pad_to_4(md->fixed_header_size + md->namesize);
197 md->namesize_padded = header_and_namesize_padded - md->fixed_header_size;
199 md->filesize_padded = de_pad_to_4(md->filesize);
201 retval = 1;
203 done:
204 return retval;
207 static int read_header_binary(deark *c, lctx *d, struct member_data *md)
209 i64 pos;
210 i64 n;
211 i64 modtime_msw, modtime_lsw;
212 i64 modtime_unix;
213 i64 filesize_msw, filesize_lsw;
214 int retval = 0;
215 char timestamp_buf[64];
217 pos = md->startpos;
219 pos += 2; // c_magic
220 pos += 2; // c_dev
222 n = dbuf_getu16x(c->infile, pos, md->is_le);
223 de_dbg(c, "c_ino: %d", (int)n);
224 pos += 2;
226 md->mode = dbuf_getu16x(c->infile, pos, md->is_le);
227 de_dbg(c, "c_mode: octal(%06o)", (unsigned int)md->mode);
228 pos += 2;
230 pos += 2; // c_uid
231 pos += 2; // c_gid
232 pos += 2; // c_nlink
233 pos += 2; // c_rdev
235 modtime_msw = dbuf_getu16x(c->infile, pos, md->is_le);
236 modtime_lsw = dbuf_getu16x(c->infile, pos+2, md->is_le);
237 modtime_unix = (modtime_msw<<16) | modtime_lsw;
238 de_unix_time_to_timestamp(modtime_unix, &md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], 0x1);
239 de_timestamp_to_string(&md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], timestamp_buf, sizeof(timestamp_buf), 0);
240 de_dbg(c, "c_mtime: %d (%s)", (int)modtime_unix, timestamp_buf);
241 pos += 4;
243 md->namesize = dbuf_getu16x(c->infile, pos, md->is_le);
244 de_dbg(c, "c_namesize: %d", (int)md->namesize);
245 pos += 2;
247 filesize_msw = dbuf_getu16x(c->infile, pos, md->is_le);
248 filesize_lsw = dbuf_getu16x(c->infile, pos+2, md->is_le);
249 md->filesize = (filesize_msw<<16) | filesize_lsw;
250 de_dbg(c, "c_filesize: %d", (int)md->filesize);
251 pos += 4;
253 md->fixed_header_size = pos - md->startpos;
254 md->namesize_padded = de_pad_to_2(md->namesize);
255 md->filesize_padded = de_pad_to_2(md->filesize);
257 retval = 1;
258 return retval;
261 // Always allocates md->filename_srd.
262 static void read_member_name(deark *c, lctx *d, struct member_data *md)
264 i64 namesize_adjusted;
266 // Filenames end with a NUL byte, which is included in the namesize field.
267 namesize_adjusted = md->namesize - 1;
268 if(namesize_adjusted<0) namesize_adjusted=0;
269 if(namesize_adjusted>DE_DBG_MAX_STRLEN) namesize_adjusted=DE_DBG_MAX_STRLEN;
271 md->filename_srd = dbuf_read_string(c->infile, md->startpos + md->fixed_header_size,
272 namesize_adjusted, namesize_adjusted, 0, d->input_encoding);
274 de_dbg(c, "name: \"%s\"", ucstring_getpsz(md->filename_srd->str));
277 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
279 i64 k;
280 struct member_data *md = (struct member_data *)userdata;
282 for(k=0; k<buf_len; k++) {
283 // The 32-bit unsigned integer overflow is by design.
284 md->checksum_calculated += (u32)buf[k];
288 static int read_member(deark *c, lctx *d, i64 pos1,
289 i64 *bytes_consumed_member)
291 int retval = 0;
292 struct member_data *md = NULL;
293 i64 pos = pos1;
294 unsigned int unix_filetype;
295 enum { CPIOFT_SPECIAL=0, CPIOFT_REGULAR,
296 CPIOFT_DIR, CPIOFT_TRAILER } cpio_filetype;
297 dbuf *outf = NULL;
298 unsigned int snflags;
299 int saved_indent_level;
301 de_dbg_indent_save(c, &saved_indent_level);
303 de_dbg(c, "member at %d", (int)pos);
304 de_dbg_indent(c, 1);
306 de_dbg(c, "fixed header at %d", (int)pos);
307 de_dbg_indent(c, 1);
309 md = de_malloc(c, sizeof(struct member_data));
310 md->startpos = pos1;
311 md->fi = de_finfo_create(c);
312 md->fi->detect_root_dot_dir = 1;
314 identify_cpio_internal(c, md->startpos, &md->subfmt);
315 if(md->subfmt==0) {
316 de_err(c, "Unknown cpio format at %d", (int)md->startpos);
317 goto done;
320 if(md->subfmt==SUBFMT_ASCII_PORTABLE) {
321 read_header_ascii_portable(c, d, md);
323 else if(md->subfmt==SUBFMT_ASCII_NEW || md->subfmt==SUBFMT_ASCII_NEWCRC) {
324 read_header_ascii_new(c, d, md);
326 else if(md->subfmt==SUBFMT_BINARY_LE) {
327 md->is_le = 1;
328 read_header_binary(c, d, md);
330 else if(md->subfmt==SUBFMT_BINARY_BE) {
331 read_header_binary(c, d, md);
333 else {
334 de_err(c, "Unsupported cpio format at %d", (int)md->startpos);
335 goto done;
338 de_dbg_indent(c, -1);
339 de_dbg(c, "member name at %d", (int)(md->startpos + md->fixed_header_size));
340 de_dbg_indent(c, 1);
341 read_member_name(c, d, md);
343 pos = md->startpos + md->fixed_header_size + md->namesize_padded;
344 de_dbg_indent(c, -1);
346 de_dbg(c, "member data at %d, len=%d", (int)pos, (int)md->filesize);
347 de_dbg_indent(c, 1);
349 if(pos + md->filesize > c->infile->len) {
350 goto done;
353 retval = 1;
355 unix_filetype = (unsigned int)md->mode & 0170000;
357 if(unix_filetype==040000) {
358 cpio_filetype = CPIOFT_DIR;
360 else if(unix_filetype==0100000) {
361 cpio_filetype = CPIOFT_REGULAR;
363 else {
364 cpio_filetype = CPIOFT_SPECIAL;
365 if(md->mode==0 && md->namesize==11) {
366 if(!de_strcmp(md->filename_srd->sz, "TRAILER!!!")) {
367 cpio_filetype = CPIOFT_TRAILER;
368 de_dbg(c, "[Trailer. Not extracting.]");
369 d->trailer_found = 1;
373 if(cpio_filetype==CPIOFT_SPECIAL) {
374 de_dbg(c, "[Not a regular file. Skipping.]");
378 if(cpio_filetype!=CPIOFT_REGULAR && cpio_filetype!=CPIOFT_DIR) {
379 goto done; // Not extracting this member
382 snflags = DE_SNFLAG_FULLPATH;
383 if(cpio_filetype==CPIOFT_DIR) {
384 md->fi->is_directory = 1;
385 // Directory members might or might not end in a slash.
386 snflags |= DE_SNFLAG_STRIPTRAILINGSLASH;
388 else if((md->mode & 0111) != 0) {
389 md->fi->mode_flags |= DE_MODEFLAG_EXE;
391 else {
392 md->fi->mode_flags |= DE_MODEFLAG_NONEXE;
395 de_finfo_set_name_from_ucstring(c, md->fi, md->filename_srd->str, snflags);
396 md->fi->original_filename_flag = 1;
398 outf = dbuf_create_output_file(c, NULL, md->fi, 0);
400 if(md->subfmt==SUBFMT_ASCII_NEWCRC) {
401 // Use a callback function to calculate the checksum.
402 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)md);
403 md->checksum_calculated = 0;
406 dbuf_copy(c->infile, pos, md->filesize, outf);
408 if(md->subfmt==SUBFMT_ASCII_NEWCRC) {
409 de_dbg(c, "checksum (calculated): %u", (unsigned int)md->checksum_calculated);
410 if(md->checksum_calculated != md->checksum_reported) {
411 de_warn(c, "Checksum failed for file %s: Expected %u, got %u",
412 ucstring_getpsz_d(md->filename_srd->str),
413 (unsigned int)md->checksum_reported, (unsigned int)md->checksum_calculated);
417 done:
418 dbuf_close(outf);
419 if(retval && md) {
420 *bytes_consumed_member = md->fixed_header_size + md->namesize_padded + md->filesize_padded;
422 if(md) {
423 de_destroy_stringreaderdata(c, md->filename_srd);
424 de_finfo_destroy(c, md->fi);
425 de_free(c, md);
427 de_dbg_indent_restore(c, saved_indent_level);
428 return retval;
431 static void de_run_cpio(deark *c, de_module_params *mparams)
433 lctx *d = NULL;
434 i64 bytes_consumed;
435 i64 pos;
436 int ret;
438 d = de_malloc(c, sizeof(lctx));
440 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_UTF8);
442 pos = 0;
444 if(identify_cpio_internal(c, pos, &d->first_subfmt)==0) {
445 de_err(c, "Not a cpio file, or unknown cpio format");
446 goto done;
449 switch(d->first_subfmt) {
450 case SUBFMT_BINARY_LE:
451 de_declare_fmt(c, "cpio Binary little-endian");
452 break;
453 case SUBFMT_BINARY_BE:
454 de_declare_fmt(c, "cpio Binary big-endian");
455 break;
456 case SUBFMT_ASCII_PORTABLE:
457 de_declare_fmt(c, "cpio ASCII Portable");
458 break;
459 case SUBFMT_ASCII_NEW:
460 de_declare_fmt(c, "cpio ASCII New");
461 break;
462 case SUBFMT_ASCII_NEWCRC:
463 de_declare_fmt(c, "cpio ASCII New-CRC");
464 break;
467 while(1) {
468 if(d->trailer_found) break;
469 if(pos >= c->infile->len) break;
470 bytes_consumed = 0;
471 ret = read_member(c, d, pos, &bytes_consumed);
472 if(!ret) break;
473 if(bytes_consumed<1) break;
474 pos += bytes_consumed;
477 done:
478 de_free(c, d);
481 static int de_identify_cpio(deark *c)
483 int subfmt;
484 return identify_cpio_internal(c, 0, &subfmt);
487 void de_module_cpio(deark *c, struct deark_module_info *mi)
489 mi->id = "cpio";
490 mi->desc = "cpio archive";
491 mi->run_fn = de_run_cpio;
492 mi->identify_fn = de_identify_cpio;