lzah: Improved some debug messages
[deark.git] / modules / iso9660.c
blob5931faf1338e9b46355754783c4dd51bc24b1dcb
1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // ISO 9660 CD-ROM image
6 // NRG CD-ROM image
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
// Modules declared for this file.
DE_DECLARE_MODULE(de_module_iso9660);
DE_DECLARE_MODULE(de_module_cd_raw);
DE_DECLARE_MODULE(de_module_nrg);
// Two-character entry signatures, as the two ASCII byte values packed into
// one integer (first character in the high byte). They are compared against
// the id of a 2-byte signature in do_dir_rec_SUSP_set().
#define CODE_AA 0x4141U
#define CODE_CE 0x4345U
#define CODE_ER 0x4552U
#define CODE_NM 0x4e4dU
#define CODE_PX 0x5058U
#define CODE_SF 0x5346U
#define CODE_SP 0x5350U
#define CODE_ST 0x5354U
#define CODE_TF 0x5446U
#define CODE_ZF 0x5a46U

// do_directory() refuses to descend deeper than this.
#define MAX_NESTING_LEVEL 32
28 struct dir_record {
29 u8 file_flags;
30 u8 is_dir;
31 u8 is_thisdir;
32 u8 is_parentdir;
33 u8 is_root_dot; // The "." entry in the root dir
34 u8 rr_is_executable;
35 u8 rr_is_nonexecutable;
36 u8 is_symlink;
37 u8 is_specialfiletype;
38 u8 is_specialfileformat;
39 u8 has_archimedes_ext;
40 i64 len_dir_rec;
41 i64 len_ext_attr_rec;
42 i64 data_len;
43 i64 file_id_len;
44 i64 extent_blk;
45 de_ucstring *fname;
46 de_ucstring *rr_name;
47 struct de_timestamp recording_time;
48 struct de_timestamp rr_modtime;
49 struct de_timestamp riscos_timestamp;
50 u32 archimedes_attribs;
53 struct vol_record {
54 i64 secnum;
55 i64 root_dir_extent_blk;
56 i64 root_dir_data_len;
57 i64 block_size;
58 de_encoding encoding; // Char encoding associated with this volume descriptor
59 u8 file_structure_version;
60 u8 is_joliet;
61 u8 is_cdxa;
62 u8 quality;
65 typedef struct localctx_struct {
66 int user_req_encoding;
67 int rr_encoding;
68 u8 names_to_lowercase;
69 u8 vol_desc_sector_forced;
70 u8 blocksize_warned;
71 int dirsize_hack_state; // 0=disabled, 1=in use, -1=allowed
72 i64 vol_desc_sector_to_use;
73 i64 secsize;
74 i64 primary_vol_desc_count;
75 i64 suppl_vol_desc_count;
76 struct de_strarray *curpath;
77 struct de_inthashtable *dirs_seen;
78 struct de_inthashtable *voldesc_crc_hash;
79 u8 uses_SUSP;
80 u8 is_udf;
81 i64 SUSP_default_bytes_to_skip;
82 struct vol_record *vol; // Volume descriptor to use
83 struct de_crcobj *crco;
84 } lctx;
86 static i64 sector_dpos(lctx *d, i64 secnum)
88 return secnum * d->secsize;
91 static i64 getu16bbo_p(dbuf *f, i64 *ppos)
93 i64 val;
94 val = dbuf_getu16be(f, (*ppos)+2);
95 *ppos += 4;
96 return val;
99 static i64 getu32bbo(dbuf *f, i64 pos)
101 return dbuf_getu32be(f, pos+4);
104 static i64 getu32bbo_p(dbuf *f, i64 *ppos)
106 i64 val;
107 val = getu32bbo(f, *ppos);
108 *ppos += 8;
109 return val;
112 // If vol is not NULL, use its encoding if it has one. Else ASCII.
113 static void read_iso_string(deark *c, lctx *d, struct vol_record *vol,
114 i64 pos, i64 len, de_ucstring *s)
116 de_encoding encoding;
118 ucstring_empty(s);
119 if(vol && (vol->encoding!=DE_ENCODING_UNKNOWN)) {
120 encoding = vol->encoding;
122 else {
123 encoding = DE_ENCODING_ASCII;
125 if(encoding==DE_ENCODING_UTF16BE) {
126 if(len%2) {
127 len--;
130 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, encoding);
131 ucstring_truncate_at_NUL(s);
132 ucstring_strip_trailing_spaces(s);
135 static void handle_iso_string_p(deark *c, lctx *d, struct vol_record *vol,
136 const char *name, i64 *ppos, i64 len, de_ucstring *tmpstr)
138 read_iso_string(c, d, vol, *ppos, len, tmpstr);
139 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz_d(tmpstr));
140 *ppos += len;
143 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *field_name)
145 char timestamp_buf[64];
147 if(ts->is_valid) {
148 de_dbg_timestamp_to_string(c, ts, timestamp_buf, sizeof(timestamp_buf), 0);
149 de_dbg(c, "%s: %s", field_name, timestamp_buf);
151 else {
152 de_dbg(c, "%s: (not set)", field_name);
156 static i64 read_decimal_substr(dbuf *f, i64 pos, i64 len)
158 char buf[24];
160 if(len<1 || len>23) return 0;
161 dbuf_read(f, (u8*)buf, pos, len);
162 buf[len] = '\0';
163 return de_atoi64(buf);
166 static void read_datetime17(deark *c, lctx *d, i64 pos, struct de_timestamp *ts)
168 i64 yr, mo, da;
169 i64 hr, mi, se, hs;
170 i64 offs;
172 de_zeromem(ts, sizeof(struct de_timestamp));
173 yr = read_decimal_substr(c->infile, pos, 4);
174 if(yr==0) return;
175 mo = read_decimal_substr(c->infile, pos+4, 2);
176 da = read_decimal_substr(c->infile, pos+6, 2);
177 hr = read_decimal_substr(c->infile, pos+8, 2);
178 mi = read_decimal_substr(c->infile, pos+10, 2);
179 se = read_decimal_substr(c->infile, pos+12, 2);
180 hs = read_decimal_substr(c->infile, pos+14, 2);
181 offs = dbuf_geti8(c->infile, pos+16);
182 de_make_timestamp(ts, yr, mo, da, hr, mi, se);
183 de_timestamp_set_subsec(ts, ((double)hs)/100.0);
184 de_timestamp_cvt_to_utc(ts, -offs*60*15);
187 static void read_datetime7(deark *c, lctx *d, i64 pos, struct de_timestamp *ts)
189 i64 yr, mo, da;
190 i64 hr, mi, se;
191 i64 offs;
193 ts->is_valid = 0;
195 yr = de_getbyte(pos);
196 mo = de_getbyte(pos+1);
197 if(mo==0) return;
198 da = de_getbyte(pos+2);
199 hr = de_getbyte(pos+3);
200 mi = de_getbyte(pos+4);
201 se = de_getbyte(pos+5);
202 offs = dbuf_geti8(c->infile, pos+6);
204 de_make_timestamp(ts, 1900+yr, mo, da, hr, mi, se);
205 de_timestamp_cvt_to_utc(ts, -offs*60*15);
208 static void free_dir_record(deark *c, struct dir_record *dr)
210 if(!dr) return;
211 ucstring_destroy(dr->fname);
212 ucstring_destroy(dr->rr_name);
213 de_free(c, dr);
// Classification of a volume descriptor. See get_vol_descr_type_name()
// for the human-readable name of each type.
enum voldesctype_enum {
	VOLDESCTYPE_UNKNOWN,
	VOLDESCTYPE_OTHERVALID,
	VOLDESCTYPE_CD_PRIMARY,
	VOLDESCTYPE_CD_SUPPL,
	VOLDESCTYPE_CD_BOOT,
	VOLDESCTYPE_CD_PARTDESCR,
	VOLDESCTYPE_CD_TERM,
	VOLDESCTYPE_BEA,
	VOLDESCTYPE_TEA,
	VOLDESCTYPE_NSR
};
229 static const char *get_vol_descr_type_name(enum voldesctype_enum vdt)
231 const char *name = NULL;
232 switch(vdt) {
233 case VOLDESCTYPE_CD_BOOT: name="boot record"; break;
234 case VOLDESCTYPE_CD_PRIMARY: name="primary volume descriptor"; break;
235 case VOLDESCTYPE_CD_SUPPL: name="supplementary or enhanced volume descriptor"; break;
236 case VOLDESCTYPE_CD_PARTDESCR: name="volume partition descriptor"; break;
237 case VOLDESCTYPE_CD_TERM: name="volume descriptor set terminator"; break;
238 case VOLDESCTYPE_BEA: name="beginning of extended descriptors"; break;
239 case VOLDESCTYPE_TEA: name="end of extended descriptors"; break;
240 case VOLDESCTYPE_NSR: name="UDF indicator"; break;
241 case VOLDESCTYPE_OTHERVALID: name="(other/valid)"; break;
242 case VOLDESCTYPE_UNKNOWN: break;
244 return name?name:"?";
247 static void fixup_filename(deark *c, lctx *d, de_ucstring *fname)
249 if(fname->len<3) return;
250 if(fname->str[fname->len-2]==';' &&
251 fname->str[fname->len-1]=='1')
253 ucstring_truncate(fname, fname->len-2);
255 if(fname->len>1) {
256 if(fname->str[fname->len-1]=='.') {
257 ucstring_truncate(fname, fname->len-1);
263 // Handle (presumably extract) the contents of the file represented by the
264 // given dir_record.
265 static void do_extract_file(deark *c, lctx *d, struct dir_record *dr)
267 i64 dpos, dlen;
268 de_finfo *fi = NULL;
269 de_ucstring *final_name = NULL;
271 if(dr->extent_blk<1) goto done;
272 dpos = sector_dpos(d, dr->extent_blk);
273 if(dr->is_dir) {
274 dlen = 0;
276 else {
277 dlen = dr->data_len;
280 fi = de_finfo_create(c);
282 final_name = ucstring_create(c);
284 if(!dr->is_root_dot) {
285 de_strarray_make_path(d->curpath, final_name, 0);
288 if(dr->is_root_dot) {
289 fi->is_root_dir = 1;
291 else if(ucstring_isnonempty(dr->rr_name)) {
292 ucstring_append_ucstring(final_name, dr->rr_name);
293 de_finfo_set_name_from_ucstring(c, fi, final_name, DE_SNFLAG_FULLPATH);
294 fi->original_filename_flag = 1;
296 else if(ucstring_isnonempty(dr->fname)) {
297 ucstring_append_ucstring(final_name, dr->fname);
298 fixup_filename(c, d, final_name);
299 de_finfo_set_name_from_ucstring(c, fi, final_name, DE_SNFLAG_FULLPATH);
300 fi->original_filename_flag = 1;
303 if(dr->riscos_timestamp.is_valid) {
304 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = dr->riscos_timestamp;
306 else if(dr->rr_modtime.is_valid) {
307 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = dr->rr_modtime;
309 else if(dr->recording_time.is_valid) {
310 // Apparently, the "recording time" (whatever that is) is
311 // sometimes used as the mod time.
312 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = dr->recording_time;
315 if(dr->is_dir) {
316 fi->is_directory = 1;
318 else if(dr->rr_is_executable) {
319 fi->mode_flags |= DE_MODEFLAG_EXE;
321 else if(dr->rr_is_nonexecutable) {
322 fi->mode_flags |= DE_MODEFLAG_NONEXE;
325 if(dpos+dlen > c->infile->len) {
326 de_err(c, "%s goes beyond end of file", ucstring_getpsz(final_name));
327 goto done;
330 if(dr->is_specialfileformat) {
331 de_warn(c, "%s has an advanced file structure, and might not be "
332 "extracted correctly.", ucstring_getpsz(final_name));
334 else if(dr->is_symlink) {
335 de_warn(c, "%s is a symlink. It will not be extracted as such.",
336 ucstring_getpsz(final_name));
338 else if(dr->is_specialfiletype) { // E.g. FIFO, device, ...
339 de_warn(c, "%s is a special file. It will not be extracted as such.",
340 ucstring_getpsz(final_name));
343 dbuf_create_file_from_slice(c->infile, dpos, dlen, NULL, fi, 0);
345 done:
346 ucstring_destroy(final_name);
347 de_finfo_destroy(c, fi);
350 static void do_SUSP_SP(deark *c, lctx *d, struct dir_record *dr,
351 i64 pos1, i64 len)
353 if(!dr->is_root_dot) return;
354 if(len<7) return;
355 d->SUSP_default_bytes_to_skip = (i64)de_getbyte(pos1+6);
356 de_dbg(c, "bytes skipped: %d", (int)d->SUSP_default_bytes_to_skip);
359 static void do_SUSP_CE(deark *c, lctx *d, struct dir_record *dr,
360 i64 pos1, i64 len,
361 i64 *ca_blk, i64 *ca_offs, i64 *ca_len)
363 i64 pos = pos1 + 4;
366 if(len<28) return;
367 *ca_blk = getu32bbo_p(c->infile, &pos);
368 de_dbg(c, "loc. of continuation area: block #%u", (unsigned int)*ca_blk);
369 *ca_offs = getu32bbo_p(c->infile, &pos);
370 de_dbg(c, "continuation area offset: %u bytes", (unsigned int)*ca_offs);
371 *ca_len = getu32bbo_p(c->infile, &pos);
372 de_dbg(c, "continuation area len: %u bytes", (unsigned int)*ca_len);
375 static void do_SUSP_ER(deark *c, lctx *d, struct dir_record *dr,
376 i64 pos1, i64 len)
378 i64 pos = pos1+4;
379 i64 len_id, len_des, len_src;
380 u8 ext_ver;
381 de_ucstring *tmpstr = NULL;
383 if(!dr->is_root_dot) goto done;
384 if(len<8) goto done;
385 len_id = (i64)de_getbyte_p(&pos);
386 len_des = (i64)de_getbyte_p(&pos);
387 len_src = (i64)de_getbyte_p(&pos);
388 ext_ver = de_getbyte_p(&pos);
389 de_dbg(c, "extension version: %u", (unsigned int)ext_ver);
390 if(8+len_id+len_des+len_src > len) goto done;
391 tmpstr = ucstring_create(c);
392 handle_iso_string_p(c, d, NULL, "extension id", &pos, len_id, tmpstr);
393 handle_iso_string_p(c, d, NULL, "extension descriptor", &pos, len_des, tmpstr);
394 handle_iso_string_p(c, d, NULL, "extension source", &pos, len_src, tmpstr);
396 done:
397 ucstring_destroy(tmpstr);
400 static void do_SUSP_rockridge_NM(deark *c, lctx *d, struct dir_record *dr,
401 i64 pos1, i64 len)
403 u8 flags;
405 flags = de_getbyte(pos1+4);
406 de_dbg(c, "flags: 0x%02x", (unsigned int)flags);
407 if(len<6) return;
408 if(!dr->rr_name)
409 dr->rr_name = ucstring_create(c);
410 // It is intentional that this may append to a name that started in a previous
411 // NM item.
412 dbuf_read_to_ucstring(c->infile, pos1+5, len-5, dr->rr_name, 0x0,
413 d->rr_encoding);
414 de_dbg(c, "Rock Ridge name: \"%s\"", ucstring_getpsz_d(dr->rr_name));
417 static void do_SUSP_rockridge_PX(deark *c, lctx *d, struct dir_record *dr,
418 i64 pos1, i64 len)
420 i64 pos = pos1+4;
421 u32 perms;
422 u32 ftype;
424 if(len<36) return; // 36 in v1r1.10; 44 in v1.12
425 perms = (u32)getu32bbo_p(c->infile, &pos);
426 de_dbg(c, "perms: octal(%06o)", (unsigned int)perms);
427 ftype = (perms&0170000);
428 if(ftype==0100000 || ftype==0) { // regular file
429 if(perms&0111) {
430 dr->rr_is_executable = 1;
432 else {
433 dr->rr_is_nonexecutable = 1;
436 else if(ftype==040000U) { // directory
439 else if(ftype==0120000U) {
440 dr->is_symlink = 1;
442 else {
443 dr->is_specialfiletype = 1;
447 static void do_SUSP_rockridge_TF(deark *c, lctx *d, struct dir_record *dr,
448 i64 pos1, i64 len)
450 i64 pos = pos1+4;
451 unsigned int flags;
452 unsigned int i;
453 i64 bytes_per_field;
454 static const char *names[7] = { "create", "mod", "access",
455 "attrib-change", "backup", "expire", "effective" };
457 if(len<5) return;
458 flags = (unsigned int)de_getbyte_p(&pos);
459 bytes_per_field = (flags&0x80) ? 17 : 7;
461 for(i=0; i<=6; i++) {
462 struct de_timestamp tmpts;
463 char tmpsz[32];
465 // Flag bits indicate which timestamps are present.
466 if(flags & (1<<i)) {
467 if(bytes_per_field==17) {
468 read_datetime17(c, d, pos, &tmpts);
470 else {
471 read_datetime7(c, d, pos, &tmpts);
473 de_snprintf(tmpsz, sizeof(tmpsz), "%s time", names[i]);
474 dbg_timestamp(c, &tmpts, tmpsz);
476 if(i==1 && tmpts.is_valid) { // Save the mod time
477 dr->rr_modtime = tmpts;
479 pos += bytes_per_field;
484 static void do_SUSP_ZF(deark *c, lctx *d, struct dir_record *dr,
485 i64 pos1, i64 len)
487 struct de_fourcc cmprtype;
488 i64 n;
489 i64 pos = pos1+4;
491 dr->is_specialfileformat = 1;
492 if(len<16) goto done;
494 dbuf_read_fourcc(c->infile, pos, &cmprtype, 2, 0x0);
495 de_dbg(c, "cmpr algorithm: '%s'", cmprtype.id_dbgstr);
496 pos += 2;
498 n = (i64)de_getbyte_p(&pos);
499 de_dbg(c, "header size: %u (%u bytes)", (unsigned int)n,
500 (unsigned int)(n*4));
502 n = (i64)de_getbyte_p(&pos);
503 de_dbg(c, "block size: 2^%u (%u bytes)", (unsigned int)n,
504 (unsigned int)(1U<<(unsigned int)n));
506 n = getu32bbo_p(c->infile, &pos);
507 de_dbg(c, "uncmpr. size: %"I64_FMT" bytes", n);
509 done:
513 static int is_SUSP_indicator(deark *c, i64 pos, i64 len)
515 u8 buf[6];
517 if(len<6) return 0;
518 de_read(buf, pos, 6);
519 if(buf[0]=='S' && buf[1]=='P' && buf[4]==0xbe && buf[5]==0xef) {
520 return 1;
522 return 0;
525 static void do_Apple_AA_HFS(deark *c, lctx *d, struct dir_record *dr, i64 pos1, i64 len)
527 unsigned int finder_flags;
528 struct de_fourcc type4cc;
529 struct de_fourcc creator4cc;
530 i64 pos = pos1+4;
532 de_dbg(c, "Apple AA/HFS extension at %"I64_FMT, pos1);
533 de_dbg_indent(c, 1);
534 dbuf_read_fourcc(c->infile, pos, &type4cc, 4, 0x0);
535 de_dbg(c, "type: '%s'", type4cc.id_dbgstr);
536 pos += 4;
537 dbuf_read_fourcc(c->infile, pos, &creator4cc, 4, 0x0);
538 de_dbg(c, "creator: '%s'", creator4cc.id_dbgstr);
539 pos += 4;
540 finder_flags = (unsigned int)de_getu16be_p(&pos);
541 de_dbg(c, "finder flags: 0x%04x", finder_flags);
542 de_dbg_indent(c, -1);
545 static void do_ARCHIMEDES(deark *c, lctx *d, struct dir_record *dr, i64 pos1, i64 len)
547 i64 pos = pos1;
548 struct de_riscos_file_attrs rfa;
550 de_dbg(c, "ARCHIMEDES extension at %"I64_FMT, pos1);
551 de_dbg_indent(c, 1);
552 if(len<10+12) goto done;
553 dr->has_archimedes_ext = 1;
554 pos += 10; // signature
556 de_zeromem(&rfa, sizeof(struct de_riscos_file_attrs));
557 fmtutil_riscos_read_load_exec(c, c->infile, &rfa, pos);
558 dr->riscos_timestamp = rfa.mod_time;
559 pos += 8;
561 fmtutil_riscos_read_attribs_field(c, c->infile, &rfa, pos, 0);
562 dr->archimedes_attribs = rfa.attribs;
564 done:
565 de_dbg_indent(c, -1);
568 static void do_CDXA_dirdata(deark *c, lctx *d, struct dir_record *dr,
569 i64 pos1)
571 unsigned int attribs;
573 de_dbg(c, "CD-ROM XA data at %"I64_FMT, pos1);
574 de_dbg_indent(c, 1);
575 attribs = (unsigned int)de_getu16be(pos1+4);
576 de_dbg(c, "attribs: 0x%04x", attribs);
577 de_dbg_indent(c, -1);
580 // Decode a contiguous set of SUSP entries.
581 // Does not follow a "CE" continuation entry, but returns info about it.
582 static void do_dir_rec_SUSP_set(deark *c, lctx *d, struct dir_record *dr,
583 i64 pos1, i64 len,
584 i64 *ca_blk, i64 *ca_offs, i64 *ca_len)
586 i64 pos = pos1;
587 int saved_indent_level;
589 de_dbg_indent_save(c, &saved_indent_level);
591 de_dbg(c, "SUSP data at %"I64_FMT", len=%d", pos1, (int)len);
592 de_dbg_indent(c, 1);
594 while(1) {
595 struct de_fourcc sig4cc;
596 i64 itempos;
597 i64 itemlen, dlen;
598 u8 itemver;
600 itempos = pos;
601 if(itempos+4 > pos1+len) break;
602 dbuf_read_fourcc(c->infile, pos, &sig4cc, 2, 0x0);
603 pos += 2;
604 itemlen = (i64)de_getbyte_p(&pos);
605 if(itemlen<4) break;
606 dlen = itemlen-4;
607 if(itempos+itemlen > pos1+len) break;
608 itemver = de_getbyte_p(&pos);
609 de_dbg(c, "entry '%s' at %"I64_FMT", len=%d, ver=%u, dlen=%d",
610 sig4cc.id_dbgstr, itempos, (int)itemlen, (unsigned int)itemver, (int)dlen);
612 de_dbg_indent(c, 1);
613 switch(sig4cc.id) {
614 case CODE_SP:
615 do_SUSP_SP(c, d, dr, itempos, itemlen);
616 break;
617 case CODE_CE:
618 do_SUSP_CE(c, d, dr, itempos, itemlen, ca_blk, ca_offs, ca_len);
619 break;
620 case CODE_ER:
621 do_SUSP_ER(c, d, dr, itempos, itemlen);
622 break;
623 case CODE_ST:
624 goto done;
625 case CODE_NM:
626 do_SUSP_rockridge_NM(c, d, dr, itempos, itemlen);
627 break;
628 case CODE_PX:
629 do_SUSP_rockridge_PX(c, d, dr, itempos, itemlen);
630 break;
631 case CODE_TF:
632 do_SUSP_rockridge_TF(c, d, dr, itempos, itemlen);
633 break;
634 case CODE_SF:
635 dr->is_specialfileformat = 1; // Sparse file
636 break;
637 case CODE_ZF: // zisofs
638 do_SUSP_ZF(c, d, dr, itempos, itemlen);
639 break;
640 default:
641 if(sig4cc.id==CODE_AA && itemlen==14 && itemver==2) {
642 // Apple AA extensions are not SUSP, but I've seen them used
643 // as SUSP anyway. They're sufficiently compatible.
644 do_Apple_AA_HFS(c, d, dr, itempos, itemlen);
646 else if(c->debug_level>=2) {
647 de_dbg_hexdump(c, c->infile, pos, itemlen-4, 256, NULL, 0x1);
650 pos = itempos+itemlen;
651 de_dbg_indent(c, -1);
654 done:
655 de_dbg_indent_restore(c, saved_indent_level);
658 static void do_dir_rec_SUSP(deark *c, lctx *d, struct dir_record *dr,
659 i64 pos1, i64 len1)
661 i64 pos = pos1;
662 i64 len = len1;
664 while(1) {
665 i64 ca_blk = 0;
666 i64 ca_offs = 0;
667 i64 ca_len = 0;
669 do_dir_rec_SUSP_set(c, d, dr, pos, len, &ca_blk, &ca_offs, &ca_len);
671 if(ca_blk==0) {
672 break;
675 // Prepare to jump to a continuation area
677 pos = sector_dpos(d, ca_blk) + ca_offs;
679 // Prevent loops
680 if(!de_inthashtable_add_item(c, d->dirs_seen, pos, NULL)) {
681 break;
684 len = ca_len;
688 static void do_dir_rec_system_use_area(deark *c, lctx *d, struct dir_record *dr,
689 i64 pos1, i64 len)
691 i64 pos = pos1;
692 int non_SUSP_handled = 0;
693 i64 non_SUSP_len = len; // default
694 i64 SUSP_len = 0; // default
696 de_dbg(c, "[%"I64_FMT" bytes of system use data at %"I64_FMT"]", len, pos1);
698 if(dr->is_root_dot) {
699 if(is_SUSP_indicator(c, pos, len)) {
700 d->uses_SUSP = 1;
701 non_SUSP_len = 0;
702 SUSP_len = len;
705 else if(d->uses_SUSP) {
706 non_SUSP_len = d->SUSP_default_bytes_to_skip;
707 SUSP_len = len - d->SUSP_default_bytes_to_skip;
710 if(non_SUSP_len>0) {
711 u8 buf[10];
713 // TODO: Detect & handle more non-SUSP formats here.
714 // - Apple AA/ProDOS
715 // - Apple BA
717 de_zeromem(buf, sizeof(buf));
718 de_read(buf, pos, de_min_int(sizeof(buf), non_SUSP_len));
720 if(d->vol->is_cdxa && non_SUSP_len>=14 && buf[6]=='X' && buf[7]=='A') {
721 do_CDXA_dirdata(c, d, dr, pos);
722 non_SUSP_handled = 1;
724 else if(non_SUSP_len>=14 && buf[0]=='A' && buf[1]=='A' && buf[2]==0x0e &&
725 buf[3]==0x02)
727 // TODO: Support XA + AA
728 do_Apple_AA_HFS(c, d, dr, pos, non_SUSP_len);
729 non_SUSP_handled = 1;
731 else if(non_SUSP_len>=10 && !de_memcmp(buf, "ARCHIMEDES", 10)) {
732 do_ARCHIMEDES(c, d, dr, pos, non_SUSP_len);
733 non_SUSP_handled = 1;
737 if(non_SUSP_len>0 && !non_SUSP_handled) {
738 de_dbg(c, "[unidentified system use data]");
739 if(c->debug_level>=2) {
740 de_dbg_indent(c, 1);
741 de_dbg_hexdump(c, c->infile, pos, non_SUSP_len, 256, NULL, 0x1);
742 de_dbg_indent(c, -1);
746 if(d->uses_SUSP && SUSP_len>0) {
747 do_dir_rec_SUSP(c, d, dr, pos+non_SUSP_len, SUSP_len);
749 // TODO?: There can potentially also be non-SUSP data *after* the SUSP data,
750 // but I don't know if we need to worry about that.
753 static void name_to_lowercase(de_ucstring *s)
755 i64 i;
757 if(!s) return;
758 for(i=0; i<s->len; i++) {
759 if(s->str[i]>='A' && s->str[i]<='Z') {
760 s->str[i] += 32;
765 static void do_directory(deark *c, lctx *d, i64 pos1, i64 len, int nesting_level);
767 // Caller allocates dr
768 static int do_directory_record(deark *c, lctx *d, i64 pos1, struct dir_record *dr, int nesting_level)
770 i64 n;
771 i64 pos = pos1;
772 i64 sys_use_len;
773 u8 b;
774 u8 specialfnbyte;
775 de_ucstring *tmps = NULL;
776 int retval = 0;
777 de_ext_encoding file_id_encoding;
779 dr->len_dir_rec = (i64)de_getbyte_p(&pos);
780 de_dbg(c, "dir rec len: %u", (unsigned int)dr->len_dir_rec);
781 if(dr->len_dir_rec<1) goto done;
783 dr->len_ext_attr_rec = (i64)de_getbyte_p(&pos);
784 de_dbg(c, "ext attrib rec len: %u", (unsigned int)dr->len_ext_attr_rec);
786 dr->extent_blk = getu32bbo_p(c->infile, &pos);
787 de_dbg(c, "loc. of extent: %"I64_FMT" (block #%u)", sector_dpos(d, dr->extent_blk),
788 (unsigned int)dr->extent_blk);
789 dr->data_len = getu32bbo_p(c->infile, &pos);
790 de_dbg(c, "data length: %u", (unsigned int)dr->data_len);
792 read_datetime7(c, d, pos, &dr->recording_time);
793 dbg_timestamp(c, &dr->recording_time, "recording time");
794 pos += 7;
796 dr->file_flags = de_getbyte_p(&pos);
797 tmps = ucstring_create(c);
798 if(dr->file_flags & 0x01) ucstring_append_flags_item(tmps, "hidden");
799 if(dr->file_flags & 0x02) {
800 ucstring_append_flags_item(tmps, "directory");
801 dr->is_dir = 1;
803 if(dr->file_flags & 0x04) {
804 ucstring_append_flags_item(tmps, "associated file");
806 if(dr->file_flags & 0x08) {
807 ucstring_append_flags_item(tmps, "record format");
808 dr->is_specialfileformat = 1;
810 if(dr->file_flags & 0x10) ucstring_append_flags_item(tmps, "protected");
811 if(dr->file_flags & 0x80) {
812 ucstring_append_flags_item(tmps, "multi-extent");
813 dr->is_specialfileformat = 1;
815 de_dbg(c, "file flags: 0x%02x (%s)", (unsigned int)dr->file_flags,
816 ucstring_getpsz_d(tmps));
818 b = de_getbyte_p(&pos);
819 de_dbg(c, "file unit size: %u", (unsigned int)b);
821 b = de_getbyte_p(&pos);
822 de_dbg(c, "interleave gap size: %u", (unsigned int)b);
823 if(b!=0) {
824 dr->is_specialfileformat = 1;
827 n = getu16bbo_p(c->infile, &pos);
828 de_dbg(c, "volume sequence number: %u", (unsigned int)n);
829 dr->file_id_len = (i64)de_getbyte_p(&pos);
831 if(dr->is_dir && dr->file_id_len==1) {
832 // Peek at the first (& only) byte of the filename.
833 specialfnbyte = de_getbyte(pos);
835 else {
836 specialfnbyte = 0xff;
839 if(specialfnbyte==0x00 || specialfnbyte==0x01) {
840 // To better display the "thisdir" and "parentdir" directory entries
841 file_id_encoding = DE_EXTENC_MAKE(DE_ENCODING_ASCII, DE_ENCSUBTYPE_PRINTABLE);
843 else if(d->vol->encoding!=DE_ENCODING_UNKNOWN) {
844 file_id_encoding = d->vol->encoding;
846 else if(d->uses_SUSP) {
847 // We're using the user_req_encoding for the Rock Ridge names,
848 // so don't use it here.
849 file_id_encoding = DE_ENCODING_ASCII;
851 else if(d->user_req_encoding!=DE_ENCODING_UNKNOWN) {
852 file_id_encoding = d->user_req_encoding;
854 else {
855 file_id_encoding = DE_ENCODING_ASCII;
858 dr->fname = ucstring_create(c);
859 dbuf_read_to_ucstring(c->infile, pos, dr->file_id_len, dr->fname, 0, file_id_encoding);
860 de_dbg(c, "file id: \"%s\"", ucstring_getpsz_d(dr->fname));
862 if(d->names_to_lowercase && !d->vol->is_joliet) {
863 name_to_lowercase(dr->fname);
866 if(specialfnbyte==0x00) {
867 dr->is_thisdir = 1;
868 if(nesting_level==0) {
869 dr->is_root_dot = 1;
872 else if(specialfnbyte==0x01) {
873 dr->is_parentdir = 1;
875 pos += dr->file_id_len;
877 if((dr->file_id_len%2)==0) pos++; // padding byte
879 // System Use area
880 sys_use_len = pos1+dr->len_dir_rec-pos;
881 if(sys_use_len>0) {
882 do_dir_rec_system_use_area(c, d, dr, pos, sys_use_len);
885 if(dr->has_archimedes_ext && (dr->archimedes_attribs&0x100)) {
886 // Based on what Linux does, and other evidence: If a certain attribute bit
887 // is set, the filename is supposed to start with an exclamation point.
888 if(ucstring_isnonempty(dr->fname)) {
889 if(dr->fname->str[0]=='_') {
890 dr->fname->str[0] = '!';
895 if(dr->len_ext_attr_rec>0) {
896 // TODO
897 de_err(c, "Can't handle files with extended attribute records");
898 goto done;
901 if(dr->is_dir && !dr->is_thisdir && !dr->is_parentdir) {
902 do_extract_file(c, d, dr);
903 if(ucstring_isnonempty(dr->rr_name)) {
904 de_strarray_push(d->curpath, dr->rr_name);
906 else {
907 de_strarray_push(d->curpath, dr->fname);
909 do_directory(c, d, sector_dpos(d, dr->extent_blk), dr->data_len, nesting_level+1);
910 de_strarray_pop(d->curpath);
912 else if(!dr->is_dir) {
913 do_extract_file(c, d, dr);
915 else if(dr->is_root_dot) {
916 do_extract_file(c, d, dr);
919 retval = 1;
921 done:
922 ucstring_destroy(tmps);
923 return retval;
926 // A sequence of directory_records
927 static void do_directory(deark *c, lctx *d, i64 pos1, i64 len, int nesting_level)
929 struct dir_record *dr = NULL;
930 i64 pos = pos1;
931 int saved_indent_level;
932 int idx = 0;
934 de_dbg_indent_save(c, &saved_indent_level);
935 if(pos1<=0) goto done;
937 if((len>=0x08000000LL) & (d->dirsize_hack_state!=0)) {
938 // A few CDs seem to have garbage in the high bits of the directory length fields.
939 // Examples:
940 // https://archive.org/details/NIghtsOwl
941 // https://archive.org/details/SDN1__793
943 if(d->dirsize_hack_state<0) {
944 de_warn(c, "Possibly corrupt directory length found (0x%08x). Enabling workaround.", (UI)len);
945 d->dirsize_hack_state = 1;
947 len &= 0x07ffffffLL;
950 de_dbg(c, "directory at %"I64_FMT", len=%"I64_FMT, pos1, len);
951 de_dbg_indent(c, 1);
953 if(!de_inthashtable_add_item(c, d->dirs_seen, pos1, NULL)) {
954 de_warn(c, "Duplicate directory or loop detected (@%"I64_FMT")", pos1);
955 goto done;
958 if(nesting_level>MAX_NESTING_LEVEL) {
959 de_err(c, "Maximum directory nesting level exceeded");
960 goto done;
963 if(pos1+len > c->infile->len) {
964 de_warn(c, "Directory at %"I64_FMT" goes beyond end of file (size=%"I64_FMT")",
965 pos1, len);
968 while(1) {
969 int ret;
971 if(pos >= pos1+len) break;
972 if(pos >= c->infile->len) break;
974 // Peek at the first byte of the dir record (the length)
975 if(pos%d->secsize != 0) {
976 if(de_getbyte(pos)==0) {
977 // No more dir records in this sector; advance to the next sector
978 pos = de_pad_to_n(pos, d->secsize);
981 if(pos >= pos1+len) break;
982 if(pos >= c->infile->len) break;
985 de_dbg(c, "file/dir record at %"I64_FMT" (item[%d] in dir@%"I64_FMT")", pos,
986 idx, pos1);
987 dr = de_malloc(c, sizeof(struct dir_record));
988 de_dbg_indent(c, 1);
989 ret = do_directory_record(c, d, pos, dr, nesting_level);
990 de_dbg_indent(c, -1);
991 if(!ret) break;
992 if(dr->len_dir_rec<1) break;
994 pos += dr->len_dir_rec; // + ext_len??
995 free_dir_record(c, dr);
996 dr = NULL;
997 idx++;
1000 done:
1001 if(dr) free_dir_record(c, dr);
1002 de_dbg_indent_restore(c, saved_indent_level);
1005 static void do_boot_volume_descr(deark *c, lctx *d, i64 pos1)
1007 de_ucstring *tmpstr = NULL;
1008 struct de_stringreaderdata *boot_sys_id = NULL;
1009 i64 pos = pos1 + 7;
1010 i64 n;
1012 tmpstr = ucstring_create(c);
1013 boot_sys_id = dbuf_read_string(c->infile, pos, 32, 32, DE_CONVFLAG_STOP_AT_NUL,
1014 DE_ENCODING_ASCII);
1015 pos += 32;
1016 de_dbg(c, "boot system id: \"%s\"", ucstring_getpsz(boot_sys_id->str));
1018 handle_iso_string_p(c, d, NULL, "boot id", &pos, 32, tmpstr);
1020 if(!de_strcmp(boot_sys_id->sz, "EL TORITO SPECIFICATION")) {
1021 n = de_getu32le_p(&pos);
1022 de_dbg(c, "first sector of boot catalog: %u", (unsigned int)n);
1025 ucstring_destroy(tmpstr);
1026 de_destroy_stringreaderdata(c, boot_sys_id);
1029 static void read_escape_sequences(deark *c, lctx *d, struct vol_record *vol, i64 pos)
1031 u8 es[8];
1033 de_dbg(c, "escape sequences:");
1034 de_dbg_indent(c, 1);
1035 de_dbg_hexdump(c, c->infile, pos, 32, 32, NULL, 0);
1036 de_read(es, pos, sizeof(es));
1038 // 40, 43, 45 are for UCS-2.
1039 // 4a-4c are for UTF-16, probably not used by Joliet since it predates UTF-16,
1040 // but it shouldn't hurt to allow it.
1041 if(es[0]==0x25 && es[1]==0x2f && (es[2]==0x40 || es[2]==0x43 || es[2]==0x45 ||
1042 es[2]==0x4a || es[2]==0x4b || es[3]==0x4c))
1044 vol->is_joliet = 1;
1045 vol->encoding = DE_ENCODING_UTF16BE;
1047 de_dbg(c, "is joliet: %u", (unsigned int)vol->is_joliet);
1048 de_dbg_indent(c, -1);
1051 static void do_primary_or_suppl_volume_descr_internal(deark *c, lctx *d,
1052 struct vol_record *vol, i64 secnum, i64 pos1, int is_primary)
1054 i64 pos = pos1 + 7;
1055 i64 vol_space_size;
1056 i64 vol_set_size;
1057 i64 vol_seq_num;
1058 i64 n;
1059 unsigned int vol_flags;
1060 u32 crc;
1061 int is_dup;
1062 de_ucstring *tmpstr = NULL;
1063 struct de_timestamp tmpts;
1065 // Check whether this is a copy of a previous descriptor
1066 if(!d->crco) {
1067 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
1069 de_crcobj_reset(d->crco);
1070 de_crcobj_addslice(d->crco, c->infile, pos1, d->secsize);
1071 crc = de_crcobj_getval(d->crco);
1073 is_dup = (de_inthashtable_add_item(c, d->voldesc_crc_hash, (i64)crc, NULL) == 0);
1074 // False positives are *possible*, but note that we always allow the first
1075 // primary descriptor (multiple unique primary descriptors are not allowed), and
1076 // the first supplemental descriptor (multiple unique supplemental descriptors
1077 // are rare).
1078 if(is_primary) {
1079 if(d->primary_vol_desc_count==0) is_dup = 0;
1080 d->primary_vol_desc_count++;
1082 else {
1083 if(d->suppl_vol_desc_count==0) is_dup = 0;
1084 d->suppl_vol_desc_count++;
1087 if(is_dup) {
1088 de_dbg(c, "[this is an extra copy of a previous volume descriptor]");
1089 if(d->vol_desc_sector_forced && (secnum==d->vol_desc_sector_to_use)) {
1090 ; // ... but we have to read it anyway.
1092 else {
1093 vol->quality = 0;
1094 goto done;
1097 /////////
1099 vol->encoding = DE_ENCODING_UNKNOWN;
1101 if(!is_primary) {
1102 vol_flags = de_getbyte(pos);
1103 de_dbg(c, "volume flags: 0x%02x", vol_flags);
1105 pos++;
1107 if(!is_primary) {
1108 // Look ahead at the escape sequences field, because fields that appear
1109 // before it may depend on it.
1110 read_escape_sequences(c, d, vol, pos1+88);
1113 tmpstr = ucstring_create(c);
1114 handle_iso_string_p(c, d, vol, "system id", &pos, 32, tmpstr);
1115 handle_iso_string_p(c, d, vol, "volume id", &pos, 32, tmpstr);
1117 pos += 8; // 73-80 unused
1119 vol_space_size = getu32bbo_p(c->infile, &pos);
1120 de_dbg(c, "volume space size: %"I64_FMT" blocks", vol_space_size);
1122 pos += 32; // escape sequences (already read) or unused
1124 vol_set_size = getu16bbo_p(c->infile, &pos);
1125 de_dbg(c, "volume set size: %u", (unsigned int)vol_set_size);
1126 vol_seq_num = getu16bbo_p(c->infile, &pos);
1127 de_dbg(c, "volume sequence number: %u", (unsigned int)vol_seq_num);
1128 vol->block_size = getu16bbo_p(c->infile, &pos);
1129 de_dbg(c, "block size: %u bytes", (unsigned int)vol->block_size);
1130 if(vol->block_size==0) {
1131 if(!d->blocksize_warned) {
1132 de_warn(c, "Block size not set. Assuming 2048.");
1133 d->blocksize_warned = 1;
1135 vol->block_size = 2048;
1137 n = getu32bbo_p(c->infile, &pos);
1138 de_dbg(c, "path table size: %"I64_FMT" bytes", n);
1140 n = de_getu32le_p(&pos);
1141 de_dbg(c, "loc. of type L path table: block #%u", (unsigned int)n);
1142 n = de_getu32le_p(&pos);
1143 de_dbg(c, "loc. of optional type L path table: block #%u", (unsigned int)n);
1144 n = de_getu32be_p(&pos);
1145 de_dbg(c, "loc. of type M path table: block #%u", (unsigned int)n);
1146 n = de_getu32be_p(&pos);
1147 de_dbg(c, "loc. of optional type M path table: block #%u", (unsigned int)n);
1149 de_dbg(c, "dir record for root dir");
1150 de_dbg_indent(c, 1);
1151 // This is a copy of the main information in the root directory's
1152 // directory entry, basically for bootstrapping.
1153 // It should be effectively identical to the "." entry in the root
1154 // directory. The only fields we care about:
1155 vol->root_dir_extent_blk = getu32bbo(c->infile, pos+2);
1156 de_dbg(c, "loc. of extent: block #%u", (unsigned int)vol->root_dir_extent_blk);
1157 vol->root_dir_data_len = getu32bbo(c->infile, pos+10);
1158 de_dbg(c, "data length: %u", (unsigned int)vol->root_dir_data_len);
1160 de_dbg_indent(c, -1);
1161 pos += 34;
1163 handle_iso_string_p(c, d, vol, "volume set id", &pos, 128, tmpstr);
1164 handle_iso_string_p(c, d, vol, "publisher id", &pos, 128, tmpstr);
1165 handle_iso_string_p(c, d, vol, "data preparer id", &pos, 128, tmpstr);
1166 handle_iso_string_p(c, d, vol, "application id", &pos, 128, tmpstr);
1167 handle_iso_string_p(c, d, vol, "copyright file id", &pos, 37, tmpstr);
1168 handle_iso_string_p(c, d, vol, "abstract file id", &pos, 37, tmpstr);
1169 handle_iso_string_p(c, d, vol, "bibliographic file id", &pos, 37, tmpstr);
1171 read_datetime17(c, d, pos, &tmpts);
1172 dbg_timestamp(c, &tmpts, "volume creation time");
1173 pos += 17;
1175 read_datetime17(c, d, pos, &tmpts);
1176 dbg_timestamp(c, &tmpts, "volume mod time");
1177 pos += 17;
1179 read_datetime17(c, d, pos, &tmpts);
1180 dbg_timestamp(c, &tmpts, "volume expiration time");
1181 pos += 17;
1183 read_datetime17(c, d, pos, &tmpts);
1184 dbg_timestamp(c, &tmpts, "volume effective time");
1185 pos += 17;
1187 vol->file_structure_version = de_getbyte_p(&pos);
1188 de_dbg(c, "file structure version: %u", (unsigned int)vol->file_structure_version);
1190 vol->is_cdxa = !dbuf_memcmp(c->infile, pos1+1024, "CD-XA001", 8);
1191 de_dbg(c, "is CD-ROM XA: %u", (unsigned int)vol->is_cdxa);
1193 vol->quality = 1 +
1194 ((vol->block_size==2048)?80:0) +
1195 ((vol->is_joliet)?40:0) +
1196 ((vol->file_structure_version<=1)?10:0) +
1197 ((vol->file_structure_version==1)?10:0) +
1198 ((is_primary)?5:0);
1200 done:
1201 ucstring_destroy(tmpstr);
1204 static void do_primary_or_suppl_volume_descr(deark *c, lctx *d, i64 secnum,
1205 i64 pos1, int is_primary)
1207 struct vol_record *newvol;
1209 newvol = de_malloc(c, sizeof(struct vol_record));
1210 newvol->secnum = secnum;
1212 do_primary_or_suppl_volume_descr_internal(c, d, newvol, secnum, pos1, is_primary);
1214 if(newvol->quality==0) goto done; // not usable
1215 if(d->vol_desc_sector_forced && (secnum!=d->vol_desc_sector_to_use)) {
1216 // User told us not to use this volume descriptor.
1217 goto done;
1220 if(d->vol) {
1221 // We already have a volume descriptor. Is the new one preferable?
1222 if(newvol->quality > d->vol->quality) {
1223 de_free(c, d->vol);
1224 d->vol = newvol;
1225 newvol = NULL;
1228 else {
1229 d->vol = newvol;
1230 newvol = NULL;
1233 done:
1234 if(newvol) de_free(c, newvol);
1237 // Returns 0 if this is a terminator, or on serious error.
1238 // Returns 1 normally.
1239 static int do_volume_descriptor(deark *c, lctx *d, i64 secnum)
1241 u8 dtype;
1242 u8 dvers;
1243 int saved_indent_level;
1244 i64 pos1, pos;
1245 const char *vdtname;
1246 int retval = 0;
1247 enum voldesctype_enum vdt = VOLDESCTYPE_UNKNOWN;
1248 struct de_stringreaderdata *standard_id = NULL;
1250 de_dbg_indent_save(c, &saved_indent_level);
1252 pos1 = sector_dpos(d, secnum);
1253 pos = pos1;
1255 dtype = de_getbyte_p(&pos);
1256 standard_id = dbuf_read_string(c->infile, pos, 5, 5, 0, DE_ENCODING_ASCII);
1257 pos += 5;
1258 dvers = de_getbyte_p(&pos);
1260 if(!de_strcmp(standard_id->sz, "CD001")) {
1261 switch(dtype) {
1262 case 0: vdt = VOLDESCTYPE_CD_BOOT; break;
1263 case 1: vdt = VOLDESCTYPE_CD_PRIMARY; break;
1264 case 2: vdt = VOLDESCTYPE_CD_SUPPL; break;
1265 case 3: vdt = VOLDESCTYPE_CD_PARTDESCR; break;
1266 case 0xff: vdt = VOLDESCTYPE_CD_TERM; break;
1267 default: vdt = VOLDESCTYPE_OTHERVALID; break;
1270 else if(!de_strncmp(standard_id->sz, "NSR0", 4))
1272 vdt = VOLDESCTYPE_NSR;
1274 else if(!de_strncmp(standard_id->sz, "BEA0", 4)) {
1275 vdt = VOLDESCTYPE_BEA;
1277 else if(!de_strncmp(standard_id->sz, "TEA0", 4)) {
1278 vdt = VOLDESCTYPE_TEA;
1280 else if(!de_strncmp(standard_id->sz, "BOOT", 4) ||
1281 !de_strncmp(standard_id->sz, "CDW0", 4))
1283 vdt = VOLDESCTYPE_OTHERVALID;
1286 if(vdt==VOLDESCTYPE_UNKNOWN) {
1287 de_warn(c, "Expected volume descriptor at %"I64_FMT" not found", pos1);
1288 goto done;
1291 de_dbg(c, "volume descriptor at %"I64_FMT" (sector %d)", pos1, (int)secnum);
1292 de_dbg_indent(c, 1);
1294 de_dbg(c, "type: %u", (unsigned int)dtype);
1295 de_dbg(c, "standard id: \"%s\"", ucstring_getpsz_d(standard_id->str));
1296 de_dbg(c, "version: %u", (unsigned int)dvers);
1298 vdtname = get_vol_descr_type_name(vdt);
1299 de_dbg(c, "interpreted type: %s", vdtname);
1301 retval = 1;
1302 if(vdt==VOLDESCTYPE_TEA) {
1303 retval = 0;
1305 else if(vdt==VOLDESCTYPE_CD_TERM) {
1306 // Minor hack: Peak ahead at the next sector. Unless it looks like a
1307 // BEA descriptor, signifying that there are extended descriptors,
1308 // assume this is the last descriptor.
1309 if(dbuf_memcmp(c->infile, sector_dpos(d, secnum+1)+1, "BEA0", 4)) {
1310 retval = 0;
1314 switch(vdt) {
1315 case VOLDESCTYPE_CD_BOOT:
1316 do_boot_volume_descr(c, d, pos1);
1317 break;
1318 case VOLDESCTYPE_CD_PRIMARY:
1319 do_primary_or_suppl_volume_descr(c, d, secnum, pos1, 1);
1320 break;
1321 case VOLDESCTYPE_CD_SUPPL: // supplementary or enhanced
1322 do_primary_or_suppl_volume_descr(c, d, secnum, pos1, 0);
1323 break;
1324 case VOLDESCTYPE_NSR:
1325 d->is_udf = 1;
1326 break;
1327 case VOLDESCTYPE_BEA:
1328 case VOLDESCTYPE_CD_TERM:
1329 case VOLDESCTYPE_TEA:
1330 break;
1331 default:
1332 de_dbg(c, "[disregarding this volume descriptor]");
1335 done:
1336 de_dbg_indent_restore(c, saved_indent_level);
1337 de_destroy_stringreaderdata(c, standard_id);
1338 return retval;
1341 static void de_run_iso9660(deark *c, de_module_params *mparams)
1343 lctx *d = NULL;
1344 i64 cursec;
1345 const char *s;
1347 d = de_malloc(c, sizeof(lctx));
1349 if(de_get_ext_option_bool(c, "iso9660:tolower", 0)) {
1350 d->names_to_lowercase = 1;
1353 d->dirsize_hack_state = de_get_ext_option_bool(c, "iso9660:dirsizehack", -1);
1355 s = de_get_ext_option(c, "iso9660:voldesc");
1356 if(s) {
1357 d->vol_desc_sector_forced = 1;
1358 d->vol_desc_sector_to_use = de_atoi(s);
1361 d->user_req_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_UNKNOWN);
1362 d->rr_encoding = (d->user_req_encoding==DE_ENCODING_UNKNOWN) ?
1363 DE_ENCODING_UTF8 : d->user_req_encoding;
1365 d->secsize = 2048;
1367 if(!dbuf_memcmp(c->infile, 512, "PM\x00\x00", 4)) {
1368 de_info(c, "Note: This file includes an Apple Partition Map. "
1369 "Use \"-m apm\" to read it.");
1372 d->voldesc_crc_hash = de_inthashtable_create(c);
1373 cursec = 16;
1374 while(1) {
1375 if(!do_volume_descriptor(c, d, cursec)) break;
1376 cursec++;
1379 if(d->is_udf) {
1380 de_warn(c, "This file might have UDF-specific content, which is "
1381 "not supported.");
1384 if(!d->vol) {
1385 de_err(c, "No usable volume descriptor found");
1386 goto done;
1389 de_dbg(c, "[using volume descriptor at sector %u]", (unsigned int)d->vol->secnum);
1391 if(d->vol->block_size != 2048) {
1392 // TODO: Figure out sector size vs. block size.
1393 de_err(c, "Unsupported block size: %u", (unsigned int)d->vol->block_size);
1394 goto done;
1397 d->dirs_seen = de_inthashtable_create(c);
1398 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
1400 if(d->vol->root_dir_extent_blk) {
1401 do_directory(c, d, sector_dpos(d, d->vol->root_dir_extent_blk),
1402 d->vol->root_dir_data_len, 0);
1405 done:
1406 if(d) {
1407 de_free(c, d->vol);
1408 de_strarray_destroy(d->curpath);
1409 de_inthashtable_destroy(c, d->dirs_seen);
1410 de_inthashtable_destroy(c, d->voldesc_crc_hash);
1411 de_crcobj_destroy(d->crco);
1412 de_free(c, d);
1416 static int cdsig_at(dbuf *f, i64 pos)
1418 u8 buf[6];
1420 dbuf_read(f, buf, pos, sizeof(buf));
1421 if(de_memcmp(&buf[1], "CD001", 5)) return 0;
1422 if(buf[0]>3 && buf[0]<255) return 0;
1423 return 1;
1426 static int cdsig_at2(dbuf *f, i64 pos1, i64 pos2)
1428 return (cdsig_at(f, pos1) &&
1429 cdsig_at(f, pos2));
1432 static int de_identify_iso9660(deark *c)
1434 if(cdsig_at2(c->infile, 32768, 32768+2048)) {
1435 // Confidence is practically 100%, but since hybrid formats are
1436 // possible, we want other modules to be able to have precedence.
1437 return 80;
1439 return 0;
1442 static void de_help_iso9660(deark *c)
1444 de_msg(c, "-opt iso9660:tolower : Convert original-style filenames to lowercase.");
1445 de_msg(c, "-opt iso9660:voldesc=<n> : Use the volume descriptor at sector <n>.");
1446 de_msg(c, "-opt iso9660:dirsizehack=0 : Disable a workaround for bad directory lengths.");
1449 void de_module_iso9660(deark *c, struct deark_module_info *mi)
1451 mi->id = "iso9660";
1452 mi->desc = "ISO 9660 (CD-ROM) image";
1453 mi->run_fn = de_run_iso9660;
1454 mi->identify_fn = de_identify_iso9660;
1455 mi->help_fn = de_help_iso9660;
1458 struct cdraw_params {
1459 int ok;
1460 i64 sector_total_len;
1461 i64 sector_dlen;
1462 i64 sector_data_offset;
1463 const char *ext;
1466 // If the volume has an ISO 9660 "volume identifier", try to read it to use as
1467 // part of the output filename.
1468 // This is quick and dirty, and somewhat duplicates code from the iso9660 module.
1469 static void cdraw_set_name_from_vol_id(deark *c, struct cdraw_params *cdrp, de_finfo *fi)
1471 de_ucstring *vol_id = NULL;
1472 i64 pos;
1474 pos = 16*cdrp->sector_total_len + cdrp->sector_data_offset;
1475 if(dbuf_memcmp(c->infile, pos, "\x01" "CD001", 6)) goto done;
1477 vol_id = ucstring_create(c);
1478 dbuf_read_to_ucstring(c->infile, pos+40, 32, vol_id, DE_CONVFLAG_STOP_AT_NUL,
1479 DE_ENCODING_ASCII);
1480 ucstring_strip_trailing_spaces(vol_id);
1482 if(ucstring_isnonempty(vol_id)) {
1483 de_dbg(c, "iso9660 volume id: \"%s\"", ucstring_getpsz_d(vol_id));
1484 de_finfo_set_name_from_ucstring(c, fi, vol_id, 0);
1487 done:
1488 ucstring_destroy(vol_id);
1491 static void do_cdraw_convert(deark *c, struct cdraw_params *cdrp)
1493 i64 pos;
1494 de_finfo *fi = NULL;
1495 dbuf *outf = NULL;
1497 fi = de_finfo_create(c);
1498 cdraw_set_name_from_vol_id(c, cdrp, fi);
1500 outf = dbuf_create_output_file(c, cdrp->ext, fi, 0x0);
1502 pos = cdrp->sector_data_offset;
1503 while(1) {
1504 if(pos >= c->infile->len) break;
1505 dbuf_copy(c->infile, pos, cdrp->sector_dlen, outf);
1506 pos += cdrp->sector_total_len;
1509 dbuf_close(outf);
1510 de_finfo_destroy(c, fi);
1513 static void cdraw_setdefaults(struct cdraw_params *cdrp)
1515 cdrp->ok = 0;
1516 cdrp->sector_total_len = 2048;
1517 cdrp->sector_dlen = 2048;
1518 cdrp->sector_data_offset = 0;
1519 cdrp->ext = "bin";
1522 static int syncbytes_at(dbuf *f, i64 pos)
1524 return !dbuf_memcmp(f, pos,
1525 "\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00", 12);
1528 static void cdraw_detect_params(dbuf *f, struct cdraw_params *cdrp)
1530 if(cdsig_at2(f, 2336*16+8, 2336*17+8)) {
1531 cdrp->ok = 1;
1532 cdrp->sector_total_len = 2336;
1533 cdrp->sector_data_offset = 8;
1534 cdrp->ext = "iso";
1535 return;
1537 if(cdsig_at2(f, 2352*16+16, 2352*17+16)) {
1538 cdrp->ok = 1;
1539 cdrp->sector_total_len = 2352;
1540 cdrp->sector_data_offset = 16;
1541 cdrp->ext = "iso";
1542 return;
1544 if(cdsig_at2(f, 2352*16+24, 2352*17+24)) {
1545 cdrp->ok = 1;
1546 cdrp->sector_total_len = 2352;
1547 cdrp->sector_data_offset = 24;
1548 cdrp->ext = "iso";
1549 return;
1551 if(cdsig_at2(f, 2448*16+16, 2448*17+16)) {
1552 cdrp->ok = 1;
1553 cdrp->sector_total_len = 2448;
1554 cdrp->sector_data_offset = 16;
1555 cdrp->ext = "iso";
1556 return;
1558 if(cdsig_at2(f, 2448*16+24, 2448*17+24)) {
1559 cdrp->ok = 1;
1560 cdrp->sector_total_len = 2448;
1561 cdrp->sector_data_offset = 24;
1562 cdrp->ext = "iso";
1563 return;
1565 if(syncbytes_at(f, 0)) {
1566 if(syncbytes_at(f, 2352)) {
1567 if(!dbuf_memcmp(f, 512+16, "PM", 2)) {
1568 cdrp->ok = 1;
1569 cdrp->sector_total_len = 2352;
1570 cdrp->sector_data_offset = 16;
1571 cdrp->ext = "apm";
1572 return;
1576 // TODO: More formats?
1579 static void de_run_cd_raw(deark *c, de_module_params *mparams)
1581 struct cdraw_params cdrp;
1583 cdraw_setdefaults(&cdrp);
1584 cdraw_detect_params(c->infile, &cdrp);
1585 if(!cdrp.ok) {
1586 de_err(c, "Failed to detect raw CD format");
1587 goto done;
1590 de_dbg(c, "total bytes/sector: %"I64_FMT, cdrp.sector_total_len);
1591 de_dbg(c, "data bytes/sector: %"I64_FMT, cdrp.sector_dlen);
1592 de_dbg(c, "data offset: %"I64_FMT, cdrp.sector_data_offset);
1594 do_cdraw_convert(c, &cdrp);
1596 done:
1600 static int de_identify_cd_raw(deark *c)
1602 struct cdraw_params cdrp;
1604 cdraw_setdefaults(&cdrp);
1605 cdraw_detect_params(c->infile, &cdrp);
1606 if(cdrp.ok) return 70;
1607 return 0;
1610 void de_module_cd_raw(deark *c, struct deark_module_info *mi)
1612 mi->id = "cd_raw";
1613 mi->desc = "Raw CD image";
1614 mi->run_fn = de_run_cd_raw;
1615 mi->identify_fn = de_identify_cd_raw;
1618 struct nrg_ctx {
1619 int ver;
1620 i64 chunk_list_start;
1621 i64 chunk_list_size;
// NRG chunk identifiers: four ASCII characters packed big-endian into a
// 32-bit value.
#define CODE_CDTX  0x43445458U  // "CDTX"
#define CODE_CUES  0x43554553U  // "CUES"
#define CODE_CUEX  0x43554558U  // "CUEX"
#define CODE_DAOI  0x44414f49U  // "DAOI"
#define CODE_DAOX  0x44414f58U  // "DAOX"
#define CODE_END_  0x454e4421U  // "END!"
#define CODE_ETNF  0x45544e46U  // "ETNF"
#define CODE_SINF  0x53494e46U  // "SINF"
1633 static int detect_nrg_internal(deark *c)
1635 if(!dbuf_memcmp(c->infile, c->infile->len-8, "NERO", 4)) {
1636 return 1;
1638 if(!dbuf_memcmp(c->infile, c->infile->len-12, "NER5", 4)) {
1639 return 2;
1641 return 0;
1644 static void do_nrg_ETNF(deark *c, struct de_iffctx *ictx,
1645 const struct de_iffchunkctx *chunkctx)
1647 i64 pos = chunkctx->dpos;
1648 i64 t = 0;
1650 while(1) {
1651 i64 track_offs_bytes, track_len_bytes, start_lba;
1652 unsigned int mode;
1654 if(chunkctx->dpos + chunkctx->dlen - pos < 20) break;
1655 de_dbg(c, "track #%d", (int)t);
1656 de_dbg_indent(c, 1);
1657 track_offs_bytes = de_getu32be(pos);
1658 track_len_bytes = de_getu32be(pos+4);
1659 de_dbg(c, "offset: %"I64_FMT", len: %"I64_FMT, track_offs_bytes, track_len_bytes);
1660 mode = (unsigned int)de_getu32be(pos+8);
1661 de_dbg(c, "mode: %u", mode);
1662 start_lba = de_getu32be(pos+12);
1663 de_dbg(c, "start lba: %"I64_FMT, start_lba);
1664 de_dbg_indent(c, -1);
1665 pos += 20;
1666 t++;
1670 static int my_preprocess_nrg_chunk_fn(deark *c, struct de_iffctx *ictx)
1672 const char *name = NULL;
1674 switch(ictx->chunkctx->chunk4cc.id) {
1675 case CODE_CDTX: name = "CD-text"; break;
1676 case CODE_CUES: case CODE_CUEX: name = "cue sheet"; break;
1677 case CODE_DAOI: case CODE_DAOX: name = "DAO info"; break;
1678 case CODE_ETNF: name = "extended track info"; break;
1679 case CODE_SINF: name = "session info"; break;
1682 if(name) {
1683 ictx->chunkctx->chunk_name = name;
1685 return 1;
1689 static int my_nrg_chunk_handler(deark *c, struct de_iffctx *ictx)
1691 // Always set this, because we never want the IFF parser to try to handle
1692 // a chunk itself.
1693 ictx->handled = 1;
1695 switch(ictx->chunkctx->chunk4cc.id) {
1696 case CODE_ETNF:
1697 do_nrg_ETNF(c, ictx, ictx->chunkctx);
1698 break;
1701 if(ictx->chunkctx->chunk4cc.id==CODE_END_) {
1702 return 0;
1704 return 1;
1707 static void do_nrg_chunks(deark *c, struct nrg_ctx *nrg)
1709 struct de_iffctx *ictx = NULL;
1711 ictx = de_malloc(c, sizeof(struct de_iffctx));
1712 ictx->userdata = (void*)nrg;
1713 ictx->preprocess_chunk_fn = my_preprocess_nrg_chunk_fn;
1714 ictx->handle_chunk_fn = my_nrg_chunk_handler;
1715 ictx->f = c->infile;
1716 ictx->is_le = 0;
1717 ictx->reversed_4cc = 0;
1719 fmtutil_read_iff_format(c, ictx, nrg->chunk_list_start, nrg->chunk_list_size);
1722 static void de_run_nrg(deark *c, de_module_params *mparams)
1724 struct cdraw_params cdrp;
1725 struct nrg_ctx *nrg = NULL;
1727 nrg = de_malloc(c, sizeof(struct nrg_ctx));
1729 nrg->ver = detect_nrg_internal(c);
1730 if(nrg->ver==0) {
1731 de_err(c, "Not in NRG format");
1732 goto done;
1735 if(nrg->ver==2) {
1736 nrg->chunk_list_start = de_geti64be(c->infile->len-8);
1737 nrg->chunk_list_size = c->infile->len - 12 - nrg->chunk_list_start;
1739 else {
1740 nrg->chunk_list_start = de_getu32be(c->infile->len-4);
1741 nrg->chunk_list_size = c->infile->len - 8 - nrg->chunk_list_start;
1743 de_dbg(c, "chunk list: offset=%"I64_FMT", len=%"I64_FMT,
1744 nrg->chunk_list_start, nrg->chunk_list_size);
1746 do_nrg_chunks(c, nrg);
1748 // TODO: The NRG data we just read probably tells us the image format,
1749 // somehow, so it seems wrong to autodetect it.
1751 if(cdsig_at2(c->infile, 32768, 32768+2048)) {
1752 de_dbg(c, "ISO 9660 image at %d", 0);
1753 de_dbg_indent(c, 1);
1754 de_run_module_by_id_on_slice(c, "iso9660", NULL, c->infile, 0, nrg->chunk_list_start);
1755 de_dbg_indent(c, -1);
1756 goto done;
1759 cdraw_setdefaults(&cdrp);
1760 cdraw_detect_params(c->infile, &cdrp);
1761 if(cdrp.ok) {
1762 de_dbg(c, "raw CD image at %d", 0);
1763 de_dbg_indent(c, 1);
1764 de_run_module_by_id_on_slice(c, "cd_raw", NULL, c->infile, 0, nrg->chunk_list_start);
1765 de_dbg_indent(c, -1);
1768 done:
1769 de_free(c, nrg);
1772 static int de_identify_nrg(deark *c)
1774 if(!de_input_file_has_ext(c, "nrg")) return 0;
1775 if(detect_nrg_internal(c)>0) {
1776 return 85;
1778 return 0;
1781 void de_module_nrg(deark *c, struct deark_module_info *mi)
1783 mi->id = "nrg";
1784 mi->desc = "NRG CD-ROM image";
1785 mi->run_fn = de_run_nrg;
1786 mi->identify_fn = de_identify_nrg;