Refactoring the iff decoder
[deark.git] / modules / iso9660.c
blobb48c677850835f294696d89b6038f6b45688f87f
1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // ISO 9660 CD-ROM image
6 // NRG CD-ROM image
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
DE_DECLARE_MODULE(de_module_iso9660);
DE_DECLARE_MODULE(de_module_cd_raw);
DE_DECLARE_MODULE(de_module_nrg);

// Two-letter System Use entry signatures. Each value is the pair of ASCII
// codes of the letters, first letter in the high byte (e.g. "AA" = 0x41,0x41).
#define CODE_AA 0x4141U
#define CODE_CE 0x4345U
#define CODE_ER 0x4552U
#define CODE_NM 0x4e4dU
#define CODE_PX 0x5058U
#define CODE_SF 0x5346U
#define CODE_SP 0x5350U
#define CODE_ST 0x5354U
#define CODE_TF 0x5446U
#define CODE_ZF 0x5a46U

// Directories nested deeper than this are rejected.
#define MAX_NESTING_LEVEL 32
28 struct dir_record {
29 u8 file_flags;
30 u8 is_dir;
31 u8 is_thisdir;
32 u8 is_parentdir;
33 u8 is_root_dot; // The "." entry in the root dir
34 u8 rr_is_executable;
35 u8 rr_is_nonexecutable;
36 u8 is_symlink;
37 u8 is_specialfiletype;
38 u8 is_specialfileformat;
39 i64 len_dir_rec;
40 i64 len_ext_attr_rec;
41 i64 data_len;
42 i64 file_id_len;
43 i64 extent_blk;
44 de_ucstring *fname;
45 de_ucstring *rr_name;
46 struct de_timestamp recording_time;
47 struct de_timestamp rr_modtime;
48 struct de_timestamp riscos_timestamp;
49 u8 has_riscos_data;
50 struct de_riscos_file_attrs rfa;
53 struct vol_record {
54 i64 secnum;
55 i64 root_dir_extent_blk;
56 i64 root_dir_data_len;
57 i64 block_size;
58 de_encoding encoding; // Char encoding associated with this volume descriptor
59 u8 file_structure_version;
60 u8 is_joliet;
61 u8 is_cdxa;
62 u8 quality;
65 typedef struct localctx_struct {
66 int user_req_encoding;
67 int rr_encoding;
68 u8 names_to_lowercase;
69 u8 vol_desc_sector_forced;
70 u8 blocksize_warned;
71 int dirsize_hack_state; // 0=disabled, 1=in use, -1=allowed
72 i64 vol_desc_sector_to_use;
73 i64 secsize;
74 i64 primary_vol_desc_count;
75 i64 suppl_vol_desc_count;
76 struct de_strarray *curpath;
77 struct de_inthashtable *dirs_seen;
78 struct de_inthashtable *voldesc_crc_hash;
79 u8 uses_SUSP;
80 u8 is_udf;
81 i64 SUSP_default_bytes_to_skip;
82 struct vol_record *vol; // Volume descriptor to use
83 struct de_crcobj *crco;
84 } lctx;
86 static i64 sector_dpos(lctx *d, i64 secnum)
88 return secnum * d->secsize;
91 static i64 getu16bbo_p(dbuf *f, i64 *ppos)
93 i64 val;
94 val = dbuf_getu16be(f, (*ppos)+2);
95 *ppos += 4;
96 return val;
99 static i64 getu32bbo(dbuf *f, i64 pos)
101 return dbuf_getu32be(f, pos+4);
104 static i64 getu32bbo_p(dbuf *f, i64 *ppos)
106 i64 val;
107 val = getu32bbo(f, *ppos);
108 *ppos += 8;
109 return val;
112 // If vol is not NULL, use its encoding if it has one. Else ASCII.
113 static void read_iso_string(deark *c, lctx *d, struct vol_record *vol,
114 i64 pos, i64 len, de_ucstring *s)
116 de_encoding encoding;
118 ucstring_empty(s);
119 if(vol && (vol->encoding!=DE_ENCODING_UNKNOWN)) {
120 encoding = vol->encoding;
122 else {
123 encoding = DE_ENCODING_ASCII;
125 if(encoding==DE_ENCODING_UTF16BE) {
126 if(len%2) {
127 len--;
130 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, encoding);
131 ucstring_truncate_at_NUL(s);
132 ucstring_strip_trailing_spaces(s);
135 static void handle_iso_string_p(deark *c, lctx *d, struct vol_record *vol,
136 const char *name, i64 *ppos, i64 len, de_ucstring *tmpstr)
138 read_iso_string(c, d, vol, *ppos, len, tmpstr);
139 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz_d(tmpstr));
140 *ppos += len;
143 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *field_name)
145 char timestamp_buf[64];
147 if(ts->is_valid) {
148 de_dbg_timestamp_to_string(c, ts, timestamp_buf, sizeof(timestamp_buf), 0);
149 de_dbg(c, "%s: %s", field_name, timestamp_buf);
151 else {
152 de_dbg(c, "%s: (not set)", field_name);
156 static i64 read_decimal_substr(dbuf *f, i64 pos, i64 len)
158 char buf[24];
160 if(len<1 || len>23) return 0;
161 dbuf_read(f, (u8*)buf, pos, len);
162 buf[len] = '\0';
163 return de_atoi64(buf);
166 static void read_datetime17(deark *c, lctx *d, i64 pos, struct de_timestamp *ts)
168 i64 yr, mo, da;
169 i64 hr, mi, se, hs;
170 i64 offs;
172 de_zeromem(ts, sizeof(struct de_timestamp));
173 yr = read_decimal_substr(c->infile, pos, 4);
174 if(yr==0) return;
175 mo = read_decimal_substr(c->infile, pos+4, 2);
176 da = read_decimal_substr(c->infile, pos+6, 2);
177 hr = read_decimal_substr(c->infile, pos+8, 2);
178 mi = read_decimal_substr(c->infile, pos+10, 2);
179 se = read_decimal_substr(c->infile, pos+12, 2);
180 hs = read_decimal_substr(c->infile, pos+14, 2);
181 offs = dbuf_geti8(c->infile, pos+16);
182 de_make_timestamp(ts, yr, mo, da, hr, mi, se);
183 de_timestamp_set_subsec(ts, ((double)hs)/100.0);
184 de_timestamp_cvt_to_utc(ts, -offs*60*15);
187 static void read_datetime7(deark *c, lctx *d, i64 pos, struct de_timestamp *ts)
189 i64 yr, mo, da;
190 i64 hr, mi, se;
191 i64 offs;
193 ts->is_valid = 0;
195 yr = de_getbyte(pos);
196 mo = de_getbyte(pos+1);
197 if(mo==0) return;
198 da = de_getbyte(pos+2);
199 hr = de_getbyte(pos+3);
200 mi = de_getbyte(pos+4);
201 se = de_getbyte(pos+5);
202 offs = dbuf_geti8(c->infile, pos+6);
204 de_make_timestamp(ts, 1900+yr, mo, da, hr, mi, se);
205 de_timestamp_cvt_to_utc(ts, -offs*60*15);
208 static void free_dir_record(deark *c, struct dir_record *dr)
210 if(!dr) return;
211 ucstring_destroy(dr->fname);
212 ucstring_destroy(dr->rr_name);
213 de_free(c, dr);
// Kinds of volume descriptor this module distinguishes.
// See get_vol_descr_type_name() for the printable names.
enum voldesctype_enum {
	VOLDESCTYPE_UNKNOWN,
	VOLDESCTYPE_OTHERVALID,
	VOLDESCTYPE_CD_PRIMARY,
	VOLDESCTYPE_CD_SUPPL,
	VOLDESCTYPE_CD_BOOT,
	VOLDESCTYPE_CD_PARTDESCR,
	VOLDESCTYPE_CD_TERM,
	VOLDESCTYPE_BEA,
	VOLDESCTYPE_TEA,
	VOLDESCTYPE_NSR
};
229 static const char *get_vol_descr_type_name(enum voldesctype_enum vdt)
231 const char *name = NULL;
232 switch(vdt) {
233 case VOLDESCTYPE_CD_BOOT: name="boot record"; break;
234 case VOLDESCTYPE_CD_PRIMARY: name="primary volume descriptor"; break;
235 case VOLDESCTYPE_CD_SUPPL: name="supplementary or enhanced volume descriptor"; break;
236 case VOLDESCTYPE_CD_PARTDESCR: name="volume partition descriptor"; break;
237 case VOLDESCTYPE_CD_TERM: name="volume descriptor set terminator"; break;
238 case VOLDESCTYPE_BEA: name="beginning of extended descriptors"; break;
239 case VOLDESCTYPE_TEA: name="end of extended descriptors"; break;
240 case VOLDESCTYPE_NSR: name="UDF indicator"; break;
241 case VOLDESCTYPE_OTHERVALID: name="(other/valid)"; break;
242 case VOLDESCTYPE_UNKNOWN: break;
244 return name?name:"?";
247 static void fixup_filename(deark *c, lctx *d, de_ucstring *fname)
249 if(fname->len<3) return;
250 if(fname->str[fname->len-2]==';' &&
251 fname->str[fname->len-1]=='1')
253 ucstring_truncate(fname, fname->len-2);
255 if(fname->len>1) {
256 if(fname->str[fname->len-1]=='.') {
257 ucstring_truncate(fname, fname->len-1);
263 // Handle (presumably extract) the contents of the file represented by the
264 // given dir_record.
265 static void do_extract_file(deark *c, lctx *d, struct dir_record *dr)
267 i64 dpos, dlen;
268 de_finfo *fi = NULL;
269 de_ucstring *final_name = NULL;
271 if(dr->extent_blk<1) goto done;
272 dpos = sector_dpos(d, dr->extent_blk);
273 if(dr->is_dir) {
274 dlen = 0;
276 else {
277 dlen = dr->data_len;
280 fi = de_finfo_create(c);
282 final_name = ucstring_create(c);
284 if(!dr->is_root_dot) {
285 de_strarray_make_path(d->curpath, final_name, 0);
288 if(dr->is_root_dot) {
289 fi->is_root_dir = 1;
291 else if(ucstring_isnonempty(dr->rr_name)) {
292 ucstring_append_ucstring(final_name, dr->rr_name);
293 if(dr->has_riscos_data) {
294 fmtutil_riscos_append_type_to_filename(c, fi, final_name, &dr->rfa, dr->is_dir, 0);
296 de_finfo_set_name_from_ucstring(c, fi, final_name, DE_SNFLAG_FULLPATH);
297 fi->original_filename_flag = 1;
299 else if(ucstring_isnonempty(dr->fname)) {
300 ucstring_append_ucstring(final_name, dr->fname);
301 fixup_filename(c, d, final_name);
302 if(dr->has_riscos_data) {
303 fmtutil_riscos_append_type_to_filename(c, fi, final_name, &dr->rfa, dr->is_dir, 0);
305 de_finfo_set_name_from_ucstring(c, fi, final_name, DE_SNFLAG_FULLPATH);
306 fi->original_filename_flag = 1;
309 if(dr->riscos_timestamp.is_valid) {
310 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = dr->riscos_timestamp;
312 else if(dr->rr_modtime.is_valid) {
313 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = dr->rr_modtime;
315 else if(dr->recording_time.is_valid) {
316 // Apparently, the "recording time" (whatever that is) is
317 // sometimes used as the mod time.
318 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = dr->recording_time;
321 if(dr->is_dir) {
322 fi->is_directory = 1;
324 else if(dr->rr_is_executable) {
325 fi->mode_flags |= DE_MODEFLAG_EXE;
327 else if(dr->rr_is_nonexecutable) {
328 fi->mode_flags |= DE_MODEFLAG_NONEXE;
331 if(dr->has_riscos_data) {
332 fi->has_riscos_data = 1;
333 fi->riscos_attribs = dr->rfa.attribs;
334 fi->load_addr = dr->rfa.load_addr;
335 fi->exec_addr = dr->rfa.exec_addr;
338 if(dpos+dlen > c->infile->len) {
339 de_err(c, "%s goes beyond end of file", ucstring_getpsz(final_name));
340 goto done;
343 if(dr->is_specialfileformat) {
344 de_warn(c, "%s has an advanced file structure, and might not be "
345 "extracted correctly.", ucstring_getpsz(final_name));
347 else if(dr->is_symlink) {
348 de_warn(c, "%s is a symlink. It will not be extracted as such.",
349 ucstring_getpsz(final_name));
351 else if(dr->is_specialfiletype) { // E.g. FIFO, device, ...
352 de_warn(c, "%s is a special file. It will not be extracted as such.",
353 ucstring_getpsz(final_name));
356 dbuf_create_file_from_slice(c->infile, dpos, dlen, NULL, fi, 0);
358 done:
359 ucstring_destroy(final_name);
360 de_finfo_destroy(c, fi);
363 static void do_SUSP_SP(deark *c, lctx *d, struct dir_record *dr,
364 i64 pos1, i64 len)
366 if(!dr->is_root_dot) return;
367 if(len<7) return;
368 d->SUSP_default_bytes_to_skip = (i64)de_getbyte(pos1+6);
369 de_dbg(c, "bytes skipped: %d", (int)d->SUSP_default_bytes_to_skip);
372 static void do_SUSP_CE(deark *c, lctx *d, struct dir_record *dr,
373 i64 pos1, i64 len,
374 i64 *ca_blk, i64 *ca_offs, i64 *ca_len)
376 i64 pos = pos1 + 4;
379 if(len<28) return;
380 *ca_blk = getu32bbo_p(c->infile, &pos);
381 de_dbg(c, "loc. of continuation area: block #%u", (unsigned int)*ca_blk);
382 *ca_offs = getu32bbo_p(c->infile, &pos);
383 de_dbg(c, "continuation area offset: %u bytes", (unsigned int)*ca_offs);
384 *ca_len = getu32bbo_p(c->infile, &pos);
385 de_dbg(c, "continuation area len: %u bytes", (unsigned int)*ca_len);
388 static void do_SUSP_ER(deark *c, lctx *d, struct dir_record *dr,
389 i64 pos1, i64 len)
391 i64 pos = pos1+4;
392 i64 len_id, len_des, len_src;
393 u8 ext_ver;
394 de_ucstring *tmpstr = NULL;
396 if(!dr->is_root_dot) goto done;
397 if(len<8) goto done;
398 len_id = (i64)de_getbyte_p(&pos);
399 len_des = (i64)de_getbyte_p(&pos);
400 len_src = (i64)de_getbyte_p(&pos);
401 ext_ver = de_getbyte_p(&pos);
402 de_dbg(c, "extension version: %u", (unsigned int)ext_ver);
403 if(8+len_id+len_des+len_src > len) goto done;
404 tmpstr = ucstring_create(c);
405 handle_iso_string_p(c, d, NULL, "extension id", &pos, len_id, tmpstr);
406 handle_iso_string_p(c, d, NULL, "extension descriptor", &pos, len_des, tmpstr);
407 handle_iso_string_p(c, d, NULL, "extension source", &pos, len_src, tmpstr);
409 done:
410 ucstring_destroy(tmpstr);
413 static void do_SUSP_rockridge_NM(deark *c, lctx *d, struct dir_record *dr,
414 i64 pos1, i64 len)
416 u8 flags;
418 flags = de_getbyte(pos1+4);
419 de_dbg(c, "flags: 0x%02x", (unsigned int)flags);
420 if(len<6) return;
421 if(!dr->rr_name)
422 dr->rr_name = ucstring_create(c);
423 // It is intentional that this may append to a name that started in a previous
424 // NM item.
425 dbuf_read_to_ucstring(c->infile, pos1+5, len-5, dr->rr_name, 0x0,
426 d->rr_encoding);
427 de_dbg(c, "Rock Ridge name: \"%s\"", ucstring_getpsz_d(dr->rr_name));
430 static void do_SUSP_rockridge_PX(deark *c, lctx *d, struct dir_record *dr,
431 i64 pos1, i64 len)
433 i64 pos = pos1+4;
434 u32 perms;
435 u32 ftype;
437 if(len<36) return; // 36 in v1r1.10; 44 in v1.12
438 perms = (u32)getu32bbo_p(c->infile, &pos);
439 de_dbg(c, "perms: octal(%06o)", (unsigned int)perms);
440 ftype = (perms&0170000);
441 if(ftype==0100000 || ftype==0) { // regular file
442 if(perms&0111) {
443 dr->rr_is_executable = 1;
445 else {
446 dr->rr_is_nonexecutable = 1;
449 else if(ftype==040000U) { // directory
452 else if(ftype==0120000U) {
453 dr->is_symlink = 1;
455 else {
456 dr->is_specialfiletype = 1;
460 static void do_SUSP_rockridge_TF(deark *c, lctx *d, struct dir_record *dr,
461 i64 pos1, i64 len)
463 i64 pos = pos1+4;
464 unsigned int flags;
465 unsigned int i;
466 i64 bytes_per_field;
467 static const char *names[7] = { "create", "mod", "access",
468 "attrib-change", "backup", "expire", "effective" };
470 if(len<5) return;
471 flags = (unsigned int)de_getbyte_p(&pos);
472 bytes_per_field = (flags&0x80) ? 17 : 7;
474 for(i=0; i<=6; i++) {
475 struct de_timestamp tmpts;
476 char tmpsz[32];
478 // Flag bits indicate which timestamps are present.
479 if(flags & (1<<i)) {
480 if(bytes_per_field==17) {
481 read_datetime17(c, d, pos, &tmpts);
483 else {
484 read_datetime7(c, d, pos, &tmpts);
486 de_snprintf(tmpsz, sizeof(tmpsz), "%s time", names[i]);
487 dbg_timestamp(c, &tmpts, tmpsz);
489 if(i==1 && tmpts.is_valid) { // Save the mod time
490 dr->rr_modtime = tmpts;
492 pos += bytes_per_field;
497 static void do_SUSP_ZF(deark *c, lctx *d, struct dir_record *dr,
498 i64 pos1, i64 len)
500 struct de_fourcc cmprtype;
501 i64 n;
502 i64 pos = pos1+4;
504 dr->is_specialfileformat = 1;
505 if(len<16) goto done;
507 dbuf_read_fourcc(c->infile, pos, &cmprtype, 2, 0x0);
508 de_dbg(c, "cmpr algorithm: '%s'", cmprtype.id_dbgstr);
509 pos += 2;
511 n = (i64)de_getbyte_p(&pos);
512 de_dbg(c, "header size: %u (%u bytes)", (unsigned int)n,
513 (unsigned int)(n*4));
515 n = (i64)de_getbyte_p(&pos);
516 de_dbg(c, "block size: 2^%u (%u bytes)", (unsigned int)n,
517 (unsigned int)(1U<<(unsigned int)n));
519 n = getu32bbo_p(c->infile, &pos);
520 de_dbg(c, "uncmpr. size: %"I64_FMT" bytes", n);
522 done:
526 static int is_SUSP_indicator(deark *c, i64 pos, i64 len)
528 u8 buf[6];
530 if(len<6) return 0;
531 de_read(buf, pos, 6);
532 if(buf[0]=='S' && buf[1]=='P' && buf[4]==0xbe && buf[5]==0xef) {
533 return 1;
535 return 0;
538 static void do_Apple_AA_HFS(deark *c, lctx *d, struct dir_record *dr, i64 pos1, i64 len)
540 unsigned int finder_flags;
541 struct de_fourcc type4cc;
542 struct de_fourcc creator4cc;
543 i64 pos = pos1+4;
545 de_dbg(c, "Apple AA/HFS extension at %"I64_FMT, pos1);
546 de_dbg_indent(c, 1);
547 dbuf_read_fourcc(c->infile, pos, &type4cc, 4, 0x0);
548 de_dbg(c, "type: '%s'", type4cc.id_dbgstr);
549 pos += 4;
550 dbuf_read_fourcc(c->infile, pos, &creator4cc, 4, 0x0);
551 de_dbg(c, "creator: '%s'", creator4cc.id_dbgstr);
552 pos += 4;
553 finder_flags = (unsigned int)de_getu16be_p(&pos);
554 de_dbg(c, "finder flags: 0x%04x", finder_flags);
555 de_dbg_indent(c, -1);
558 static void do_ARCHIMEDES(deark *c, lctx *d, struct dir_record *dr, i64 pos1, i64 len)
560 i64 pos = pos1;
561 struct de_riscos_file_attrs rfa;
563 de_dbg(c, "ARCHIMEDES extension at %"I64_FMT, pos1);
564 de_dbg_indent(c, 1);
565 if(len<10+12) goto done;
566 pos += 10; // signature
568 de_zeromem(&rfa, sizeof(struct de_riscos_file_attrs));
569 fmtutil_riscos_read_load_exec(c, c->infile, &rfa, pos);
570 dr->riscos_timestamp = rfa.mod_time;
571 pos += 8;
573 fmtutil_riscos_read_attribs_field(c, c->infile, &rfa, pos, 0);
574 dr->has_riscos_data = 1;
575 dr->rfa = rfa;
577 done:
578 de_dbg_indent(c, -1);
581 static void do_CDXA_dirdata(deark *c, lctx *d, struct dir_record *dr,
582 i64 pos1)
584 unsigned int attribs;
586 de_dbg(c, "CD-ROM XA data at %"I64_FMT, pos1);
587 de_dbg_indent(c, 1);
588 attribs = (unsigned int)de_getu16be(pos1+4);
589 de_dbg(c, "attribs: 0x%04x", attribs);
590 de_dbg_indent(c, -1);
593 // Decode a contiguous set of SUSP entries.
594 // Does not follow a "CE" continuation entry, but returns info about it.
595 static void do_dir_rec_SUSP_set(deark *c, lctx *d, struct dir_record *dr,
596 i64 pos1, i64 len,
597 i64 *ca_blk, i64 *ca_offs, i64 *ca_len)
599 i64 pos = pos1;
600 int saved_indent_level;
602 de_dbg_indent_save(c, &saved_indent_level);
604 de_dbg(c, "SUSP data at %"I64_FMT", len=%d", pos1, (int)len);
605 de_dbg_indent(c, 1);
607 while(1) {
608 struct de_fourcc sig4cc;
609 i64 itempos;
610 i64 itemlen, dlen;
611 u8 itemver;
613 itempos = pos;
614 if(itempos+4 > pos1+len) break;
615 dbuf_read_fourcc(c->infile, pos, &sig4cc, 2, 0x0);
616 pos += 2;
617 itemlen = (i64)de_getbyte_p(&pos);
618 if(itemlen<4) break;
619 dlen = itemlen-4;
620 if(itempos+itemlen > pos1+len) break;
621 itemver = de_getbyte_p(&pos);
622 de_dbg(c, "entry '%s' at %"I64_FMT", len=%d, ver=%u, dlen=%d",
623 sig4cc.id_dbgstr, itempos, (int)itemlen, (unsigned int)itemver, (int)dlen);
625 de_dbg_indent(c, 1);
626 switch(sig4cc.id) {
627 case CODE_SP:
628 do_SUSP_SP(c, d, dr, itempos, itemlen);
629 break;
630 case CODE_CE:
631 do_SUSP_CE(c, d, dr, itempos, itemlen, ca_blk, ca_offs, ca_len);
632 break;
633 case CODE_ER:
634 do_SUSP_ER(c, d, dr, itempos, itemlen);
635 break;
636 case CODE_ST:
637 goto done;
638 case CODE_NM:
639 do_SUSP_rockridge_NM(c, d, dr, itempos, itemlen);
640 break;
641 case CODE_PX:
642 do_SUSP_rockridge_PX(c, d, dr, itempos, itemlen);
643 break;
644 case CODE_TF:
645 do_SUSP_rockridge_TF(c, d, dr, itempos, itemlen);
646 break;
647 case CODE_SF:
648 dr->is_specialfileformat = 1; // Sparse file
649 break;
650 case CODE_ZF: // zisofs
651 do_SUSP_ZF(c, d, dr, itempos, itemlen);
652 break;
653 default:
654 if(sig4cc.id==CODE_AA && itemlen==14 && itemver==2) {
655 // Apple AA extensions are not SUSP, but I've seen them used
656 // as SUSP anyway. They're sufficiently compatible.
657 do_Apple_AA_HFS(c, d, dr, itempos, itemlen);
659 else if(c->debug_level>=2) {
660 de_dbg_hexdump(c, c->infile, pos, itemlen-4, 256, NULL, 0x1);
663 pos = itempos+itemlen;
664 de_dbg_indent(c, -1);
667 done:
668 de_dbg_indent_restore(c, saved_indent_level);
671 static void do_dir_rec_SUSP(deark *c, lctx *d, struct dir_record *dr,
672 i64 pos1, i64 len1)
674 i64 pos = pos1;
675 i64 len = len1;
677 while(1) {
678 i64 ca_blk = 0;
679 i64 ca_offs = 0;
680 i64 ca_len = 0;
682 do_dir_rec_SUSP_set(c, d, dr, pos, len, &ca_blk, &ca_offs, &ca_len);
684 if(ca_blk==0) {
685 break;
688 // Prepare to jump to a continuation area
690 pos = sector_dpos(d, ca_blk) + ca_offs;
692 // Prevent loops
693 if(!de_inthashtable_add_item(c, d->dirs_seen, pos, NULL)) {
694 break;
697 len = ca_len;
701 static void do_dir_rec_system_use_area(deark *c, lctx *d, struct dir_record *dr,
702 i64 pos1, i64 len)
704 i64 pos = pos1;
705 int non_SUSP_handled = 0;
706 i64 non_SUSP_len = len; // default
707 i64 SUSP_len = 0; // default
709 de_dbg(c, "[%"I64_FMT" bytes of system use data at %"I64_FMT"]", len, pos1);
711 if(dr->is_root_dot) {
712 if(is_SUSP_indicator(c, pos, len)) {
713 d->uses_SUSP = 1;
714 non_SUSP_len = 0;
715 SUSP_len = len;
718 else if(d->uses_SUSP) {
719 non_SUSP_len = d->SUSP_default_bytes_to_skip;
720 SUSP_len = len - d->SUSP_default_bytes_to_skip;
723 if(non_SUSP_len>0) {
724 u8 buf[10];
726 // TODO: Detect & handle more non-SUSP formats here.
727 // - Apple AA/ProDOS
728 // - Apple BA
730 de_zeromem(buf, sizeof(buf));
731 de_read(buf, pos, de_min_int(sizeof(buf), non_SUSP_len));
733 if(d->vol->is_cdxa && non_SUSP_len>=14 && buf[6]=='X' && buf[7]=='A') {
734 do_CDXA_dirdata(c, d, dr, pos);
735 non_SUSP_handled = 1;
737 else if(non_SUSP_len>=14 && buf[0]=='A' && buf[1]=='A' && buf[2]==0x0e &&
738 buf[3]==0x02)
740 // TODO: Support XA + AA
741 do_Apple_AA_HFS(c, d, dr, pos, non_SUSP_len);
742 non_SUSP_handled = 1;
744 else if(non_SUSP_len>=10 && !de_memcmp(buf, "ARCHIMEDES", 10)) {
745 do_ARCHIMEDES(c, d, dr, pos, non_SUSP_len);
746 non_SUSP_handled = 1;
750 if(non_SUSP_len>0 && !non_SUSP_handled) {
751 de_dbg(c, "[unidentified system use data]");
752 if(c->debug_level>=2) {
753 de_dbg_indent(c, 1);
754 de_dbg_hexdump(c, c->infile, pos, non_SUSP_len, 256, NULL, 0x1);
755 de_dbg_indent(c, -1);
759 if(d->uses_SUSP && SUSP_len>0) {
760 do_dir_rec_SUSP(c, d, dr, pos+non_SUSP_len, SUSP_len);
762 // TODO?: There can potentially also be non-SUSP data *after* the SUSP data,
763 // but I don't know if we need to worry about that.
766 static void name_to_lowercase(de_ucstring *s)
768 i64 i;
770 if(!s) return;
771 for(i=0; i<s->len; i++) {
772 if(s->str[i]>='A' && s->str[i]<='Z') {
773 s->str[i] += 32;
778 static void do_directory(deark *c, lctx *d, i64 pos1, i64 len, int nesting_level);
780 // Caller allocates dr
781 static int do_directory_record(deark *c, lctx *d, i64 pos1, struct dir_record *dr, int nesting_level)
783 i64 n;
784 i64 pos = pos1;
785 i64 sys_use_len;
786 u8 b;
787 u8 specialfnbyte;
788 de_ucstring *tmps = NULL;
789 int retval = 0;
790 de_ext_encoding file_id_encoding;
792 dr->len_dir_rec = (i64)de_getbyte_p(&pos);
793 de_dbg(c, "dir rec len: %u", (unsigned int)dr->len_dir_rec);
794 if(dr->len_dir_rec<1) goto done;
796 dr->len_ext_attr_rec = (i64)de_getbyte_p(&pos);
797 de_dbg(c, "ext attrib rec len: %u", (unsigned int)dr->len_ext_attr_rec);
799 dr->extent_blk = getu32bbo_p(c->infile, &pos);
800 de_dbg(c, "loc. of extent: %"I64_FMT" (block #%u)", sector_dpos(d, dr->extent_blk),
801 (unsigned int)dr->extent_blk);
802 dr->data_len = getu32bbo_p(c->infile, &pos);
803 de_dbg(c, "data length: %u", (unsigned int)dr->data_len);
805 read_datetime7(c, d, pos, &dr->recording_time);
806 dbg_timestamp(c, &dr->recording_time, "recording time");
807 pos += 7;
809 dr->file_flags = de_getbyte_p(&pos);
810 tmps = ucstring_create(c);
811 if(dr->file_flags & 0x01) ucstring_append_flags_item(tmps, "hidden");
812 if(dr->file_flags & 0x02) {
813 ucstring_append_flags_item(tmps, "directory");
814 dr->is_dir = 1;
816 if(dr->file_flags & 0x04) {
817 ucstring_append_flags_item(tmps, "associated file");
819 if(dr->file_flags & 0x08) {
820 ucstring_append_flags_item(tmps, "record format");
821 dr->is_specialfileformat = 1;
823 if(dr->file_flags & 0x10) ucstring_append_flags_item(tmps, "protected");
824 if(dr->file_flags & 0x80) {
825 ucstring_append_flags_item(tmps, "multi-extent");
826 dr->is_specialfileformat = 1;
828 de_dbg(c, "file flags: 0x%02x (%s)", (unsigned int)dr->file_flags,
829 ucstring_getpsz_d(tmps));
831 b = de_getbyte_p(&pos);
832 de_dbg(c, "file unit size: %u", (unsigned int)b);
834 b = de_getbyte_p(&pos);
835 de_dbg(c, "interleave gap size: %u", (unsigned int)b);
836 if(b!=0) {
837 dr->is_specialfileformat = 1;
840 n = getu16bbo_p(c->infile, &pos);
841 de_dbg(c, "volume sequence number: %u", (unsigned int)n);
842 dr->file_id_len = (i64)de_getbyte_p(&pos);
844 if(dr->is_dir && dr->file_id_len==1) {
845 // Peek at the first (& only) byte of the filename.
846 specialfnbyte = de_getbyte(pos);
848 else {
849 specialfnbyte = 0xff;
852 if(specialfnbyte==0x00 || specialfnbyte==0x01) {
853 // To better display the "thisdir" and "parentdir" directory entries
854 file_id_encoding = DE_EXTENC_MAKE(DE_ENCODING_ASCII, DE_ENCSUBTYPE_PRINTABLE);
856 else if(d->vol->encoding!=DE_ENCODING_UNKNOWN) {
857 file_id_encoding = d->vol->encoding;
859 else if(d->uses_SUSP) {
860 // We're using the user_req_encoding for the Rock Ridge names,
861 // so don't use it here.
862 file_id_encoding = DE_ENCODING_ASCII;
864 else if(d->user_req_encoding!=DE_ENCODING_UNKNOWN) {
865 file_id_encoding = d->user_req_encoding;
867 else {
868 file_id_encoding = DE_ENCODING_ASCII;
871 dr->fname = ucstring_create(c);
872 dbuf_read_to_ucstring(c->infile, pos, dr->file_id_len, dr->fname, 0, file_id_encoding);
873 de_dbg(c, "file id: \"%s\"", ucstring_getpsz_d(dr->fname));
875 if(d->names_to_lowercase && !d->vol->is_joliet) {
876 name_to_lowercase(dr->fname);
879 if(specialfnbyte==0x00) {
880 dr->is_thisdir = 1;
881 if(nesting_level==0) {
882 dr->is_root_dot = 1;
885 else if(specialfnbyte==0x01) {
886 dr->is_parentdir = 1;
888 pos += dr->file_id_len;
890 if((dr->file_id_len%2)==0) pos++; // padding byte
892 // System Use area
893 sys_use_len = pos1+dr->len_dir_rec-pos;
894 if(sys_use_len>0) {
895 do_dir_rec_system_use_area(c, d, dr, pos, sys_use_len);
898 if(dr->has_riscos_data && (dr->rfa.attribs&0x100)) {
899 // Based on what Linux does, and other evidence: If a certain attribute bit
900 // is set, the filename is supposed to start with an exclamation point.
901 if(ucstring_isnonempty(dr->fname)) {
902 if(dr->fname->str[0]=='_') {
903 dr->fname->str[0] = '!';
908 if(dr->len_ext_attr_rec>0) {
909 // TODO
910 de_err(c, "Can't handle files with extended attribute records");
911 goto done;
914 if(dr->is_dir && !dr->is_thisdir && !dr->is_parentdir) {
915 do_extract_file(c, d, dr);
916 if(ucstring_isnonempty(dr->rr_name)) {
917 de_strarray_push(d->curpath, dr->rr_name);
919 else {
920 de_strarray_push(d->curpath, dr->fname);
922 do_directory(c, d, sector_dpos(d, dr->extent_blk), dr->data_len, nesting_level+1);
923 de_strarray_pop(d->curpath);
925 else if(!dr->is_dir) {
926 do_extract_file(c, d, dr);
928 else if(dr->is_root_dot) {
929 do_extract_file(c, d, dr);
932 retval = 1;
934 done:
935 ucstring_destroy(tmps);
936 return retval;
939 // A sequence of directory_records
940 static void do_directory(deark *c, lctx *d, i64 pos1, i64 len, int nesting_level)
942 struct dir_record *dr = NULL;
943 i64 pos = pos1;
944 int saved_indent_level;
945 int idx = 0;
947 de_dbg_indent_save(c, &saved_indent_level);
948 if(pos1<=0) goto done;
950 if((len>=0x08000000LL) & (d->dirsize_hack_state!=0)) {
951 // A few CDs seem to have garbage in the high bits of the directory length fields.
952 // Examples:
953 // https://archive.org/details/NIghtsOwl
954 // https://archive.org/details/SDN1__793
956 if(d->dirsize_hack_state<0) {
957 de_warn(c, "Possibly corrupt directory length found (0x%08x). Enabling workaround.", (UI)len);
958 d->dirsize_hack_state = 1;
960 len &= 0x07ffffffLL;
963 de_dbg(c, "directory at %"I64_FMT", len=%"I64_FMT, pos1, len);
964 de_dbg_indent(c, 1);
966 if(!de_inthashtable_add_item(c, d->dirs_seen, pos1, NULL)) {
967 de_warn(c, "Duplicate directory or loop detected (@%"I64_FMT")", pos1);
968 goto done;
971 if(nesting_level>MAX_NESTING_LEVEL) {
972 de_err(c, "Maximum directory nesting level exceeded");
973 goto done;
976 if(pos1+len > c->infile->len) {
977 de_warn(c, "Directory at %"I64_FMT" goes beyond end of file (size=%"I64_FMT")",
978 pos1, len);
981 while(1) {
982 int ret;
984 if(pos >= pos1+len) break;
985 if(pos >= c->infile->len) break;
987 // Peek at the first byte of the dir record (the length)
988 if(pos%d->secsize != 0) {
989 if(de_getbyte(pos)==0) {
990 // No more dir records in this sector; advance to the next sector
991 pos = de_pad_to_n(pos, d->secsize);
994 if(pos >= pos1+len) break;
995 if(pos >= c->infile->len) break;
998 de_dbg(c, "file/dir record at %"I64_FMT" (item[%d] in dir@%"I64_FMT")", pos,
999 idx, pos1);
1000 dr = de_malloc(c, sizeof(struct dir_record));
1001 de_dbg_indent(c, 1);
1002 ret = do_directory_record(c, d, pos, dr, nesting_level);
1003 de_dbg_indent(c, -1);
1004 if(!ret) break;
1005 if(dr->len_dir_rec<1) break;
1007 pos += dr->len_dir_rec; // + ext_len??
1008 free_dir_record(c, dr);
1009 dr = NULL;
1010 idx++;
1013 done:
1014 if(dr) free_dir_record(c, dr);
1015 de_dbg_indent_restore(c, saved_indent_level);
1018 static void do_boot_volume_descr(deark *c, lctx *d, i64 pos1)
1020 de_ucstring *tmpstr = NULL;
1021 struct de_stringreaderdata *boot_sys_id = NULL;
1022 i64 pos = pos1 + 7;
1023 i64 n;
1025 tmpstr = ucstring_create(c);
1026 boot_sys_id = dbuf_read_string(c->infile, pos, 32, 32, DE_CONVFLAG_STOP_AT_NUL,
1027 DE_ENCODING_ASCII);
1028 pos += 32;
1029 de_dbg(c, "boot system id: \"%s\"", ucstring_getpsz(boot_sys_id->str));
1031 handle_iso_string_p(c, d, NULL, "boot id", &pos, 32, tmpstr);
1033 if(!de_strcmp(boot_sys_id->sz, "EL TORITO SPECIFICATION")) {
1034 n = de_getu32le_p(&pos);
1035 de_dbg(c, "first sector of boot catalog: %u", (unsigned int)n);
1038 ucstring_destroy(tmpstr);
1039 de_destroy_stringreaderdata(c, boot_sys_id);
1042 static void read_escape_sequences(deark *c, lctx *d, struct vol_record *vol, i64 pos)
1044 u8 es[8];
1046 de_dbg(c, "escape sequences:");
1047 de_dbg_indent(c, 1);
1048 de_dbg_hexdump(c, c->infile, pos, 32, 32, NULL, 0);
1049 de_read(es, pos, sizeof(es));
1051 // 40, 43, 45 are for UCS-2.
1052 // 4a-4c are for UTF-16, probably not used by Joliet since it predates UTF-16,
1053 // but it shouldn't hurt to allow it.
1054 if(es[0]==0x25 && es[1]==0x2f && (es[2]==0x40 || es[2]==0x43 || es[2]==0x45 ||
1055 es[2]==0x4a || es[2]==0x4b || es[3]==0x4c))
1057 vol->is_joliet = 1;
1058 vol->encoding = DE_ENCODING_UTF16BE;
1060 de_dbg(c, "is joliet: %u", (unsigned int)vol->is_joliet);
1061 de_dbg_indent(c, -1);
1064 static void do_primary_or_suppl_volume_descr_internal(deark *c, lctx *d,
1065 struct vol_record *vol, i64 secnum, i64 pos1, int is_primary)
1067 i64 pos = pos1 + 7;
1068 i64 vol_space_size;
1069 i64 vol_set_size;
1070 i64 vol_seq_num;
1071 i64 n;
1072 unsigned int vol_flags;
1073 u32 crc;
1074 int is_dup;
1075 de_ucstring *tmpstr = NULL;
1076 struct de_timestamp tmpts;
1078 // Check whether this is a copy of a previous descriptor
1079 if(!d->crco) {
1080 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
1082 de_crcobj_reset(d->crco);
1083 de_crcobj_addslice(d->crco, c->infile, pos1, d->secsize);
1084 crc = de_crcobj_getval(d->crco);
1086 is_dup = (de_inthashtable_add_item(c, d->voldesc_crc_hash, (i64)crc, NULL) == 0);
1087 // False positives are *possible*, but note that we always allow the first
1088 // primary descriptor (multiple unique primary descriptors are not allowed), and
1089 // the first supplemental descriptor (multiple unique supplemental descriptors
1090 // are rare).
1091 if(is_primary) {
1092 if(d->primary_vol_desc_count==0) is_dup = 0;
1093 d->primary_vol_desc_count++;
1095 else {
1096 if(d->suppl_vol_desc_count==0) is_dup = 0;
1097 d->suppl_vol_desc_count++;
1100 if(is_dup) {
1101 de_dbg(c, "[this is an extra copy of a previous volume descriptor]");
1102 if(d->vol_desc_sector_forced && (secnum==d->vol_desc_sector_to_use)) {
1103 ; // ... but we have to read it anyway.
1105 else {
1106 vol->quality = 0;
1107 goto done;
1110 /////////
1112 vol->encoding = DE_ENCODING_UNKNOWN;
1114 if(!is_primary) {
1115 vol_flags = de_getbyte(pos);
1116 de_dbg(c, "volume flags: 0x%02x", vol_flags);
1118 pos++;
1120 if(!is_primary) {
1121 // Look ahead at the escape sequences field, because fields that appear
1122 // before it may depend on it.
1123 read_escape_sequences(c, d, vol, pos1+88);
1126 tmpstr = ucstring_create(c);
1127 handle_iso_string_p(c, d, vol, "system id", &pos, 32, tmpstr);
1128 handle_iso_string_p(c, d, vol, "volume id", &pos, 32, tmpstr);
1130 pos += 8; // 73-80 unused
1132 vol_space_size = getu32bbo_p(c->infile, &pos);
1133 de_dbg(c, "volume space size: %"I64_FMT" blocks", vol_space_size);
1135 pos += 32; // escape sequences (already read) or unused
1137 vol_set_size = getu16bbo_p(c->infile, &pos);
1138 de_dbg(c, "volume set size: %u", (unsigned int)vol_set_size);
1139 vol_seq_num = getu16bbo_p(c->infile, &pos);
1140 de_dbg(c, "volume sequence number: %u", (unsigned int)vol_seq_num);
1141 vol->block_size = getu16bbo_p(c->infile, &pos);
1142 de_dbg(c, "block size: %u bytes", (unsigned int)vol->block_size);
1143 if(vol->block_size==0) {
1144 if(!d->blocksize_warned) {
1145 de_warn(c, "Block size not set. Assuming 2048.");
1146 d->blocksize_warned = 1;
1148 vol->block_size = 2048;
1150 n = getu32bbo_p(c->infile, &pos);
1151 de_dbg(c, "path table size: %"I64_FMT" bytes", n);
1153 n = de_getu32le_p(&pos);
1154 de_dbg(c, "loc. of type L path table: block #%u", (unsigned int)n);
1155 n = de_getu32le_p(&pos);
1156 de_dbg(c, "loc. of optional type L path table: block #%u", (unsigned int)n);
1157 n = de_getu32be_p(&pos);
1158 de_dbg(c, "loc. of type M path table: block #%u", (unsigned int)n);
1159 n = de_getu32be_p(&pos);
1160 de_dbg(c, "loc. of optional type M path table: block #%u", (unsigned int)n);
1162 de_dbg(c, "dir record for root dir");
1163 de_dbg_indent(c, 1);
1164 // This is a copy of the main information in the root directory's
1165 // directory entry, basically for bootstrapping.
1166 // It should be effectively identical to the "." entry in the root
1167 // directory. The only fields we care about:
1168 vol->root_dir_extent_blk = getu32bbo(c->infile, pos+2);
1169 de_dbg(c, "loc. of extent: block #%u", (unsigned int)vol->root_dir_extent_blk);
1170 vol->root_dir_data_len = getu32bbo(c->infile, pos+10);
1171 de_dbg(c, "data length: %u", (unsigned int)vol->root_dir_data_len);
1173 de_dbg_indent(c, -1);
1174 pos += 34;
1176 handle_iso_string_p(c, d, vol, "volume set id", &pos, 128, tmpstr);
1177 handle_iso_string_p(c, d, vol, "publisher id", &pos, 128, tmpstr);
1178 handle_iso_string_p(c, d, vol, "data preparer id", &pos, 128, tmpstr);
1179 handle_iso_string_p(c, d, vol, "application id", &pos, 128, tmpstr);
1180 handle_iso_string_p(c, d, vol, "copyright file id", &pos, 37, tmpstr);
1181 handle_iso_string_p(c, d, vol, "abstract file id", &pos, 37, tmpstr);
1182 handle_iso_string_p(c, d, vol, "bibliographic file id", &pos, 37, tmpstr);
1184 read_datetime17(c, d, pos, &tmpts);
1185 dbg_timestamp(c, &tmpts, "volume creation time");
1186 pos += 17;
1188 read_datetime17(c, d, pos, &tmpts);
1189 dbg_timestamp(c, &tmpts, "volume mod time");
1190 pos += 17;
1192 read_datetime17(c, d, pos, &tmpts);
1193 dbg_timestamp(c, &tmpts, "volume expiration time");
1194 pos += 17;
1196 read_datetime17(c, d, pos, &tmpts);
1197 dbg_timestamp(c, &tmpts, "volume effective time");
1198 pos += 17;
1200 vol->file_structure_version = de_getbyte_p(&pos);
1201 de_dbg(c, "file structure version: %u", (unsigned int)vol->file_structure_version);
1203 vol->is_cdxa = !dbuf_memcmp(c->infile, pos1+1024, "CD-XA001", 8);
1204 de_dbg(c, "is CD-ROM XA: %u", (unsigned int)vol->is_cdxa);
1206 vol->quality = 1 +
1207 ((vol->block_size==2048)?80:0) +
1208 ((vol->is_joliet)?40:0) +
1209 ((vol->file_structure_version<=1)?10:0) +
1210 ((vol->file_structure_version==1)?10:0) +
1211 ((is_primary)?5:0);
1213 done:
1214 ucstring_destroy(tmpstr);
1217 static void do_primary_or_suppl_volume_descr(deark *c, lctx *d, i64 secnum,
1218 i64 pos1, int is_primary)
1220 struct vol_record *newvol;
1222 newvol = de_malloc(c, sizeof(struct vol_record));
1223 newvol->secnum = secnum;
1225 do_primary_or_suppl_volume_descr_internal(c, d, newvol, secnum, pos1, is_primary);
1227 if(newvol->quality==0) goto done; // not usable
1228 if(d->vol_desc_sector_forced && (secnum!=d->vol_desc_sector_to_use)) {
1229 // User told us not to use this volume descriptor.
1230 goto done;
1233 if(d->vol) {
1234 // We already have a volume descriptor. Is the new one preferable?
1235 if(newvol->quality > d->vol->quality) {
1236 de_free(c, d->vol);
1237 d->vol = newvol;
1238 newvol = NULL;
1241 else {
1242 d->vol = newvol;
1243 newvol = NULL;
1246 done:
1247 if(newvol) de_free(c, newvol);
1250 // Returns 0 if this is a terminator, or on serious error.
1251 // Returns 1 normally.
1252 static int do_volume_descriptor(deark *c, lctx *d, i64 secnum)
1254 u8 dtype;
1255 u8 dvers;
1256 int saved_indent_level;
1257 i64 pos1, pos;
1258 const char *vdtname;
1259 int retval = 0;
1260 enum voldesctype_enum vdt = VOLDESCTYPE_UNKNOWN;
1261 struct de_stringreaderdata *standard_id = NULL;
1263 de_dbg_indent_save(c, &saved_indent_level);
1265 pos1 = sector_dpos(d, secnum);
1266 pos = pos1;
1268 dtype = de_getbyte_p(&pos);
1269 standard_id = dbuf_read_string(c->infile, pos, 5, 5, 0, DE_ENCODING_ASCII);
1270 pos += 5;
1271 dvers = de_getbyte_p(&pos);
1273 if(!de_strcmp(standard_id->sz, "CD001")) {
1274 switch(dtype) {
1275 case 0: vdt = VOLDESCTYPE_CD_BOOT; break;
1276 case 1: vdt = VOLDESCTYPE_CD_PRIMARY; break;
1277 case 2: vdt = VOLDESCTYPE_CD_SUPPL; break;
1278 case 3: vdt = VOLDESCTYPE_CD_PARTDESCR; break;
1279 case 0xff: vdt = VOLDESCTYPE_CD_TERM; break;
1280 default: vdt = VOLDESCTYPE_OTHERVALID; break;
1283 else if(!de_strncmp(standard_id->sz, "NSR0", 4))
1285 vdt = VOLDESCTYPE_NSR;
1287 else if(!de_strncmp(standard_id->sz, "BEA0", 4)) {
1288 vdt = VOLDESCTYPE_BEA;
1290 else if(!de_strncmp(standard_id->sz, "TEA0", 4)) {
1291 vdt = VOLDESCTYPE_TEA;
1293 else if(!de_strncmp(standard_id->sz, "BOOT", 4) ||
1294 !de_strncmp(standard_id->sz, "CDW0", 4))
1296 vdt = VOLDESCTYPE_OTHERVALID;
1299 if(vdt==VOLDESCTYPE_UNKNOWN) {
1300 de_warn(c, "Expected volume descriptor at %"I64_FMT" not found", pos1);
1301 goto done;
1304 de_dbg(c, "volume descriptor at %"I64_FMT" (sector %d)", pos1, (int)secnum);
1305 de_dbg_indent(c, 1);
1307 de_dbg(c, "type: %u", (unsigned int)dtype);
1308 de_dbg(c, "standard id: \"%s\"", ucstring_getpsz_d(standard_id->str));
1309 de_dbg(c, "version: %u", (unsigned int)dvers);
1311 vdtname = get_vol_descr_type_name(vdt);
1312 de_dbg(c, "interpreted type: %s", vdtname);
1314 retval = 1;
1315 if(vdt==VOLDESCTYPE_TEA) {
1316 retval = 0;
1318 else if(vdt==VOLDESCTYPE_CD_TERM) {
1319 // Minor hack: Peak ahead at the next sector. Unless it looks like a
1320 // BEA descriptor, signifying that there are extended descriptors,
1321 // assume this is the last descriptor.
1322 if(dbuf_memcmp(c->infile, sector_dpos(d, secnum+1)+1, "BEA0", 4)) {
1323 retval = 0;
1327 switch(vdt) {
1328 case VOLDESCTYPE_CD_BOOT:
1329 do_boot_volume_descr(c, d, pos1);
1330 break;
1331 case VOLDESCTYPE_CD_PRIMARY:
1332 do_primary_or_suppl_volume_descr(c, d, secnum, pos1, 1);
1333 break;
1334 case VOLDESCTYPE_CD_SUPPL: // supplementary or enhanced
1335 do_primary_or_suppl_volume_descr(c, d, secnum, pos1, 0);
1336 break;
1337 case VOLDESCTYPE_NSR:
1338 d->is_udf = 1;
1339 break;
1340 case VOLDESCTYPE_BEA:
1341 case VOLDESCTYPE_CD_TERM:
1342 case VOLDESCTYPE_TEA:
1343 break;
1344 default:
1345 de_dbg(c, "[disregarding this volume descriptor]");
1348 done:
1349 de_dbg_indent_restore(c, saved_indent_level);
1350 de_destroy_stringreaderdata(c, standard_id);
1351 return retval;
1354 static void de_run_iso9660(deark *c, de_module_params *mparams)
1356 lctx *d = NULL;
1357 i64 cursec;
1358 const char *s;
1360 d = de_malloc(c, sizeof(lctx));
1362 if(de_get_ext_option_bool(c, "iso9660:tolower", 0)) {
1363 d->names_to_lowercase = 1;
1366 d->dirsize_hack_state = de_get_ext_option_bool(c, "iso9660:dirsizehack", -1);
1368 s = de_get_ext_option(c, "iso9660:voldesc");
1369 if(s) {
1370 d->vol_desc_sector_forced = 1;
1371 d->vol_desc_sector_to_use = de_atoi(s);
1374 d->user_req_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_UNKNOWN);
1375 d->rr_encoding = (d->user_req_encoding==DE_ENCODING_UNKNOWN) ?
1376 DE_ENCODING_UTF8 : d->user_req_encoding;
1378 d->secsize = 2048;
1380 if(!dbuf_memcmp(c->infile, 512, "PM\x00\x00", 4)) {
1381 de_info(c, "Note: This file includes an Apple Partition Map. "
1382 "Use \"-m apm\" to read it.");
1385 d->voldesc_crc_hash = de_inthashtable_create(c);
1386 cursec = 16;
1387 while(1) {
1388 if(!do_volume_descriptor(c, d, cursec)) break;
1389 cursec++;
1392 if(d->is_udf) {
1393 de_warn(c, "This file might have UDF-specific content, which is "
1394 "not supported.");
1397 if(!d->vol) {
1398 de_err(c, "No usable volume descriptor found");
1399 goto done;
1402 de_dbg(c, "[using volume descriptor at sector %u]", (unsigned int)d->vol->secnum);
1404 if(d->vol->block_size != 2048) {
1405 // TODO: Figure out sector size vs. block size.
1406 de_err(c, "Unsupported block size: %u", (unsigned int)d->vol->block_size);
1407 goto done;
1410 d->dirs_seen = de_inthashtable_create(c);
1411 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
1413 if(d->vol->root_dir_extent_blk) {
1414 do_directory(c, d, sector_dpos(d, d->vol->root_dir_extent_blk),
1415 d->vol->root_dir_data_len, 0);
1418 done:
1419 if(d) {
1420 de_free(c, d->vol);
1421 de_strarray_destroy(d->curpath);
1422 de_inthashtable_destroy(c, d->dirs_seen);
1423 de_inthashtable_destroy(c, d->voldesc_crc_hash);
1424 de_crcobj_destroy(d->crco);
1425 de_free(c, d);
1429 static int cdsig_at(dbuf *f, i64 pos)
1431 u8 buf[6];
1433 dbuf_read(f, buf, pos, sizeof(buf));
1434 if(de_memcmp(&buf[1], "CD001", 5)) return 0;
1435 if(buf[0]>3 && buf[0]<255) return 0;
1436 return 1;
1439 static int cdsig_at2(dbuf *f, i64 pos1, i64 pos2)
1441 return (cdsig_at(f, pos1) &&
1442 cdsig_at(f, pos2));
1445 static int de_identify_iso9660(deark *c)
1447 if(cdsig_at2(c->infile, 32768, 32768+2048)) {
1448 // Confidence is practically 100%, but since hybrid formats are
1449 // possible, we want other modules to be able to have precedence.
1450 return 80;
1452 return 0;
1455 static void de_help_iso9660(deark *c)
1457 de_msg(c, "-opt iso9660:tolower : Convert original-style filenames to lowercase.");
1458 de_msg(c, "-opt iso9660:voldesc=<n> : Use the volume descriptor at sector <n>.");
1459 de_msg(c, "-opt iso9660:dirsizehack=0 : Disable a workaround for bad directory lengths.");
1462 void de_module_iso9660(deark *c, struct deark_module_info *mi)
1464 mi->id = "iso9660";
1465 mi->desc = "ISO 9660 (CD-ROM) image";
1466 mi->run_fn = de_run_iso9660;
1467 mi->identify_fn = de_identify_iso9660;
1468 mi->help_fn = de_help_iso9660;
1471 struct cdraw_params {
1472 int ok;
1473 i64 sector_total_len;
1474 i64 sector_dlen;
1475 i64 sector_data_offset;
1476 const char *ext;
1479 // If the volume has an ISO 9660 "volume identifier", try to read it to use as
1480 // part of the output filename.
1481 // This is quick and dirty, and somewhat duplicates code from the iso9660 module.
1482 static void cdraw_set_name_from_vol_id(deark *c, struct cdraw_params *cdrp, de_finfo *fi)
1484 de_ucstring *vol_id = NULL;
1485 i64 pos;
1487 pos = 16*cdrp->sector_total_len + cdrp->sector_data_offset;
1488 if(dbuf_memcmp(c->infile, pos, "\x01" "CD001", 6)) goto done;
1490 vol_id = ucstring_create(c);
1491 dbuf_read_to_ucstring(c->infile, pos+40, 32, vol_id, DE_CONVFLAG_STOP_AT_NUL,
1492 DE_ENCODING_ASCII);
1493 ucstring_strip_trailing_spaces(vol_id);
1495 if(ucstring_isnonempty(vol_id)) {
1496 de_dbg(c, "iso9660 volume id: \"%s\"", ucstring_getpsz_d(vol_id));
1497 de_finfo_set_name_from_ucstring(c, fi, vol_id, 0);
1500 done:
1501 ucstring_destroy(vol_id);
1504 static void do_cdraw_convert(deark *c, struct cdraw_params *cdrp)
1506 i64 pos;
1507 de_finfo *fi = NULL;
1508 dbuf *outf = NULL;
1510 fi = de_finfo_create(c);
1511 cdraw_set_name_from_vol_id(c, cdrp, fi);
1513 outf = dbuf_create_output_file(c, cdrp->ext, fi, 0x0);
1515 pos = cdrp->sector_data_offset;
1516 while(1) {
1517 if(pos >= c->infile->len) break;
1518 dbuf_copy(c->infile, pos, cdrp->sector_dlen, outf);
1519 pos += cdrp->sector_total_len;
1522 dbuf_close(outf);
1523 de_finfo_destroy(c, fi);
1526 static void cdraw_setdefaults(struct cdraw_params *cdrp)
1528 cdrp->ok = 0;
1529 cdrp->sector_total_len = 2048;
1530 cdrp->sector_dlen = 2048;
1531 cdrp->sector_data_offset = 0;
1532 cdrp->ext = "bin";
1535 static int syncbytes_at(dbuf *f, i64 pos)
1537 return !dbuf_memcmp(f, pos,
1538 "\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00", 12);
1541 static void cdraw_detect_params(dbuf *f, struct cdraw_params *cdrp)
1543 if(cdsig_at2(f, 2336*16+8, 2336*17+8)) {
1544 cdrp->ok = 1;
1545 cdrp->sector_total_len = 2336;
1546 cdrp->sector_data_offset = 8;
1547 cdrp->ext = "iso";
1548 return;
1550 if(cdsig_at2(f, 2352*16+16, 2352*17+16)) {
1551 cdrp->ok = 1;
1552 cdrp->sector_total_len = 2352;
1553 cdrp->sector_data_offset = 16;
1554 cdrp->ext = "iso";
1555 return;
1557 if(cdsig_at2(f, 2352*16+24, 2352*17+24)) {
1558 cdrp->ok = 1;
1559 cdrp->sector_total_len = 2352;
1560 cdrp->sector_data_offset = 24;
1561 cdrp->ext = "iso";
1562 return;
1564 if(cdsig_at2(f, 2448*16+16, 2448*17+16)) {
1565 cdrp->ok = 1;
1566 cdrp->sector_total_len = 2448;
1567 cdrp->sector_data_offset = 16;
1568 cdrp->ext = "iso";
1569 return;
1571 if(cdsig_at2(f, 2448*16+24, 2448*17+24)) {
1572 cdrp->ok = 1;
1573 cdrp->sector_total_len = 2448;
1574 cdrp->sector_data_offset = 24;
1575 cdrp->ext = "iso";
1576 return;
1578 if(syncbytes_at(f, 0)) {
1579 if(syncbytes_at(f, 2352)) {
1580 if(!dbuf_memcmp(f, 512+16, "PM", 2)) {
1581 cdrp->ok = 1;
1582 cdrp->sector_total_len = 2352;
1583 cdrp->sector_data_offset = 16;
1584 cdrp->ext = "apm";
1585 return;
1589 // TODO: More formats?
1592 static void de_run_cd_raw(deark *c, de_module_params *mparams)
1594 struct cdraw_params cdrp;
1596 cdraw_setdefaults(&cdrp);
1597 cdraw_detect_params(c->infile, &cdrp);
1598 if(!cdrp.ok) {
1599 de_err(c, "Failed to detect raw CD format");
1600 goto done;
1603 de_dbg(c, "total bytes/sector: %"I64_FMT, cdrp.sector_total_len);
1604 de_dbg(c, "data bytes/sector: %"I64_FMT, cdrp.sector_dlen);
1605 de_dbg(c, "data offset: %"I64_FMT, cdrp.sector_data_offset);
1607 do_cdraw_convert(c, &cdrp);
1609 done:
1613 static int de_identify_cd_raw(deark *c)
1615 struct cdraw_params cdrp;
1617 cdraw_setdefaults(&cdrp);
1618 cdraw_detect_params(c->infile, &cdrp);
1619 if(cdrp.ok) return 70;
1620 return 0;
1623 void de_module_cd_raw(deark *c, struct deark_module_info *mi)
1625 mi->id = "cd_raw";
1626 mi->desc = "Raw CD image";
1627 mi->run_fn = de_run_cd_raw;
1628 mi->identify_fn = de_identify_cd_raw;
1631 struct nrg_ctx {
1632 int ver;
1633 i64 chunk_list_start;
1634 i64 chunk_list_size;
// Chunk type codes found in the NRG chunk list. Each value is the chunk's
// four-character ASCII ID, with the first character in the high byte.
#define CODE_CDTX 0x43445458U // CDTX: CD-text
#define CODE_CUES 0x43554553U // CUES: cue sheet
#define CODE_CUEX 0x43554558U // CUEX: cue sheet
#define CODE_DAOI 0x44414f49U // DAOI: DAO info
#define CODE_DAOX 0x44414f58U // DAOX: DAO info
#define CODE_END_ 0x454e4421U // END!
#define CODE_ETNF 0x45544e46U // ETNF: extended track info
#define CODE_SINF 0x53494e46U // SINF: session info
1646 static int detect_nrg_internal(deark *c)
1648 if(!dbuf_memcmp(c->infile, c->infile->len-8, "NERO", 4)) {
1649 return 1;
1651 if(!dbuf_memcmp(c->infile, c->infile->len-12, "NER5", 4)) {
1652 return 2;
1654 return 0;
1657 static void do_nrg_ETNF(deark *c, struct de_iffctx *ictx,
1658 const struct de_iffchunkctx *chunkctx)
1660 i64 pos = chunkctx->dpos;
1661 i64 t = 0;
1663 while(1) {
1664 i64 track_offs_bytes, track_len_bytes, start_lba;
1665 unsigned int mode;
1667 if(chunkctx->dpos + chunkctx->dlen - pos < 20) break;
1668 de_dbg(c, "track #%d", (int)t);
1669 de_dbg_indent(c, 1);
1670 track_offs_bytes = de_getu32be(pos);
1671 track_len_bytes = de_getu32be(pos+4);
1672 de_dbg(c, "offset: %"I64_FMT", len: %"I64_FMT, track_offs_bytes, track_len_bytes);
1673 mode = (unsigned int)de_getu32be(pos+8);
1674 de_dbg(c, "mode: %u", mode);
1675 start_lba = de_getu32be(pos+12);
1676 de_dbg(c, "start lba: %"I64_FMT, start_lba);
1677 de_dbg_indent(c, -1);
1678 pos += 20;
1679 t++;
1683 static int my_preprocess_nrg_chunk_fn(struct de_iffctx *ictx)
1685 const char *name = NULL;
1687 switch(ictx->chunkctx->chunk4cc.id) {
1688 case CODE_CDTX: name = "CD-text"; break;
1689 case CODE_CUES: case CODE_CUEX: name = "cue sheet"; break;
1690 case CODE_DAOI: case CODE_DAOX: name = "DAO info"; break;
1691 case CODE_ETNF: name = "extended track info"; break;
1692 case CODE_SINF: name = "session info"; break;
1695 if(name) {
1696 ictx->chunkctx->chunk_name = name;
1698 return 1;
1702 static int my_nrg_chunk_handler(struct de_iffctx *ictx)
1704 deark *c = ictx->c;
1706 // Always set this, because we never want the IFF parser to try to handle
1707 // a chunk itself.
1708 ictx->handled = 1;
1710 switch(ictx->chunkctx->chunk4cc.id) {
1711 case CODE_ETNF:
1712 do_nrg_ETNF(c, ictx, ictx->chunkctx);
1713 break;
1716 if(ictx->chunkctx->chunk4cc.id==CODE_END_) {
1717 return 0;
1719 return 1;
1722 static void do_nrg_chunks(deark *c, struct nrg_ctx *nrg)
1724 struct de_iffctx *ictx = NULL;
1726 ictx = fmtutil_create_iff_decoder(c);
1727 ictx->userdata = (void*)nrg;
1728 ictx->preprocess_chunk_fn = my_preprocess_nrg_chunk_fn;
1729 ictx->handle_chunk_fn = my_nrg_chunk_handler;
1730 ictx->f = c->infile;
1731 ictx->is_le = 0;
1732 ictx->reversed_4cc = 0;
1734 fmtutil_read_iff_format(ictx, nrg->chunk_list_start, nrg->chunk_list_size);
1735 fmtutil_destroy_iff_decoder(ictx);
1738 static void de_run_nrg(deark *c, de_module_params *mparams)
1740 struct cdraw_params cdrp;
1741 struct nrg_ctx *nrg = NULL;
1743 nrg = de_malloc(c, sizeof(struct nrg_ctx));
1745 nrg->ver = detect_nrg_internal(c);
1746 if(nrg->ver==0) {
1747 de_err(c, "Not in NRG format");
1748 goto done;
1751 if(nrg->ver==2) {
1752 nrg->chunk_list_start = de_geti64be(c->infile->len-8);
1753 nrg->chunk_list_size = c->infile->len - 12 - nrg->chunk_list_start;
1755 else {
1756 nrg->chunk_list_start = de_getu32be(c->infile->len-4);
1757 nrg->chunk_list_size = c->infile->len - 8 - nrg->chunk_list_start;
1759 de_dbg(c, "chunk list: offset=%"I64_FMT", len=%"I64_FMT,
1760 nrg->chunk_list_start, nrg->chunk_list_size);
1762 do_nrg_chunks(c, nrg);
1764 // TODO: The NRG data we just read probably tells us the image format,
1765 // somehow, so it seems wrong to autodetect it.
1767 if(cdsig_at2(c->infile, 32768, 32768+2048)) {
1768 de_dbg(c, "ISO 9660 image at %d", 0);
1769 de_dbg_indent(c, 1);
1770 de_run_module_by_id_on_slice(c, "iso9660", NULL, c->infile, 0, nrg->chunk_list_start);
1771 de_dbg_indent(c, -1);
1772 goto done;
1775 cdraw_setdefaults(&cdrp);
1776 cdraw_detect_params(c->infile, &cdrp);
1777 if(cdrp.ok) {
1778 de_dbg(c, "raw CD image at %d", 0);
1779 de_dbg_indent(c, 1);
1780 de_run_module_by_id_on_slice(c, "cd_raw", NULL, c->infile, 0, nrg->chunk_list_start);
1781 de_dbg_indent(c, -1);
1784 done:
1785 de_free(c, nrg);
1788 static int de_identify_nrg(deark *c)
1790 if(!de_input_file_has_ext(c, "nrg")) return 0;
1791 if(detect_nrg_internal(c)>0) {
1792 return 85;
1794 return 0;
1797 void de_module_nrg(deark *c, struct deark_module_info *mi)
1799 mi->id = "nrg";
1800 mi->desc = "NRG CD-ROM image";
1801 mi->run_fn = de_run_nrg;
1802 mi->identify_fn = de_identify_nrg;