lzhuf: Refactored to avoid direct array access
[deark.git] / modules / iff.c
bloba256af1915254ac97dc323da986b155748c99c57
// This file is part of Deark.
// Copyright (C) 2016 Jason Summers
// See the file COPYING for terms of use.

// IFF (Interchange File Format)
// MIDI

// Note that the IFF parser is actually implemented in fmtutil.c, not here.
// This module uses fmtutil to support unknown IFF formats, and IFF formats
// for which we have very little format-specific logic.
12 #include <deark-config.h>
13 #include <deark-private.h>
14 #include <deark-fmtutil.h>
15 DE_DECLARE_MODULE(de_module_iff);
16 DE_DECLARE_MODULE(de_module_midi);
// Top-level file variants, as returned by identify_internal().
#define FMT_FORM   1
#define FMT_FOR4   4
#define FMT_DJVU   10

// Four-character codes packed into a 32-bit value, with the first
// character in the most significant byte.
#define CODE_8SVX  0x38535658U
#define CODE_AIFF  0x41494646U
#define CODE_CAT   0x43415420U
#define CODE_CAT4  0x43415434U
#define CODE_COMT  0x434f4d54U
#define CODE_FOR4  0x464f5234U
#define CODE_FORM  0x464f524dU
#define CODE_ID3   0x49443320U
#define CODE_LIS4  0x4c495334U
#define CODE_LIST  0x4c495354U
#define CODE_MThd  0x4d546864U
#define CODE_NAME  0x4e414d45U
// Per-run module state. Passed to the chunk handlers via ictx->userdata.
typedef struct localctx_struct {
	// Which top-level variant was detected: one of the FMT_* values,
	// or 0 if identify_internal() did not recognize the file.
	int fmt;
} lctx;
39 static void do_text_chunk(deark *c, struct de_iffctx *ictx, const char *name)
41 de_ucstring *s = NULL;
43 ictx->handled = 1;
44 s = ucstring_create(c);
45 // TODO: Sometimes this text is clearly not ASCII, but I've never seen
46 // a file with a "CSET" chunk, and I don't know how else I would know
47 // the character encoding.
48 dbuf_read_to_ucstring_n(c->infile,
49 ictx->chunkctx->dpos, ictx->chunkctx->dlen, DE_DBG_MAX_STRLEN,
50 s, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
51 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz(s));
52 ucstring_destroy(s);
55 static void do_id3_chunk(deark *c, struct de_iffctx *ictx)
57 if(dbuf_memcmp(ictx->f, ictx->chunkctx->dpos, "ID3", 3)) {
58 return;
60 de_dbg(c, "ID3v2 data at %"I64_FMT", len=%"I64_FMT, ictx->chunkctx->dpos, ictx->chunkctx->dlen);
61 de_dbg_indent(c, 1);
62 de_run_module_by_id_on_slice2(c, "id3", "I", ictx->f,
63 ictx->chunkctx->dpos, ictx->chunkctx->dlen);
64 de_dbg_indent(c, -1);
67 static void do_aiff_comt_chunk(deark *c, struct de_iffctx *ictx)
69 i64 pos = ictx->chunkctx->dpos;
70 i64 endpos = ictx->chunkctx->dpos + ictx->chunkctx->dlen;
71 i64 ncomments;
72 i64 i;
73 de_ucstring *s = NULL;
74 int saved_indent_level;
76 de_dbg_indent_save(c, &saved_indent_level);
77 ncomments = dbuf_getu16be_p(ictx->f, &pos);
78 de_dbg(c, "num comments: %d", (int)ncomments);
79 s = ucstring_create(c);
80 for(i=0; i<ncomments; i++) {
81 i64 textlen;
83 if(pos+8 >= endpos) goto done;
84 de_dbg(c, "comment at %"I64_FMT, pos);
85 de_dbg_indent(c, 1);
86 pos += 4; // timestamp
87 pos += 2; // MarkerID
88 textlen = dbuf_getu16be_p(ictx->f, &pos);
89 if(pos+textlen > endpos) goto done;
90 ucstring_empty(s);
91 dbuf_read_to_ucstring_n(ictx->f, pos, textlen, 1000, s, 0, DE_ENCODING_ASCII);
92 de_dbg(c, "text: \"%s\"", ucstring_getpsz_d(s));
93 pos += de_pad_to_2(textlen);
94 de_dbg_indent(c, -1);
97 done:
98 ucstring_destroy(s);
99 de_dbg_indent_restore(c, saved_indent_level);
102 static int is_container_chunk(deark *c, lctx *d, u32 ct)
104 if(d->fmt==FMT_FOR4) {
105 if(ct==CODE_FOR4 || ct==CODE_LIS4 || ct==CODE_CAT4) return 1;
107 else {
108 if(ct==CODE_FORM || ct==CODE_LIST || ct==CODE_CAT) return 1;
110 return 0;
113 static int my_std_container_start_fn(deark *c, struct de_iffctx *ictx)
115 if(ictx->level==0 &&
116 ictx->curr_container_fmt4cc.id==CODE_FORM &&
117 ictx->main_fmt4cc.id==CODE_FORM)
119 const char *fmtname = NULL;
121 switch(ictx->main_contentstype4cc.id) {
122 case CODE_8SVX: fmtname = "8SVX"; break;
123 case CODE_AIFF: fmtname = "AIFF"; break;
126 if(fmtname) {
127 de_declare_fmt(c, fmtname);
131 return 1;
134 static int my_iff_chunk_handler(deark *c, struct de_iffctx *ictx)
136 lctx *d = (lctx*)ictx->userdata;
138 ictx->is_std_container = is_container_chunk(c, d, ictx->chunkctx->chunk4cc.id);
139 if(ictx->is_std_container) goto done;
141 if(ictx->main_contentstype4cc.id==CODE_8SVX) {
142 switch(ictx->chunkctx->chunk4cc.id) {
143 case CODE_NAME:
144 // In 8SVX, the NAME chunk means "voice name". In other types
145 // of files, it presumably means some other sort of name.
146 do_text_chunk(c, ictx, "voice name");
147 break;
150 else if(ictx->main_contentstype4cc.id==CODE_AIFF) {
151 switch(ictx->chunkctx->chunk4cc.id) {
152 case CODE_COMT:
153 do_aiff_comt_chunk(c, ictx);
154 break;
155 case CODE_ID3:
156 do_id3_chunk(c, ictx);
157 break;
161 done:
162 return 1;
165 static int identify_internal(deark *c, int *confidence)
167 u8 buf[8];
169 de_read(buf, 0, sizeof(buf));
171 if(!de_memcmp(buf, (const u8*)"FORM", 4)) {
172 if(confidence) *confidence = 9;
173 return FMT_FORM;
175 if(!de_memcmp(buf, (const u8*)"FOR4", 4)) {
176 if(confidence) *confidence = 25;
177 return FMT_FOR4;
179 if(!de_memcmp(buf, (const u8*)"AT&TFORM", 8)) {
180 if(confidence) *confidence = 100;
181 return FMT_DJVU;
184 if(confidence) *confidence = 0;
185 return 0;
188 static void de_run_iff(deark *c, de_module_params *mparams)
190 lctx *d = NULL;
191 struct de_iffctx *ictx = NULL;
192 const char *s;
193 i64 pos;
196 d = de_malloc(c, sizeof(lctx));
197 ictx = de_malloc(c, sizeof(struct de_iffctx));
199 ictx->alignment = 2; // default
201 d->fmt = identify_internal(c, NULL);
203 if(d->fmt==FMT_FOR4) {
204 ictx->alignment = 4;
207 s = de_get_ext_option(c, "iff:align");
208 if(s) {
209 ictx->alignment = de_atoi(s);
212 if(d->fmt==FMT_DJVU) {
213 de_declare_fmt(c, "DjVu");
214 pos = 4;
216 else {
217 pos = 0;
220 ictx->userdata = (void*)d;
221 ictx->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
222 ictx->handle_chunk_fn = my_iff_chunk_handler;
223 ictx->on_std_container_start_fn = my_std_container_start_fn;
224 ictx->f = c->infile;
226 fmtutil_read_iff_format(c, ictx, pos, c->infile->len - pos);
228 de_free(c, ictx);
229 de_free(c, d);
232 static int de_identify_iff(deark *c)
234 int confidence = 0;
235 int fmt;
237 fmt = identify_internal(c, &confidence);
238 if(fmt!=0) {
239 return confidence;
241 // TODO: LIST, CAT formats?
242 return 0;
245 static void de_help_iff(deark *c)
247 de_msg(c, "-opt iff:align=<n> : Assume chunks are padded to an n-byte boundary");
249 void de_module_iff(deark *c, struct deark_module_info *mi)
251 mi->id = "iff";
252 mi->desc = "IFF (Interchange File Format)";
253 mi->run_fn = de_run_iff;
254 mi->identify_fn = de_identify_iff;
255 mi->help_fn = de_help_iff;
258 ///// MIDI /////
259 // MIDI is not IFF, but it's close enough.
261 static void do_midi_MThd(deark *c, struct de_iffctx *ictx,
262 const struct de_iffchunkctx *chunkctx)
264 i64 format_field, ntrks_field, division_field;
266 if(chunkctx->dlen<6) return;
267 format_field = dbuf_getu16be(ictx->f, chunkctx->dpos);
268 de_dbg(c, "format: %d", (int)format_field);
269 ntrks_field = dbuf_getu16be(ictx->f, chunkctx->dpos+2);
270 de_dbg(c, "ntrks: %d", (int)ntrks_field);
271 division_field = dbuf_getu16be(ictx->f, chunkctx->dpos+4);
272 de_dbg(c, "division: %d", (int)division_field);
275 static int my_midi_chunk_handler(deark *c, struct de_iffctx *ictx)
277 switch(ictx->chunkctx->chunk4cc.id) {
278 case CODE_MThd:
279 do_midi_MThd(c, ictx, ictx->chunkctx);
280 break;
282 ictx->handled = 1;
283 return 1;
286 static void de_run_midi(deark *c, de_module_params *mparams)
288 lctx *d = NULL;
289 struct de_iffctx *ictx = NULL;
291 d = de_malloc(c, sizeof(lctx));
293 ictx = de_malloc(c, sizeof(struct de_iffctx));
294 ictx->alignment = 1;
295 ictx->userdata = (void*)d;
296 ictx->handle_chunk_fn = my_midi_chunk_handler;
297 ictx->f = c->infile;
299 fmtutil_read_iff_format(c, ictx, 0, c->infile->len);
301 de_free(c, ictx);
302 de_free(c, d);
305 static int de_identify_midi(deark *c)
307 if(!dbuf_memcmp(c->infile, 0, "MThd", 4)) {
308 return 100;
310 return 0;
313 void de_module_midi(deark *c, struct deark_module_info *mi)
315 mi->id = "midi";
316 mi->desc = "MIDI audio";
317 mi->run_fn = de_run_midi;
318 mi->identify_fn = de_identify_midi;