1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
6 // RIFF-based formats (Windows animated cursor, AVI, WAVE, WebP, Corel CCX/CMX, CorelDRAW)
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
// Declares the module registration function de_module_riff, which is defined
// later in this file.
11 DE_DECLARE_MODULE(de_module_riff
);
// FourCC constants. Each value is the four ASCII bytes of the code packed
// most-significant-byte first (e.g. 0x41434f4e is "ACON"); three-letter codes
// are padded with a trailing space (0x20).
//
// CODE_* values are compared against container contents types
// (main_contentstype4cc / curr_container_contentstype4cc) and against the
// AVI stream type.
13 #define CODE_ACON 0x41434f4eU
14 #define CODE_AVI 0x41564920U
15 #define CODE_CDRX 0x43445258U
16 #define CODE_CMX1 0x434d5831U
17 #define CODE_INFO 0x494e464fU
18 #define CODE_PAL 0x50414c20U
19 #define CODE_RMID 0x524d4944U
20 #define CODE_WAVE 0x57415645U
21 #define CODE_WEBP 0x57454250U
22 #define CODE_auds 0x61756473U
23 #define CODE_bmpt 0x626d7074U
24 #define CODE_cmpr 0x636d7072U
25 #define CODE_movi 0x6d6f7669U
26 #define CODE_vids 0x76696473U
// CHUNK_* values are compared against the ID of an individual chunk
// (chunkctx->chunk4cc) or of the current container (curr_container_fmt4cc).
28 #define CHUNK_DISP 0x44495350U
29 #define CHUNK_EXIF 0x45584946U
30 #define CHUNK_IART 0x49415254U
31 #define CHUNK_ICOP 0x49434f50U
32 #define CHUNK_ICCP 0x49434350U
33 #define CHUNK_ICMT 0x49434d54U
34 #define CHUNK_IKEY 0x494b4559U
35 #define CHUNK_ISBJ 0x4953424aU
36 #define CHUNK_JUNK 0x4a554e4bU
37 #define CHUNK_LIST 0x4c495354U
38 #define CHUNK_RIFF 0x52494646U
39 #define CHUNK_RIFX 0x52494658U
40 #define CHUNK_XMP 0x584d5020U
// "_PMX": handled the same way as CHUNK_XMP by the chunk handler.
41 #define CHUNK__PMX 0x5f504d58U
42 #define CHUNK_avih 0x61766968U
43 #define CHUNK_bmp 0x626d7020U
44 #define CHUNK_data 0x64617461U
45 #define CHUNK_fact 0x66616374U
46 #define CHUNK_fmt 0x666d7420U
47 #define CHUNK_icon 0x69636f6eU
48 #define CHUNK_strf 0x73747266U
49 #define CHUNK_strh 0x73747268U
// Module-local state. NOTE(review): presumably this is the type referred to
// as "lctx" elsewhere in the file, reached from the parser callbacks through
// ictx->userdata -- confirm against the full typedef.
51 typedef struct localctx_struct
{
// Type FourCC of the most recently seen AVI stream header; written by
// do_avi_strh() and read by do_avi_strf().
53 u32 curr_avi_stream_type
;
// Hands the slice of ictx->f described by pos and len to
// dbuf_create_file_from_slice(), using ext as the output extension and passing
// createflags through unchanged. c and d are not used in the visible code.
59 static void do_extract_raw(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
, const char *ext
,
60 unsigned int createflags
)
62 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, ext
, NULL
, createflags
);
// Reads the payload of one item from an INFO list as text and prints it to the
// debug log as "value: ...".
//   pos, len  - location of the item's data in ictx->f
//   chunk_id  - FourCC of the item; not used by the visible code yet (see TODO)
// The text is read with DE_ENCODING_LATIN1 and DE_CONVFLAG_STOP_AT_NUL, with
// DE_DBG_MAX_STRLEN passed as an additional limit.
65 static void do_INFO_item(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
, u32 chunk_id
)
67 de_ucstring
*s
= NULL
;
69 s
= ucstring_create(c
);
71 // TODO: Decode the chunk_id (e.g. ICRD = Creation date).
73 // TODO: Support the CSET chunk
74 dbuf_read_to_ucstring_n(ictx
->f
, pos
, len
, DE_DBG_MAX_STRLEN
, s
,
75 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_LATIN1
);
76 de_dbg(c
, "value: \"%s\"", ucstring_getpsz(s
));
// Extracts one frame of an animated cursor to its own output file.
// The first four bytes of the frame (buf) are compared against the signatures
// 00 00 01 00 and 00 00 02 00 to select the output extension ext.
// NOTE(review): these look like the ICO and CUR header signatures
// respectively -- confirm against the branch bodies.
81 static void extract_ani_frame(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
86 de_dbg(c
, "frame at %d, len=%d", (int)pos
, (int)len
);
90 // Try to identify the format of this frame.
91 if(!de_memcmp(buf
, "\x00\x00\x01\x00", 4)) {
94 else if(!de_memcmp(buf
, "\x00\x00\x02\x00", 4)) {
// The whole frame is written out unchanged, whichever signature matched.
101 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, ext
, NULL
, 0);
// Maps a WAVE format tag to a short human-readable name.
// Returns a pointer to a string literal; returns "?" when n is not one of the
// tags listed below (name is still NULL at the end).
104 static const char *get_wav_fmt_name(unsigned int n
)
106 const char *name
= NULL
;
108 case 0x0001: name
="PCM"; break;
109 case 0x0002: name
="ADPCM"; break;
110 case 0x0050: name
="MPEG"; break;
111 case 0x0055: name
="MPEGLAYER3"; break;
112 case 0xFFFE: name
="EXTENSIBLE"; break;
113 // TODO: There are lots more formats.
116 return name
?name
:"?";
// Prints the fields of a WAVEFORMATEX structure to the debug log.
//   pos1, len - location and size of the structure in ictx->f
// Nothing is decoded unless ictx->is_le is set and len is at least 14, which
// is the combined size of the first five fields read below (2+2+4+4+2).
// BitsPerSample is only read when len >= 16, and cbSize only when len >= 18.
// NOTE(review): the *_p readers are handed &pos, so presumably each one
// advances pos past the field it reads -- confirm.
119 static void decode_WAVEFORMATEX(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos1
, i64 len
)
121 unsigned int formattag
;
125 if(!ictx
->is_le
) goto done
;
126 if(len
<14) goto done
;
128 formattag
= (unsigned int)dbuf_getu16le_p(ictx
->f
, &pos
);
129 de_dbg(c
, "FormatTag: 0x%04x (%s)", formattag
, get_wav_fmt_name(formattag
));
130 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
131 de_dbg(c
, "Channels: %u", (unsigned int)n
);
132 n
= dbuf_getu32le_p(ictx
->f
, &pos
);
133 de_dbg(c
, "SamplesPerSec: %u", (unsigned int)n
);
134 n
= dbuf_getu32le_p(ictx
->f
, &pos
);
135 de_dbg(c
, "AvgBytesPerSec: %u", (unsigned int)n
);
136 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
137 de_dbg(c
, "BlockAlign: %u", (unsigned int)n
);
// The remaining fields are optional; stop as soon as the data runs out.
138 if(len
<16) goto done
;
139 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
140 de_dbg(c
, "BitsPerSample: %u", (unsigned int)n
);
141 if(len
<18) goto done
;
142 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
143 de_dbg(c
, "cbSize: %u", (unsigned int)n
);
// Decodes the format chunk of a WAVE file: the chunk payload at pos/len is
// treated as a WAVEFORMATEX structure. Called from my_riff_chunk_handler()
// only when the main contents type is CODE_WAVE.
149 static void do_wav_fmt(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
151 decode_WAVEFORMATEX(c
, d
, ictx
, pos
, len
);
// Prints the 32-bit little-endian value at pos as the WAVE file's number of
// samples. Does nothing for files that are not little-endian (ictx->is_le).
// NOTE(review): the value is read with de_getu32le(pos) rather than through
// ictx->f -- confirm that both refer to the same data.
154 static void do_wav_fact(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
158 if(!ictx
->is_le
) return;
160 n
= de_getu32le(pos
);
161 de_dbg(c
, "number of samples: %u", (unsigned int)n
);
// Prints selected fields of the AVI main header chunk to the debug log.
// All fields are 32-bit little-endian values, at these offsets from pos:
//    +0  microseconds per frame
//   +12  flags
//   +16  number of frames
//   +24  number of streams
//   +32, +36  the two values passed to de_dbg_dimensions()
//             (presumably width and height -- confirm)
164 static void do_avi_avih(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
169 n
= de_getu32le(pos
);
170 de_dbg(c
, "microseconds/frame: %u", (unsigned int)n
);
171 n
= de_getu32le(pos
+12);
172 de_dbg(c
, "flags: 0x%08x", (unsigned int)n
);
173 n
= de_getu32le(pos
+16);
174 de_dbg(c
, "number of frames: %u", (unsigned int)n
);
175 n
= de_getu32le(pos
+24);
176 de_dbg(c
, "number of streams: %u", (unsigned int)n
);
177 n
= de_getu32le(pos
+32);
178 n2
= de_getu32le(pos
+36);
179 de_dbg_dimensions(c
, n
, n2
);
180 // TODO: There are more fields in this chunk.
// Decodes the start of an AVI stream header chunk: the stream type FourCC at
// pos and the codec FourCC at pos+4, both printed to the debug log.
// Side effect: the stream type is saved in d->curr_avi_stream_type so that
// do_avi_strf() can tell how to interpret the chunk it is given.
183 static void do_avi_strh(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
185 struct de_fourcc type4cc
;
186 struct de_fourcc codec4cc
;
190 dbuf_read_fourcc(ictx
->f
, pos
, &type4cc
, 4, 0x0);
191 de_dbg(c
, "stream type: '%s'", type4cc
.id_dbgstr
);
192 // Hack. TODO: Need a better way to track state.
193 d
->curr_avi_stream_type
= type4cc
.id
;
195 dbuf_read_fourcc(ictx
->f
, pos
+4, &codec4cc
, 4, 0x0);
196 de_dbg(c
, "codec: '%s'", codec4cc
.id_dbgstr
);
198 // TODO: There are more fields here.
// Decodes an AVI stream format chunk. How the payload is interpreted depends
// on d->curr_avi_stream_type, which do_avi_strh() sets:
//   CODE_vids - parsed as a bitmap header via fmtutil_get_bmpinfo()
//   CODE_auds - decoded as a WAVEFORMATEX
// No handling for other stream types is visible here.
201 static void do_avi_strf(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
203 if(d
->curr_avi_stream_type
==CODE_vids
) {
204 struct de_bmpinfo bi
;
205 // For video streams, this is a BITMAPINFO.
206 fmtutil_get_bmpinfo(c
, ictx
->f
, &bi
, pos
, len
, DE_BMPINFO_CMPR_IS_4CC
);
207 // This chunk contains just a bitmap header, so we can't extract a bitmap.
209 else if(d
->curr_avi_stream_type
==CODE_auds
) {
210 // For audio streams, this is a WAVEFORMATEX.
211 decode_WAVEFORMATEX(c
, d
, ictx
, pos
, len
);
// Extracts a bitmap embedded in a CorelDRAW file to a ".bmp" output file.
// The first two bytes of the chunk payload are skipped, so the extracted slice
// starts at pos+2 and is len-2 bytes long.
215 static void do_cdr_bmp(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
218 // The first 2 bytes are an index, or something. BMP starts at offset 2.
219 dbuf_create_file_from_slice(ictx
->f
, pos
+2, len
-2, "bmp", NULL
, 0);
// Prints a palette chunk to the debug log. Little-endian files only.
// The payload starts with a 16-bit version and a 16-bit entry count, followed
// by the palette entries. The count is clamped to (len-4)/4, i.e. entries are
// taken to be 4 bytes each after a 4-byte header.
// Each entry supplies r, g, b and a flags byte; g, b and flags are read from
// entry offsets +1, +2 and +3. The colour is printed with the flags appended.
222 static void do_palette(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
231 if(!ictx
->is_le
) return;
232 ver
= de_getu16le(pos
);
233 de_dbg(c
, "version: 0x%04x", (unsigned int)ver
);
235 n
= de_getu16le(pos
);
236 de_dbg(c
, "number of entries: %d", (int)n
);
// Never trust the stored count beyond what the chunk can actually hold.
238 if(n
>(len
-4)/4) n
=(len
-4)/4;
242 de_dbg(c
, "palette entries at %d", (int)pos
);
246 g
= de_getbyte(pos
+1);
247 b
= de_getbyte(pos
+2);
248 flags
= de_getbyte(pos
+3);
250 clr
= DE_MAKE_RGB(r
, g
, b
);
251 de_snprintf(tmps
, sizeof(tmps
), " flags=0x%02x", (unsigned int)flags
);
252 de_dbg_pal_entry2(c
, i
, clr
, NULL
, NULL
, tmps
);
254 de_dbg_indent(c
, -1);
// Handles a DISP chunk whose payload is a DIB by running the "dib" module on
// the slice of ictx->f at pos/len, with the module option string "X".
257 static void do_DISP_DIB(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
260 // "X" = Tell the dib module to mark the output file as "auxiliary".
261 de_run_module_by_id_on_slice2(c
, "dib", "X", ictx
->f
, pos
, len
);
// Handles a DISP chunk whose payload is text: extracts it to an auxiliary
// output file named with the "disp.txt" extension.
//   pos, len1 - location and size of the text field in ictx->f
// If a 0x00 byte is found within the field, the text length is cut at that
// byte (len = foundpos - pos), so the terminator and anything after it are
// not extracted.
264 static void do_DISP_TEXT(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len1
)
270 if(dbuf_search_byte(ictx
->f
, 0x00, pos
, len1
, &foundpos
)) {
271 len
= foundpos
- pos
;
273 de_dbg(c
, "text length: %d", (int)len
);
276 do_extract_raw(c
, d
, ictx
, pos
, len
, "disp.txt", DE_CREATEFLAG_IS_AUX
);
// Extracts the payload of an ICCP chunk unchanged to an auxiliary output file
// with the "icc" extension.
279 static void do_ICCP(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
281 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, "icc", NULL
, DE_CREATEFLAG_IS_AUX
);
// Passes the payload of an EXIF chunk (pos/len) to fmtutil_handle_exif().
// NOTE(review): ictx->f is not passed along, so the helper presumably reads
// from the main input file -- confirm that this is the same data as ictx->f.
284 static void do_EXIF(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
286 fmtutil_handle_exif(c
, pos
, len
);
// Extracts the payload of an XMP chunk unchanged to an auxiliary output file
// with the "xmp" extension.
289 static void do_XMP(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
291 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, "xmp", NULL
, DE_CREATEFLAG_IS_AUX
);
// Decodes a DISP chunk. Little-endian files only.
// The payload begins with a 32-bit data type, which is printed together with
// the name returned by fmtutil_get_windows_cb_data_type_name(). Depending on
// that type, the data is handed to do_DISP_TEXT() or do_DISP_DIB().
// NOTE(review): dpos/dlen presumably describe the data following the 4-byte
// type field -- confirm against their assignments.
294 static void do_DISP(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
299 if(!ictx
->is_le
) return;
301 ty
= (unsigned int)de_getu32le(pos
);
302 de_dbg(c
, "data type: %u (%s)", ty
,
303 fmtutil_get_windows_cb_data_type_name(ty
));
310 do_DISP_TEXT(c
, d
, ictx
, dpos
, dlen
);
314 do_DISP_DIB(c
, d
, ictx
, dpos
, dlen
);
// Tests whether the 4 bytes at pos in ictx->f could be a FourCC.
// Returns 0 as soon as a byte outside the range 32..126 (printable ASCII) is
// found. NOTE(review): presumably returns nonzero when all four bytes pass --
// confirm.
319 static int is_fourcc_at(deark
*c
, struct de_iffctx
*ictx
, i64 pos
)
324 dbuf_read(ictx
->f
, b
, pos
, 4);
326 if(b
[i
]<32 || b
[i
]>126) return 0;
// Heuristic for skipping non-RIFF data found between chunks in Corel CMX files.
// Reads a 16-bit little-endian value n at pos and returns 0 if it is zero or
// greater than 256. Otherwise n is rounded up with de_pad_to_2(), and if
// something that looks like a FourCC follows at pos + n_padded, the segment is
// reported in the debug log.
// NOTE(review): *plen is presumably set to the number of bytes to skip, and
// nonzero returned, when the heuristic matches -- confirm.
331 static int do_cmx_parse_hack(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64
*plen
)
335 // Some CMX chunks seem to be followed by a non-RIFF segment starting with either
336 // 04 00 (4 bytes) or 10 00 (16 bytes). I'm just guessing how to parse them.
337 n
= dbuf_getu16le(ictx
->f
, pos
);
338 if(n
>256 || n
==0) return 0;
340 n_padded
= de_pad_to_2(n
);
341 if(is_fourcc_at(c
, ictx
, pos
+ n_padded
)) {
342 de_dbg(c
, "[%d non-RIFF bytes at %"I64_FMT
"]", (int)n_padded
, pos
);
// Callback for data that is not a well-formed chunk; installed as
// ictx->handle_nonchunk_data_fn by my_on_std_container_start_fn().
// When d->cmx_parse_hack is set, the decision is delegated to
// do_cmx_parse_hack() and its result is returned.
349 static int my_handle_nonchunk_riff_data_fn(deark
*c
, struct de_iffctx
*ictx
,
352 lctx
*d
= (lctx
*)ictx
->userdata
;
354 if(d
->cmx_parse_hack
) {
355 return do_cmx_parse_hack(c
, d
, ictx
, pos
, plen
);
// Callback run at the start of each standard container; installed as
// ictx->on_std_container_start_fn by de_run_riff(). The visible code does
// the following:
//  - maps the file's main contents type to a format name and reports it with
//    de_declare_fmt(), treating any type starting with "CDR" as CorelDRAW;
//  - for Corel CMX, installs the non-chunk-data callback and sets
//    d->cmx_parse_hack;
//  - logs that 'cmpr' lists in CorelDRAW files and 'movi' lists in AVI files
//    are not decoded;
//  - records the nesting level at which a 'movi' list was entered.
// NOTE(review): the return value presumably tells the parser whether to
// descend into the container -- confirm.
360 static int my_on_std_container_start_fn(deark
*c
, struct de_iffctx
*ictx
)
362 lctx
*d
= (lctx
*)ictx
->userdata
;
365 const char *fmtname
= NULL
;
367 switch(ictx
->main_contentstype4cc
.id
) {
368 case CODE_ACON
: fmtname
= "Windows animated cursor"; break;
369 case CODE_AVI
: fmtname
= "AVI"; break;
370 case CODE_CDRX
: fmtname
= "Corel CCX"; break;
372 fmtname
= "Corel CMX";
373 ictx
->handle_nonchunk_data_fn
= my_handle_nonchunk_riff_data_fn
;
374 d
->cmx_parse_hack
= 1;
376 case CODE_WAVE
: fmtname
= "WAVE"; break;
377 case CODE_WEBP
: fmtname
= "WebP"; break;
380 // Special check for CorelDraw formats.
381 if(!fmtname
&& !de_memcmp(ictx
->main_contentstype4cc
.bytes
, (const void*)"CDR", 3)) {
383 fmtname
= "CorelDRAW (RIFF-based)";
387 de_declare_fmt(c
, fmtname
);
391 if(d
->is_cdr
&& ictx
->curr_container_fmt4cc
.id
==CHUNK_LIST
) {
392 // 'cmpr' LISTs in CorelDraw files are not correctly formed.
393 // Tell the parser not to process them.
394 if(ictx
->curr_container_contentstype4cc
.id
==CODE_cmpr
) {
395 de_dbg(c
, "[not decoding CDR cmpr list]");
400 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
&&
401 ictx
->curr_container_contentstype4cc
.id
==CODE_movi
&&
404 // There are often a huge number of these chunks, and we can't do
405 // anything interesting with them, so skip them by default.
406 de_dbg(c
, "[not decoding movi chunks]");
410 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
&&
411 ictx
->curr_container_contentstype4cc
.id
==CODE_movi
&& !d
->in_movi
)
413 // Keep track of when we are inside a 'movi' container.
415 d
->in_movi_level
= ictx
->level
;
// Callback run at the end of a container; installed as
// ictx->on_container_end_fn by de_run_riff().
// Detects the end of the 'movi' list that was being tracked: the contents type
// is CODE_movi, d->in_movi is set, and the current nesting level equals the
// level recorded in d->in_movi_level when the list was entered.
421 static int my_on_container_end_fn(deark
*c
, struct de_iffctx
*ictx
)
423 lctx
*d
= (lctx
*)ictx
->userdata
;
425 if(ictx
->curr_container_contentstype4cc
.id
==CODE_movi
&&
426 d
->in_movi
&& ictx
->level
==d
->in_movi_level
)
// Callback run before a chunk is handled; installed as
// ictx->preprocess_chunk_fn by de_run_riff().
// Looks up a descriptive name for the chunk's FourCC and stores it in
// ictx->chunkctx->chunk_name. Chunks not listed below get the initial value
// of name, which is NULL.
434 static int my_preprocess_riff_chunk_fn(deark
*c
, struct de_iffctx
*ictx
)
436 const char *name
= NULL
;
438 // TODO: Need a better way to do this.
439 switch(ictx
->chunkctx
->chunk4cc
.id
) {
440 case CHUNK_DISP
: name
="display"; break;
441 case CHUNK_IART
: name
="artist"; break;
442 case CHUNK_ICOP
: name
="copyright"; break;
443 case CHUNK_ICMT
: name
="comments"; break;
444 case CHUNK_IKEY
: name
="keywords"; break;
445 case CHUNK_ISBJ
: name
="subject"; break;
446 case CHUNK_JUNK
: name
="filler"; break;
447 case CHUNK_LIST
: name
="subchunk container"; break;
451 ictx
->chunkctx
->chunk_name
= name
;
// Main per-chunk callback; installed as ictx->handle_chunk_fn by de_run_riff().
// Works on the current chunk's data position/length (dpos/dlen) and the
// contents type of the enclosing container (list_type). The visible code:
//  - marks some chunks as standard containers (ictx->is_std_container);
//  - decodes every chunk inside an INFO list as a text item;
//  - dispatches other chunks by FourCC to the do_*() helpers above. Most
//    helpers are gated on the file's main contents type (ACON, WAVE, AVI),
//    on list_type (RMID, PAL), or on d->is_cdr;
//  - at debug level 2 or higher, hex-dumps up to 256 bytes of a chunk in AVI
//    files when not inside the 'movi' list.
456 static int my_riff_chunk_handler(deark
*c
, struct de_iffctx
*ictx
)
460 lctx
*d
= (lctx
*)ictx
->userdata
;
462 // We should always set this flag for formats (like RIFF) that aren't standard IFF.
465 list_type
= ictx
->curr_container_contentstype4cc
.id
;
466 dpos
= ictx
->chunkctx
->dpos
;
467 dlen
= ictx
->chunkctx
->dlen
;
469 switch(ictx
->chunkctx
->chunk4cc
.id
) {
473 ictx
->is_std_container
= 1;
477 if(list_type
==CODE_INFO
) {
478 do_INFO_item(c
, d
, ictx
, dpos
, dlen
, ictx
->chunkctx
->chunk4cc
.id
);
482 switch(ictx
->chunkctx
->chunk4cc
.id
) {
485 do_DISP(c
, d
, ictx
, dpos
, dlen
);
491 case CHUNK_ICCP
: // Used by WebP
492 do_ICCP(c
, d
, ictx
, dpos
, dlen
);
495 case CHUNK_EXIF
: // Used by WebP
496 do_EXIF(c
, d
, ictx
, dpos
, dlen
);
499 case CHUNK_XMP
: // Used by WebP
500 case CHUNK__PMX
: // Used by WAVE, AVI
501 do_XMP(c
, d
, ictx
, dpos
, dlen
);
505 if(ictx
->main_contentstype4cc
.id
==CODE_ACON
) {
506 extract_ani_frame(c
, d
, ictx
, dpos
, dlen
);
511 if(list_type
==CODE_RMID
) {
512 do_extract_raw(c
, d
, ictx
, dpos
, dlen
, "mid", 0);
514 else if(list_type
==CODE_PAL
) {
515 do_palette(c
, d
, ictx
, dpos
, dlen
);
520 if(ictx
->main_contentstype4cc
.id
==CODE_WAVE
) {
521 do_wav_fmt(c
, d
, ictx
, dpos
, dlen
);
526 if(ictx
->main_contentstype4cc
.id
==CODE_WAVE
) {
527 do_wav_fact(c
, d
, ictx
, dpos
, dlen
);
532 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
) {
533 do_avi_avih(c
, d
, ictx
, dpos
, dlen
);
538 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
) {
539 do_avi_strh(c
, d
, ictx
, dpos
, dlen
);
544 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
) {
545 do_avi_strf(c
, d
, ictx
, dpos
, dlen
);
550 if(d
->is_cdr
&& ictx
->curr_container_contentstype4cc
.id
==CODE_bmpt
) {
551 do_cdr_bmp(c
, d
, ictx
, dpos
, dlen
);
556 if(c
->debug_level
>=2 &&
557 ictx
->main_contentstype4cc
.id
==CODE_AVI
&& !d
->in_movi
)
559 de_dbg_hexdump(c
, ictx
->f
, dpos
, dlen
, 256, NULL
, 0x1);
// Module entry point (registered as mi->run_fn by de_module_riff()).
// Allocates the module state and an IFF parser context, links the two through
// ictx->userdata, installs this file's callbacks, then parses the whole input
// (offset 0 through ictx->f->len) with fmtutil_read_iff_format().
// The first four bytes (buf) select the parsing variant:
//   "RIFF", "RIFX" - ictx->reversed_4cc = 0
//   "XFIR"         - ictx->reversed_4cc = 1
//   anything else  - warns that this is probably not a RIFF file, then
//                    continues with reversed_4cc = 0
567 static void de_run_riff(deark
*c
, de_module_params
*mparams
)
570 struct de_iffctx
*ictx
= NULL
;
573 d
= de_malloc(c
, sizeof(lctx
));
574 ictx
= de_malloc(c
, sizeof(struct de_iffctx
));
576 ictx
->userdata
= (void*)d
;
577 ictx
->preprocess_chunk_fn
= my_preprocess_riff_chunk_fn
;
578 ictx
->handle_chunk_fn
= my_riff_chunk_handler
;
579 ictx
->on_std_container_start_fn
= my_on_std_container_start_fn
;
580 ictx
->on_container_end_fn
= my_on_container_end_fn
;
585 if(!de_memcmp(buf
, "RIFF", 4)) {
587 ictx
->reversed_4cc
= 0;
589 else if(!de_memcmp(buf
, "RIFX", 4)) {
591 ictx
->reversed_4cc
= 0;
593 else if(!de_memcmp(buf
, "XFIR", 4)) {
595 ictx
->reversed_4cc
= 1;
598 de_warn(c
, "This is probably not a RIFF file.");
600 ictx
->reversed_4cc
= 0;
603 fmtutil_read_iff_format(c
, ictx
, 0, ictx
->f
->len
);
// Format identification (registered as mi->identify_fn by de_module_riff()).
// Returns 0 unless the first four bytes (buf) are "RIFF", "XFIR" or "RIFX".
// Also returns 0 when the 32-bit little-endian value at offset 4 is zero and
// the input file is not exactly 8 bytes long.
// NOTE(review): the confidence value returned on a match is not visible
// here -- see the remainder of the function.
609 static int de_identify_riff(deark
*c
)
616 has_sig
= (!de_memcmp(buf
, "RIFF", 4)) ||
617 (!de_memcmp(buf
, "XFIR", 4)) ||
618 (!de_memcmp(buf
, "RIFX", 4));
619 if(!has_sig
) return 0;
621 dlen
= de_getu32le(4);
622 // This check screens out .AMV format, for example.
623 if(dlen
==0 && c
->infile
->len
!=8) return 0;
// Module registration: fills in the module-info record mi with this module's
// description and its run and identify callbacks.
628 void de_module_riff(deark
*c
, struct deark_module_info
*mi
)
631 mi
->desc
= "RIFF-based formats";
632 mi
->run_fn
= de_run_riff
;
633 mi
->identify_fn
= de_identify_riff
;