1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
6 // Windows animated cursor format
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_riff
);
13 #define CODE_ACON 0x41434f4eU
14 #define CODE_AVI 0x41564920U
15 #define CODE_CDRX 0x43445258U
16 #define CODE_CMX1 0x434d5831U
17 #define CODE_INFO 0x494e464fU
18 #define CODE_PAL 0x50414c20U
19 #define CODE_RMID 0x524d4944U
20 #define CODE_WAVE 0x57415645U
21 #define CODE_WEBP 0x57454250U
22 #define CODE_auds 0x61756473U
23 #define CODE_bmpt 0x626d7074U
24 #define CODE_cmov 0x636d6f76U
25 #define CODE_cmpr 0x636d7072U
26 #define CODE_movi 0x6d6f7669U
27 #define CODE_vids 0x76696473U
29 #define CHUNK_DISP 0x44495350U
30 #define CHUNK_EXIF 0x45584946U
31 #define CHUNK_IART 0x49415254U
32 #define CHUNK_ICOP 0x49434f50U
33 #define CHUNK_ICCP 0x49434350U
34 #define CHUNK_ICMT 0x49434d54U
35 #define CHUNK_IKEY 0x494b4559U
36 #define CHUNK_ISBJ 0x4953424aU
37 #define CHUNK_JUNK 0x4a554e4bU
38 #define CHUNK_LIST 0x4c495354U
39 #define CHUNK_RIFF 0x52494646U
40 #define CHUNK_RIFX 0x52494658U
41 #define CHUNK_XMP 0x584d5020U
42 #define CHUNK__PMX 0x5f504d58U
43 #define CHUNK_avih 0x61766968U
44 #define CHUNK_bmp 0x626d7020U
45 #define CHUNK_data 0x64617461U
46 #define CHUNK_fact 0x66616374U
47 #define CHUNK_fmt 0x666d7420U
48 #define CHUNK_icon 0x69636f6eU
49 #define CHUNK_strf 0x73747266U
50 #define CHUNK_strh 0x73747268U
52 typedef struct localctx_struct
{
53 UI top_level_chunk_count
;
55 u32 curr_avi_stream_type
;
62 static void do_extract_raw(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
, const char *ext
,
63 unsigned int createflags
)
65 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, ext
, NULL
, createflags
);
68 static void do_INFO_item(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
, u32 chunk_id
)
70 de_ucstring
*s
= NULL
;
72 s
= ucstring_create(c
);
74 // TODO: Decode the chunk_id (e.g. ICRD = Creation date).
76 // TODO: Support the CSET chunk
77 dbuf_read_to_ucstring_n(ictx
->f
, pos
, len
, DE_DBG_MAX_STRLEN
, s
,
78 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_LATIN1
);
79 de_dbg(c
, "value: \"%s\"", ucstring_getpsz(s
));
84 static void extract_ani_frame(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
89 de_dbg(c
, "frame at %d, len=%d", (int)pos
, (int)len
);
93 // Try to identify the format of this frame.
94 if(!de_memcmp(buf
, "\x00\x00\x01\x00", 4)) {
97 else if(!de_memcmp(buf
, "\x00\x00\x02\x00", 4)) {
104 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, ext
, NULL
, 0);
107 static const char *get_wav_fmt_name(unsigned int n
)
109 const char *name
= NULL
;
111 case 0x0001: name
="PCM"; break;
112 case 0x0002: name
="ADPCM"; break;
113 case 0x0050: name
="MPEG"; break;
114 case 0x0055: name
="MPEGLAYER3"; break;
115 case 0xFFFE: name
="EXTENSIBLE"; break;
116 // TODO: There are lots more formats.
119 return name
?name
:"?";
122 static void decode_WAVEFORMATEX(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos1
, i64 len
)
124 unsigned int formattag
;
128 if(!ictx
->is_le
) goto done
;
129 if(len
<14) goto done
;
131 formattag
= (unsigned int)dbuf_getu16le_p(ictx
->f
, &pos
);
132 de_dbg(c
, "FormatTag: 0x%04x (%s)", formattag
, get_wav_fmt_name(formattag
));
133 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
134 de_dbg(c
, "Channels: %u", (unsigned int)n
);
135 n
= dbuf_getu32le_p(ictx
->f
, &pos
);
136 de_dbg(c
, "SamplesPerSec: %u", (unsigned int)n
);
137 n
= dbuf_getu32le_p(ictx
->f
, &pos
);
138 de_dbg(c
, "AvgBytesPerSec: %u", (unsigned int)n
);
139 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
140 de_dbg(c
, "BlockAlign: %u", (unsigned int)n
);
141 if(len
<16) goto done
;
142 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
143 de_dbg(c
, "BitsPerSample: %u", (unsigned int)n
);
144 if(len
<18) goto done
;
145 n
= dbuf_getu16le_p(ictx
->f
, &pos
);
146 de_dbg(c
, "cbSize: %u", (unsigned int)n
);
152 static void do_wav_fmt(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
154 decode_WAVEFORMATEX(c
, d
, ictx
, pos
, len
);
157 static void do_wav_fact(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
161 if(!ictx
->is_le
) return;
163 n
= de_getu32le(pos
);
164 de_dbg(c
, "number of samples: %u", (unsigned int)n
);
167 static void do_avi_avih(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
172 n
= de_getu32le(pos
);
173 de_dbg(c
, "microseconds/frame: %u", (unsigned int)n
);
174 n
= de_getu32le(pos
+12);
175 de_dbg(c
, "flags: 0x%08x", (unsigned int)n
);
176 n
= de_getu32le(pos
+16);
177 de_dbg(c
, "number of frames: %u", (unsigned int)n
);
178 n
= de_getu32le(pos
+24);
179 de_dbg(c
, "number of streams: %u", (unsigned int)n
);
180 n
= de_getu32le(pos
+32);
181 n2
= de_getu32le(pos
+36);
182 de_dbg_dimensions(c
, n
, n2
);
183 // TODO: There are more fields in this chunk.
186 static void do_avi_strh(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
188 struct de_fourcc type4cc
;
189 struct de_fourcc codec4cc
;
193 dbuf_read_fourcc(ictx
->f
, pos
, &type4cc
, 4, 0x0);
194 de_dbg(c
, "stream type: '%s'", type4cc
.id_dbgstr
);
195 // Hack. TODO: Need a better way to track state.
196 d
->curr_avi_stream_type
= type4cc
.id
;
198 dbuf_read_fourcc(ictx
->f
, pos
+4, &codec4cc
, 4, 0x0);
199 de_dbg(c
, "codec: '%s'", codec4cc
.id_dbgstr
);
201 // TODO: There are more fields here.
204 static void do_avi_strf(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
206 if(d
->curr_avi_stream_type
==CODE_vids
) {
207 struct de_bmpinfo bi
;
208 // For video streams, this is a BITMAPINFO.
209 fmtutil_get_bmpinfo(c
, ictx
->f
, &bi
, pos
, len
, DE_BMPINFO_CMPR_IS_4CC
);
210 // This chunk contains just a bitmap header, so we can't extract a bitmap.
212 else if(d
->curr_avi_stream_type
==CODE_auds
) {
213 // For audio streams, this is a WAVEFORMATEX.
214 decode_WAVEFORMATEX(c
, d
, ictx
, pos
, len
);
218 static void do_cdr_bmp(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
221 // The first 2 bytes are an index, or something. BMP starts at offset 2.
222 dbuf_create_file_from_slice(ictx
->f
, pos
+2, len
-2, "bmp", NULL
, 0);
225 static void do_palette(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
234 if(!ictx
->is_le
) return;
235 ver
= de_getu16le(pos
);
236 de_dbg(c
, "version: 0x%04x", (unsigned int)ver
);
238 n
= de_getu16le(pos
);
239 de_dbg(c
, "number of entries: %d", (int)n
);
241 if(n
>(len
-4)/4) n
=(len
-4)/4;
245 de_dbg(c
, "palette entries at %d", (int)pos
);
249 g
= de_getbyte(pos
+1);
250 b
= de_getbyte(pos
+2);
251 flags
= de_getbyte(pos
+3);
253 clr
= DE_MAKE_RGB(r
, g
, b
);
254 de_snprintf(tmps
, sizeof(tmps
), " flags=0x%02x", (unsigned int)flags
);
255 de_dbg_pal_entry2(c
, i
, clr
, NULL
, NULL
, tmps
);
257 de_dbg_indent(c
, -1);
260 static void do_DISP_DIB(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
262 de_module_params
*mparams
= NULL
;
266 mparams
= de_malloc(c
, sizeof(de_module_params
));
267 mparams
->in_params
.codes
= "X"; // "auxiliary"
268 mparams
->in_params
.flags
= 0x80; // ".preview.bmp"
269 de_run_module_by_id_on_slice(c
, "dib", mparams
, ictx
->f
, pos
, len
);
273 static void do_DISP_TEXT(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len1
)
279 if(dbuf_search_byte(ictx
->f
, 0x00, pos
, len1
, &foundpos
)) {
280 len
= foundpos
- pos
;
282 de_dbg(c
, "text length: %d", (int)len
);
285 do_extract_raw(c
, d
, ictx
, pos
, len
, "disp.txt", DE_CREATEFLAG_IS_AUX
);
288 static void do_ICCP(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
290 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, "icc", NULL
, DE_CREATEFLAG_IS_AUX
);
293 static void do_EXIF(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
295 fmtutil_handle_exif(c
, pos
, len
);
298 static void do_XMP(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
300 dbuf_create_file_from_slice(ictx
->f
, pos
, len
, "xmp", NULL
, DE_CREATEFLAG_IS_AUX
);
303 static void do_DISP(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64 len
)
308 if(!ictx
->is_le
) return;
310 ty
= (unsigned int)de_getu32le(pos
);
311 de_dbg(c
, "data type: %u (%s)", ty
,
312 fmtutil_get_windows_cb_data_type_name(ty
));
319 do_DISP_TEXT(c
, d
, ictx
, dpos
, dlen
);
323 do_DISP_DIB(c
, d
, ictx
, dpos
, dlen
);
328 static int is_fourcc_at(deark
*c
, struct de_iffctx
*ictx
, i64 pos
)
333 dbuf_read(ictx
->f
, b
, pos
, 4);
335 if(b
[i
]<32 || b
[i
]>126) return 0;
340 static int do_cmx_parse_hack(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64
*plen
)
344 // Some CMX chunks seem to be followed by a non-RIFF segment starting with either
345 // 04 00 (4 bytes) or 10 00 (16 bytes). I'm just guessing how to parse them.
346 n
= dbuf_getu16le(ictx
->f
, pos
);
347 if(n
>256 || n
==0) return 0;
349 n_padded
= de_pad_to_2(n
);
350 if(is_fourcc_at(c
, ictx
, pos
+ n_padded
)) {
351 de_dbg(c
, "[%d non-RIFF bytes at %"I64_FMT
"]", (int)n_padded
, pos
);
358 // CMV files seem to consist of two RIFF chunks, separated by four 0x00 bytes.
359 // (Maybe some sort of scan-for-the-next-RIFF-chunk logic should happen by
360 // default, but it's hard to be sure we won't break something.)
361 static int do_cmv_parse_hack(deark
*c
, lctx
*d
, struct de_iffctx
*ictx
, i64 pos
, i64
*plen
)
363 if(ictx
->level
!=0) return 0;
364 if(dbuf_getu32be(ictx
->f
, pos
)!=0) return 0;
365 if(dbuf_getu32be(ictx
->f
, pos
+4)!=CHUNK_RIFF
) return 0;
366 de_dbg(c
, "[%d non-RIFF bytes at %"I64_FMT
"]", 4, pos
);
371 static int my_handle_nonchunk_riff_data_fn(deark
*c
, struct de_iffctx
*ictx
,
374 lctx
*d
= (lctx
*)ictx
->userdata
;
376 if(d
->cmx_parse_hack
) {
377 return do_cmx_parse_hack(c
, d
, ictx
, pos
, plen
);
379 else if(d
->cmv_parse_hack
) {
380 return do_cmv_parse_hack(c
, d
, ictx
, pos
, plen
);
385 static int my_on_std_container_start_fn(deark
*c
, struct de_iffctx
*ictx
)
387 lctx
*d
= (lctx
*)ictx
->userdata
;
388 u32 chunktype
= ictx
->curr_container_fmt4cc
.id
;
389 u32 formtype
= ictx
->curr_container_contentstype4cc
.id
;
390 int suppress_decoding
= 0;
392 if(ictx
->level
==0 && (chunktype
==CHUNK_RIFF
|| chunktype
==CHUNK_RIFX
) &&
393 d
->top_level_chunk_count
==0)
395 const char *fmtname
= NULL
;
398 case CODE_ACON
: fmtname
= "Windows animated cursor"; break;
399 case CODE_AVI
: fmtname
= "AVI"; break;
400 case CODE_CDRX
: fmtname
= "Corel CCX"; break;
402 fmtname
= "Corel CMX";
403 ictx
->handle_nonchunk_data_fn
= my_handle_nonchunk_riff_data_fn
;
404 d
->cmx_parse_hack
= 1;
407 fmtname
= "CorelMOVE";
408 ictx
->handle_nonchunk_data_fn
= my_handle_nonchunk_riff_data_fn
;
409 d
->cmv_parse_hack
= 1;
411 case CODE_WAVE
: fmtname
= "WAVE"; break;
412 case CODE_WEBP
: fmtname
= "WebP"; break;
415 // Special check for CorelDraw formats.
416 if(!fmtname
&& (formtype
>>8 == 0x434452U
) /* "CDR" */) {
418 fmtname
= "CorelDRAW (RIFF-based)";
422 de_declare_fmt(c
, fmtname
);
426 if(d
->is_cdr
&& chunktype
==CHUNK_LIST
) {
427 // 'cmpr' LISTs in CorelDraw files are not correctly formed.
428 // Tell the parser not to process them.
429 if(formtype
==CODE_cmpr
) {
430 de_dbg(c
, "[not decoding CDR cmpr list]");
431 suppress_decoding
= 1;
436 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
&& chunktype
==CHUNK_LIST
&&
439 // There are often a huge number of these chunks, and we can't do
440 // anything interesting with them, so skip them by default.
441 if(c
->debug_level
<2) {
442 de_dbg(c
, "[not decoding movi chunks]");
443 suppress_decoding
= 1;
448 // Keep track of when we are inside a 'movi' container.
450 d
->in_movi_level
= ictx
->level
;
456 d
->top_level_chunk_count
++;
458 return !suppress_decoding
;
461 static int my_on_container_end_fn(deark
*c
, struct de_iffctx
*ictx
)
463 lctx
*d
= (lctx
*)ictx
->userdata
;
465 if(ictx
->curr_container_contentstype4cc
.id
==CODE_movi
&&
466 d
->in_movi
&& ictx
->level
==d
->in_movi_level
)
474 static int my_preprocess_riff_chunk_fn(deark
*c
, struct de_iffctx
*ictx
)
476 const char *name
= NULL
;
478 // TODO: Need a better way to do this.
479 switch(ictx
->chunkctx
->chunk4cc
.id
) {
480 case CHUNK_DISP
: name
="display"; break;
481 case CHUNK_IART
: name
="artist"; break;
482 case CHUNK_ICOP
: name
="copyright"; break;
483 case CHUNK_ICMT
: name
="comments"; break;
484 case CHUNK_IKEY
: name
="keywords"; break;
485 case CHUNK_ISBJ
: name
="subject"; break;
486 case CHUNK_JUNK
: name
="filler"; break;
487 case CHUNK_LIST
: name
="subchunk container"; break;
491 ictx
->chunkctx
->chunk_name
= name
;
496 static int my_riff_chunk_handler(deark
*c
, struct de_iffctx
*ictx
)
500 lctx
*d
= (lctx
*)ictx
->userdata
;
502 // We should always set this flag for formats (like RIFF) that aren't standard IFF.
505 list_type
= ictx
->curr_container_contentstype4cc
.id
;
506 dpos
= ictx
->chunkctx
->dpos
;
507 dlen
= ictx
->chunkctx
->dlen
;
509 switch(ictx
->chunkctx
->chunk4cc
.id
) {
513 ictx
->is_std_container
= 1;
517 if(list_type
==CODE_INFO
) {
518 do_INFO_item(c
, d
, ictx
, dpos
, dlen
, ictx
->chunkctx
->chunk4cc
.id
);
522 switch(ictx
->chunkctx
->chunk4cc
.id
) {
525 do_DISP(c
, d
, ictx
, dpos
, dlen
);
531 case CHUNK_ICCP
: // Used by WebP
532 do_ICCP(c
, d
, ictx
, dpos
, dlen
);
535 case CHUNK_EXIF
: // Used by WebP
536 do_EXIF(c
, d
, ictx
, dpos
, dlen
);
539 case CHUNK_XMP
: // Used by WebP
540 case CHUNK__PMX
: // Used by WAVE, AVI
541 do_XMP(c
, d
, ictx
, dpos
, dlen
);
545 if(ictx
->main_contentstype4cc
.id
==CODE_ACON
) {
546 extract_ani_frame(c
, d
, ictx
, dpos
, dlen
);
551 if(list_type
==CODE_RMID
) {
552 do_extract_raw(c
, d
, ictx
, dpos
, dlen
, "mid", 0);
554 else if(list_type
==CODE_PAL
) {
555 do_palette(c
, d
, ictx
, dpos
, dlen
);
560 if(ictx
->main_contentstype4cc
.id
==CODE_WAVE
) {
561 do_wav_fmt(c
, d
, ictx
, dpos
, dlen
);
566 if(ictx
->main_contentstype4cc
.id
==CODE_WAVE
) {
567 do_wav_fact(c
, d
, ictx
, dpos
, dlen
);
572 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
) {
573 do_avi_avih(c
, d
, ictx
, dpos
, dlen
);
578 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
) {
579 do_avi_strh(c
, d
, ictx
, dpos
, dlen
);
584 if(ictx
->main_contentstype4cc
.id
==CODE_AVI
) {
585 do_avi_strf(c
, d
, ictx
, dpos
, dlen
);
590 if(d
->is_cdr
&& ictx
->curr_container_contentstype4cc
.id
==CODE_bmpt
) {
591 do_cdr_bmp(c
, d
, ictx
, dpos
, dlen
);
596 if(c
->debug_level
>=2 &&
597 ictx
->main_contentstype4cc
.id
==CODE_AVI
&& !d
->in_movi
)
599 de_dbg_hexdump(c
, ictx
->f
, dpos
, dlen
, 256, NULL
, 0x1);
607 static void de_run_riff(deark
*c
, de_module_params
*mparams
)
610 struct de_iffctx
*ictx
= NULL
;
613 d
= de_malloc(c
, sizeof(lctx
));
614 ictx
= de_malloc(c
, sizeof(struct de_iffctx
));
616 ictx
->userdata
= (void*)d
;
617 ictx
->preprocess_chunk_fn
= my_preprocess_riff_chunk_fn
;
618 ictx
->handle_chunk_fn
= my_riff_chunk_handler
;
619 ictx
->on_std_container_start_fn
= my_on_std_container_start_fn
;
620 ictx
->on_container_end_fn
= my_on_container_end_fn
;
625 if(!de_memcmp(buf
, "RIFF", 4)) {
627 ictx
->reversed_4cc
= 0;
629 else if(!de_memcmp(buf
, "RIFX", 4)) {
631 ictx
->reversed_4cc
= 0;
633 else if(!de_memcmp(buf
, "XFIR", 4)) {
635 ictx
->reversed_4cc
= 1;
638 de_warn(c
, "This is probably not a RIFF file.");
640 ictx
->reversed_4cc
= 0;
643 fmtutil_read_iff_format(c
, ictx
, 0, ictx
->f
->len
);
649 static int de_identify_riff(deark
*c
)
656 has_sig
= (!de_memcmp(buf
, "RIFF", 4)) ||
657 (!de_memcmp(buf
, "XFIR", 4)) ||
658 (!de_memcmp(buf
, "RIFX", 4));
659 if(!has_sig
) return 0;
661 dlen
= de_getu32le(4);
662 // This check screens out .AMV format, for example.
663 if(dlen
==0 && c
->infile
->len
!=8) return 0;
668 void de_module_riff(deark
*c
, struct deark_module_info
*mi
)
671 mi
->desc
= "RIFF-based formats";
672 mi
->run_fn
= de_run_riff
;
673 mi
->identify_fn
= de_identify_riff
;