Made Unix builds more likely to be Y2038-compliant
[deark.git] / modules / id3.c
blobdc3b242a94031dd5660184525be2f7599a5fab71
1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // ID3 metadata
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_id3);
11 // **************************************************************************
12 // ID3v2
13 // **************************************************************************
// Frame identifiers as integers: the ASCII characters of the ID, packed with
// the first character in the most significant position.

// Three-character IDs (compared against when version_code==2)
#define CODE_COM  0x434f4dU
#define CODE_GEO  0x47454fU
#define CODE_PIC  0x504943U
#define CODE_POP  0x504f50U
#define CODE_TXX  0x545858U
#define CODE_WXX  0x575858U

// Four-character IDs (compared against when version_code>=3)
#define CODE_APIC 0x41504943U
#define CODE_COMM 0x434f4d4dU
#define CODE_GEOB 0x47454f42U
#define CODE_POPM 0x504f504dU
#define CODE_PRIV 0x50524956U
#define CODE_TXXX 0x54585858U
#define CODE_WXXX 0x57585858U

// Values of the "text encoding" byte found at the start of many frames
#define ID3ENC_ISO_8859_1 0
#define ID3ENC_UTF16      1
#define ID3ENC_UTF16BE    2
#define ID3ENC_UTF8       3
34 typedef struct id3v2ctx_struct {
35 u8 has_id3v2;
36 u8 wmpicture_mode;
38 i64 total_len;
40 // "data" is the extended header, the frames, and the padding, in the
41 // original file.
42 i64 data_start;
43 i64 data_len;
45 int has_padding;
46 i64 approx_padding_pos;
48 // Sigh. One would think that the "major version" of ID3v2 would
49 // necessarily always be 2. One would be wrong. It depends on context.
50 // The "2" is not stored in the file, which is fine. But the spec calls the
51 // first number that *is* stored the "major version", and the second number
52 // the "revision number".
53 u8 version_code, ver_revision;
55 // If set, the data is unsynched as a single blob, 2.3.x-style.
56 u8 global_level_unsync;
58 // If set, 2.4.x-style frame-level unsynch is used for all frames.
59 u8 global_frame_level_unsync;
61 u8 has_ext_header;
62 u8 is_experimental;
63 u8 has_footer;
65 const char *approx_mark;
66 } id3v2ctx;
68 static i64 get_synchsafe_int(dbuf *f, i64 pos)
70 u8 buf[4];
71 dbuf_read(f, buf, pos, 4);
72 return (buf[0]<<21)|(buf[1]<<14)|(buf[2]<<7)|(buf[3]);
75 static const char *get_id3v2_textenc_name(id3v2ctx *d, u8 id3_encoding)
77 const char *encname;
79 switch(id3_encoding) {
80 case 0: encname = "ISO-8859-1"; break;
81 case 1:
82 if(d->version_code==2) encname = "UCS-2";
83 else if(d->version_code==3) encname = "UCS-2 w/BOM";
84 else encname = "UTF-16 w/BOM";
85 break;
86 case 2: encname = "UTF-16BE"; break;
87 case 3: encname = "UTF-8"; break;
88 default: encname = "?";
90 return encname;
93 static void id3v2_read_to_ucstring(deark *c, dbuf *f, i64 pos1, i64 len,
94 de_ucstring *s, u8 id3_encoding)
96 i64 pos = pos1;
97 const char *bomdesc = "none";
98 de_encoding encoding_to_use = DE_ENCODING_UNKNOWN;
100 if(len<=0) goto done;
102 if(id3_encoding==ID3ENC_ISO_8859_1) {
103 encoding_to_use = DE_ENCODING_LATIN1;
105 else if(id3_encoding==ID3ENC_UTF16) {
106 u32 bom_id;
108 if(len<2) goto done;
109 bom_id = (u32)dbuf_getu16be(f, pos);
111 if(bom_id==0xfeff) {
112 encoding_to_use = DE_ENCODING_UTF16BE;
113 bomdesc = "BE";
115 else if(bom_id==0xfffe) {
116 encoding_to_use = DE_ENCODING_UTF16LE;
117 bomdesc = "LE";
119 else {
120 // TODO: What should we do if there's no BOM?
121 // v2.2.x does not say anything about a BOM, but it also does not
122 // say anything about what byte order is used.
123 // v2.3.x and 2.4.x require a BOM.
124 goto done;
126 pos += 2;
128 else if(id3_encoding==ID3ENC_UTF16BE) {
129 encoding_to_use = DE_ENCODING_UTF16BE;
131 else if(id3_encoding==ID3ENC_UTF8) { // UTF-8
132 encoding_to_use = DE_ENCODING_UTF8;
134 else {
135 goto done; // Error
138 // TODO: Maybe shouldn't use DE_DBG_MAX_STRLEN here.
139 dbuf_read_to_ucstring_n(f, pos, pos1+len-pos, DE_DBG_MAX_STRLEN, s, 0, encoding_to_use);
140 ucstring_truncate_at_NUL(s);
142 done:
143 if(id3_encoding==ID3ENC_UTF16 && c->debug_level>=2) {
144 de_dbg2(c, "BOM: %s", bomdesc);
148 static int read_id3v2_terminated_string(deark *c, id3v2ctx *d, dbuf *f,
149 i64 pos, i64 nbytes_avail, i64 nbytes_to_scan, u8 id3_encoding,
150 de_ucstring *s, i64 *bytes_consumed)
152 i64 foundpos = 0;
153 i64 stringlen;
154 int ret;
155 int retval = 0;
157 if(nbytes_to_scan > nbytes_avail)
158 nbytes_to_scan = nbytes_avail;
159 if(nbytes_to_scan < 0)
160 nbytes_to_scan = 0;
162 if(id3_encoding==ID3ENC_UTF16 || id3_encoding==ID3ENC_UTF16BE) {
163 // A 2-byte encoding
164 int foundflag = 0;
166 foundflag = dbuf_get_utf16_NULterm_len(f, pos, nbytes_to_scan, bytes_consumed);
167 if(!foundflag) goto done;
168 stringlen = (*bytes_consumed)-2;
170 else {
171 // A 1-byte encoding
172 ret = dbuf_search_byte(f, 0x00, pos, nbytes_to_scan, &foundpos);
173 if(!ret) goto done;
174 stringlen = foundpos - pos;
175 *bytes_consumed = stringlen + 1;
178 id3v2_read_to_ucstring(c, f, pos, stringlen, s, id3_encoding);
180 retval = 1;
181 done:
182 return retval;
185 // Read 10-byte main ID3v2 header
186 static int do_id3v2_header(deark *c, dbuf *f, id3v2ctx *d)
188 i64 pos;
189 u8 flags;
190 int retval = 0;
191 int has_global_compression = 0;
193 pos = 0;
194 d->approx_mark = "";
196 de_dbg(c, "ID3v2 header at %d", (int)pos);
197 de_dbg_indent(c, 1);
199 // TODO: Verify signature
200 d->has_id3v2 = 1;
201 pos += 3; // ID3v2 file identifier
203 d->version_code = dbuf_getbyte(f, pos++);
204 d->ver_revision = dbuf_getbyte(f, pos++);
205 de_dbg(c, "ID3v2 version: (2.)%d.%d", (int)d->version_code, (int)d->ver_revision);
206 if(d->version_code<2 || d->version_code>4) {
207 de_warn(c, "Unsupported ID3v2 version: (2.)%d.x", (int)d->version_code);
208 goto done;
211 flags = dbuf_getbyte(f, pos++);
212 de_dbg(c, "flags: 0x%02x", (unsigned int)flags);
213 de_dbg_indent(c, 1);
215 if(d->version_code<=3) {
216 d->global_level_unsync = (flags&0x80)?1:0;
217 de_dbg(c, "global-level unsynchronisation: %d", (int)d->global_level_unsync);
219 else if(d->version_code==4) {
220 d->global_frame_level_unsync = (flags&0x80)?1:0;
221 de_dbg(c, "all frames use unsynchronisation: %d", (int)d->global_frame_level_unsync);
224 if(d->global_level_unsync) {
225 d->approx_mark = "~";
228 if(d->version_code==2) {
229 has_global_compression = (flags&0x40)?1:0;
230 de_dbg(c, "uses compression: %d", d->has_ext_header);
232 else if(d->version_code>=3) {
233 d->has_ext_header = (flags&0x40)?1:0;
234 de_dbg(c, "has extended header: %d", d->has_ext_header);
237 if(d->version_code>=3) {
238 d->is_experimental = (flags&0x20)?1:0;
239 de_dbg(c, "is experimental: %d", d->is_experimental);
242 if(d->version_code >= 4) {
243 d->has_footer = (flags&0x10)?1:0;
244 de_dbg(c, "has footer: %d", d->has_footer);
247 de_dbg_indent(c, -1);
249 d->data_len = get_synchsafe_int(f, pos);
250 de_dbg(c, "size: %d", (int)d->data_len);
251 //pos += 4;
253 d->data_start = 10;
255 d->total_len = d->data_start + d->data_len;
256 if(d->has_footer) d->total_len += 10;
258 de_dbg(c, "calculated end of ID3v2 data: %d", (int)d->total_len);
260 if(has_global_compression) {
261 de_warn(c, "ID3v2.2.x Compression not supported");
262 goto done;
265 retval = 1;
267 done:
268 de_dbg_indent(c, -1);
269 return retval;
272 // This type of escaping is called "unsynchronisation", but I'm just calling it
273 // "escaping" in some places, because otherwise it's too confusing for me.
274 // The term "unsynchronisation" makes it sound like it's *un*doing something,
275 // which it's not.
276 // Also, the process of undoing unsynchronisation does not seem to have a
277 // name. Calling it "synchronisation" would be confusing, and not really
278 // accurate; and "ununsynchronisation" would be a word crime.
279 static void unescape_id3v2_data(deark *c, dbuf *inf, i64 inf_start,
280 i64 inf_len, dbuf *outf)
282 i64 srcpos = inf_start;
283 u8 b0;
285 de_dbg(c, "unescaping \"unsynchronised\" ID3v2 data");
286 de_dbg_indent(c, 1);
288 while(srcpos<inf_start+inf_len) {
289 b0 = dbuf_getbyte(inf, srcpos++);
290 if(b0==0xff && srcpos<(inf_start+inf_len-1) && dbuf_getbyte(inf, srcpos)==0x00) {
291 srcpos++;
293 dbuf_writebyte(outf, b0);
296 de_dbg(c, "unescaped %d bytes to %d bytes", (int)inf_len, (int)outf->len);
297 de_dbg_indent(c, -1);
300 static void decode_id3v2_frame_text(deark *c, id3v2ctx *d,
301 dbuf *f, i64 pos1, i64 len, struct de_fourcc *tag4cc)
303 u8 id3_encoding;
304 de_ucstring *s = NULL;
305 i64 pos = pos1;
307 if(len<1) goto done;
308 id3_encoding = dbuf_getbyte(f, pos++);
309 de_dbg(c, "text encoding: %d (%s)", (int)id3_encoding,
310 get_id3v2_textenc_name(d, id3_encoding));
312 s = ucstring_create(c);
313 id3v2_read_to_ucstring(c, f, pos, pos1+len-pos, s, id3_encoding);
314 de_dbg(c, "text: \"%s\"", ucstring_getpsz(s));
316 done:
317 ucstring_destroy(s);
320 // From frames starting with "W", except WXXX
321 static void decode_id3v2_frame_urllink(deark *c, id3v2ctx *d,
322 dbuf *f, i64 pos1, i64 len, struct de_fourcc *tag4cc)
324 de_ucstring *s = NULL;
326 s = ucstring_create(c);
327 dbuf_read_to_ucstring(f, pos1, len, s, 0, DE_ENCODING_LATIN1);
328 de_dbg(c, "url: \"%s\"", ucstring_getpsz(s));
329 ucstring_destroy(s);
332 // TXX, TXXX, WXX, WXXX
333 static void decode_id3v2_frame_txxx_etc(deark *c, id3v2ctx *d,
334 dbuf *f, i64 pos1, i64 len, struct de_fourcc *tag4cc)
336 i64 pos = pos1;
337 u8 id3_encoding;
338 de_ucstring *description = NULL;
339 de_ucstring *value = NULL;
340 i64 bytes_consumed;
341 const char *name;
342 int ret;
344 id3_encoding = dbuf_getbyte(f, pos++);
345 de_dbg(c, "text encoding: %d (%s)", (int)id3_encoding,
346 get_id3v2_textenc_name(d, id3_encoding));
348 description = ucstring_create(c);
349 bytes_consumed = 0;
350 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, id3_encoding, description, &bytes_consumed);
351 if(!ret) goto done;
352 de_dbg(c, "description: \"%s\"", ucstring_getpsz(description));
353 pos += bytes_consumed;
355 value = ucstring_create(c);
356 id3v2_read_to_ucstring(c, f, pos, pos1+len-pos, value, id3_encoding);
357 if(tag4cc->id==CODE_WXX || tag4cc->id==CODE_WXXX) name="url";
358 else name="value";
359 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz(value));
361 done:
362 ucstring_destroy(description);
363 ucstring_destroy(value);
366 static void decode_id3v2_frame_priv(deark *c, id3v2ctx *d,
367 dbuf *f, i64 pos1, i64 len)
369 struct de_stringreaderdata *owner = NULL;
370 i64 pos = pos1;
371 i64 nbytes_to_scan;
372 i64 payload_len;
374 nbytes_to_scan = pos1+len-pos;
375 if(nbytes_to_scan>256) nbytes_to_scan=256;
377 owner = dbuf_read_string(f, pos, nbytes_to_scan, nbytes_to_scan,
378 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_LATIN1);
379 if(!owner->found_nul) goto done;
381 de_dbg(c, "owner: \"%s\"", ucstring_getpsz(owner->str));
382 pos += owner->bytes_consumed;
384 payload_len = pos1+len-pos;
385 if(payload_len<1) goto done;
387 de_dbg(c, "private frame data at %"I64_FMT", len=%"I64_FMT, pos, payload_len);
388 if(!de_strcmp(owner->sz, "XMP")) {
389 dbuf_create_file_from_slice(f, pos, payload_len, "xmp", NULL, DE_CREATEFLAG_IS_AUX);
391 else if(c->debug_level>=2) {
392 de_dbg_indent(c, 1);
393 de_dbg_hexdump(c, f, pos, payload_len, 256, NULL, 0x1);
394 de_dbg_indent(c, -1);
397 done:
398 de_destroy_stringreaderdata(c, owner);
401 static void decode_id3v2_frame_comm(deark *c, id3v2ctx *d,
402 dbuf *f, i64 pos1, i64 len)
404 u8 id3_encoding;
405 i64 pos = pos1;
406 de_ucstring *lang = NULL;
407 de_ucstring *shortdesc = NULL;
408 de_ucstring *comment_text = NULL;
409 i64 bytes_consumed;
410 int ret;
412 id3_encoding = dbuf_getbyte(f, pos++);
413 de_dbg(c, "text encoding: %d (%s)", (int)id3_encoding,
414 get_id3v2_textenc_name(d, id3_encoding));
416 lang = ucstring_create(c);
417 dbuf_read_to_ucstring(f, pos, 3, lang, 0, DE_ENCODING_ASCII);
418 de_dbg(c, "language: \"%s\"", ucstring_getpsz(lang));
419 pos += 3;
421 shortdesc = ucstring_create(c);
422 bytes_consumed = 0;
423 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, id3_encoding,
424 shortdesc, &bytes_consumed);
425 if(!ret) goto done;
426 de_dbg(c, "short description: \"%s\"", ucstring_getpsz(shortdesc));
427 pos += bytes_consumed;
429 comment_text = ucstring_create(c);
430 id3v2_read_to_ucstring(c, f, pos, pos1+len-pos, comment_text, id3_encoding);
431 de_dbg(c, "comment: \"%s\"", ucstring_getpsz(comment_text));
433 done:
434 ucstring_destroy(lang);
435 ucstring_destroy(shortdesc);
436 ucstring_destroy(comment_text);
439 struct apic_type_info {
440 u8 picture_type;
441 const char *name;
442 const char *token;
444 static const struct apic_type_info apic_type_info_arr[] = {
445 {0x00, "other/unspecified", NULL},
446 {0x01, "standard file icon", "icon"},
447 {0x02, "file icon", "icon"},
448 {0x03, "front cover", "front_cover"},
449 {0x04, "back cover", "back_cover"},
450 {0x05, "leaflet page", NULL},
451 {0x06, "media", "media"},
452 {0x07, "lead artist", NULL},
453 {0x08, "artist", NULL},
454 {0x09, "conductor", NULL},
455 {0x0a, "band", NULL},
456 {0x0b, "composer", NULL},
457 {0x0c, "lyricist", NULL},
458 {0x0d, "recording location", NULL},
459 {0x0e, "picture taken during recording", NULL},
460 {0x0f, "picture taken during performance", NULL},
461 {0x10, "frame from video", NULL},
462 {0x12, "illustration", NULL},
463 {0x13, "logo of artist", NULL},
464 {0x14, "logo of publisher/studio", NULL}
467 static const struct apic_type_info *get_apic_type_info(u8 t)
469 size_t k;
471 for(k=0; k<DE_ARRAYCOUNT(apic_type_info_arr); k++) {
472 if(apic_type_info_arr[k].picture_type == t) {
473 return &apic_type_info_arr[k];
476 return NULL;
479 static void extract_pic_apic(deark *c, id3v2ctx *d, dbuf *f,
480 i64 pos, i64 len, const struct apic_type_info *ptinfo)
482 const char *ext;
483 char fullext[32];
484 u8 sig[2];
485 const char *token = NULL;
487 dbuf_read(f, sig, pos, 2);
488 if(sig[0]==0x89 && sig[1]==0x50) ext="png";
489 else if(sig[0]==0xff && sig[1]==0xd8) ext="jpg";
490 else ext="bin";
492 if(ptinfo && ptinfo->token) token = ptinfo->token;
493 if(!token) {
494 if(d->wmpicture_mode) token = "wmpic";
496 if(!token) token = "id3pic";
498 de_snprintf(fullext, sizeof(fullext), "%s.%s", token, ext);
500 dbuf_create_file_from_slice(f, pos, len, fullext, NULL, DE_CREATEFLAG_IS_AUX);
503 // Similar to decode_id3v2_frame_pic_apic()
504 static void decode_id3v2_frame_wmpicture(deark *c, id3v2ctx *d,
505 dbuf *f, i64 pos1, i64 len)
507 u8 picture_type;
508 i64 pos = pos1;
509 i64 pic_data_len;
510 i64 stringlen; // includes terminating 0x0000
511 de_ucstring *mimetype = NULL;
512 de_ucstring *description = NULL;
513 const struct apic_type_info *ptinfo = NULL;
514 int ret;
516 picture_type = dbuf_getbyte(f, pos++);
517 ptinfo = get_apic_type_info(picture_type);
518 de_dbg(c, "picture type: 0x%02x (%s)", (unsigned int)picture_type,
519 ptinfo?ptinfo->name:"?");
521 pic_data_len = dbuf_getu32le(f, pos);
522 de_dbg(c, "picture size: %u", (unsigned int)pic_data_len);
523 pos += 4;
525 ret = dbuf_get_utf16_NULterm_len(f, pos, pos1+len-pos, &stringlen);
526 if(!ret) goto done;
527 mimetype = ucstring_create(c);
528 dbuf_read_to_ucstring_n(f, pos, stringlen-2, 256, mimetype, 0, DE_ENCODING_UTF16LE);
529 de_dbg(c, "mime type: \"%s\"", ucstring_getpsz_d(mimetype));
530 pos += stringlen;
532 ret = dbuf_get_utf16_NULterm_len(f, pos, pos1+len-pos, &stringlen);
533 if(!ret) goto done;
534 mimetype = ucstring_create(c);
535 dbuf_read_to_ucstring_n(f, pos, stringlen-2, 2048, mimetype, 0, DE_ENCODING_UTF16LE);
536 de_dbg(c, "description: \"%s\"", ucstring_getpsz_d(mimetype));
537 // TODO: Maybe the description should be used in the filename?
538 pos += stringlen;
540 if(pos+pic_data_len > pos1+len) goto done;
541 extract_pic_apic(c, d, f, pos, pic_data_len, ptinfo);
543 done:
544 ucstring_destroy(mimetype);
545 ucstring_destroy(description);
548 static void decode_id3v2_frame_pic_apic(deark *c, id3v2ctx *d,
549 dbuf *f, i64 pos1, i64 len, struct de_fourcc *tag4cc)
551 u8 id3_encoding;
552 u8 picture_type;
553 i64 pos = pos1;
554 struct de_stringreaderdata *fmt_srd = NULL;
555 de_ucstring *mimetype = NULL;
556 de_ucstring *description = NULL;
557 const struct apic_type_info *ptinfo = NULL;
558 i64 bytes_consumed = 0;
559 int ret;
561 id3_encoding = dbuf_getbyte(f, pos++);
562 de_dbg(c, "text encoding: %d (%s)", (int)id3_encoding,
563 get_id3v2_textenc_name(d, id3_encoding));
565 if(tag4cc->id==CODE_PIC) {
566 fmt_srd = dbuf_read_string(f, pos, 3, 3, 0, DE_ENCODING_ASCII);
567 de_dbg(c, "format: \"%s\"", ucstring_getpsz(fmt_srd->str));
568 pos += 3;
570 else {
571 mimetype = ucstring_create(c);
572 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, ID3ENC_ISO_8859_1,
573 mimetype, &bytes_consumed);
574 if(!ret) goto done;
575 de_dbg(c, "mime type: \"%s\"", ucstring_getpsz(mimetype));
576 pos += bytes_consumed;
579 picture_type = dbuf_getbyte(f, pos++);
580 ptinfo = get_apic_type_info(picture_type);
581 de_dbg(c, "picture type: 0x%02x (%s)", (unsigned int)picture_type,
582 ptinfo?ptinfo->name:"?");
584 description = ucstring_create(c);
585 // "The description has a maximum length of 64 characters" [we'll allow more]
586 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, id3_encoding,
587 description, &bytes_consumed);
588 if(!ret) goto done;
589 de_dbg(c, "description: \"%s\"", ucstring_getpsz(description));
590 pos += bytes_consumed;
592 if(pos >= pos1+len) goto done;
593 extract_pic_apic(c, d, f, pos, pos1+len-pos, ptinfo);
595 done:
596 de_destroy_stringreaderdata(c, fmt_srd);
597 ucstring_destroy(mimetype);
598 ucstring_destroy(description);
601 // FLAC "PICTURE" format is, I think it's safe to say, inspired by ID3 PIC/APIC
602 // format. But it's quite different in detail.
603 // This function probably belongs in the flac module instead of here, but it's
604 // a tough call. Putting it here is easier for now, as it makes it easy to
605 // reuse some code.
606 static void decode_flacpicture(deark *c, id3v2ctx *d, dbuf *f,
607 i64 pos1, i64 len)
609 u32 picture_type;
610 i64 pos = pos1;
611 i64 pic_data_len;
612 i64 stringlen;
613 de_ucstring *mimetype = NULL;
614 de_ucstring *description = NULL;
615 const struct apic_type_info *ptinfo = NULL;
617 picture_type = (u32)dbuf_getu32be_p(f, &pos);
618 if(picture_type<=0xff) {
619 ptinfo = get_apic_type_info((u8)picture_type);
621 de_dbg(c, "picture type: 0x%04x (%s)", (unsigned int)picture_type,
622 ptinfo?ptinfo->name:"?");
624 stringlen = dbuf_getu32be_p(f, &pos);
625 mimetype = ucstring_create(c);
626 dbuf_read_to_ucstring_n(f, pos, stringlen, 256, mimetype, 0, DE_ENCODING_UTF8);
627 de_dbg(c, "mime type: \"%s\"", ucstring_getpsz_d(mimetype));
628 pos += stringlen;
630 stringlen = dbuf_getu32be_p(f, &pos);
631 description = ucstring_create(c);
632 dbuf_read_to_ucstring_n(f, pos, stringlen, 512, description, 0, DE_ENCODING_UTF8);
633 de_dbg(c, "description: \"%s\"", ucstring_getpsz_d(description));
634 pos += stringlen;
636 pos += 4; // width
637 pos += 4; // height
638 pos += 4; // bits/pixel
639 pos += 4; // # palette entries
640 pic_data_len = dbuf_getu32be_p(f, &pos);
641 de_dbg(c, "picture size: %u", (unsigned int)pic_data_len);
642 if(pos+pic_data_len > pos1+len) goto done;
643 extract_pic_apic(c, d, f, pos, pic_data_len, ptinfo);
645 done:
646 ucstring_destroy(mimetype);
647 ucstring_destroy(description);
650 static void decode_id3v2_frame_geob(deark *c, id3v2ctx *d,
651 dbuf *f, i64 pos1, i64 len)
653 u8 id3_encoding;
654 i64 pos = pos1;
655 de_ucstring *mimetype = NULL;
656 de_ucstring *filename = NULL;
657 de_ucstring *description = NULL;
658 i64 bytes_consumed = 0;
659 int ret;
660 i64 objlen;
662 id3_encoding = dbuf_getbyte(f, pos++);
663 de_dbg(c, "text encoding: %d (%s)", (int)id3_encoding,
664 get_id3v2_textenc_name(d, id3_encoding));
666 mimetype = ucstring_create(c);
667 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, ID3ENC_ISO_8859_1,
668 mimetype, &bytes_consumed);
669 if(!ret) goto done;
670 de_dbg(c, "mime type: \"%s\"", ucstring_getpsz(mimetype));
671 pos += bytes_consumed;
673 filename = ucstring_create(c);
674 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, id3_encoding,
675 filename, &bytes_consumed);
676 if(!ret) goto done;
677 de_dbg(c, "filename: \"%s\"", ucstring_getpsz(filename));
678 pos += bytes_consumed;
680 description = ucstring_create(c);
681 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, id3_encoding,
682 description, &bytes_consumed);
683 if(!ret) goto done;
684 de_dbg(c, "description: \"%s\"", ucstring_getpsz(description));
685 pos += bytes_consumed;
687 objlen = pos1+len-pos;
688 if(objlen<1) goto done;
690 de_dbg(c, "[%d bytes of encapsulated object data]", (int)objlen);
692 if(c->extract_level>=2) {
693 dbuf_create_file_from_slice(f, pos, objlen, "encobj.bin",
694 NULL, DE_CREATEFLAG_IS_AUX);
696 else if(c->debug_level>=2) {
697 de_dbg_indent(c, 1);
698 de_dbg_hexdump(c, f, pos, objlen, 256, NULL, 0x1);
699 de_dbg_indent(c, -1);
702 done:
703 ucstring_destroy(mimetype);
704 ucstring_destroy(filename);
705 ucstring_destroy(description);
708 // Popularimeter
709 static void decode_id3v2_frame_pop_popm(deark *c, id3v2ctx *d,
710 dbuf *f, i64 pos1, i64 len)
712 i64 bytes_consumed = 0;
713 de_ucstring *email = NULL;
714 i64 pos = pos1;
715 int rating;
716 int ret;
718 email = ucstring_create(c);
719 ret = read_id3v2_terminated_string(c, d, f, pos, pos1+len-pos, 256, ID3ENC_ISO_8859_1,
720 email, &bytes_consumed);
721 if(!ret) goto done;
722 de_dbg(c, "email/id: \"%s\"", ucstring_getpsz(email));
723 pos += bytes_consumed;
725 if(pos1+len-pos < 1) goto done;
726 rating = (int)dbuf_getbyte(f, pos++);
727 de_dbg(c, "rating: %d%s", rating, (rating==0)?" (unknown)":"/255");
729 // TODO: There can be a "counter" field here.
731 done:
732 ucstring_destroy(email);
735 static void decode_id3v2_frame_internal(deark *c, id3v2ctx *d, dbuf *f,
736 i64 pos1, i64 len, struct de_fourcc *tag4cc)
738 if(d->version_code==2) {
739 if(tag4cc->id==CODE_TXX || tag4cc->id==CODE_WXX) {
740 decode_id3v2_frame_txxx_etc(c, d, f, pos1, len, tag4cc);
742 else if(tag4cc->bytes[0]=='T') {
743 decode_id3v2_frame_text(c, d, f, pos1, len, tag4cc);
745 else if(tag4cc->bytes[0]=='W') {
746 decode_id3v2_frame_urllink(c, d, f, pos1, len, tag4cc);
748 else if(tag4cc->id==CODE_COM) {
749 decode_id3v2_frame_comm(c, d, f, pos1, len);
751 else if(tag4cc->id==CODE_GEO) {
752 decode_id3v2_frame_geob(c, d, f, pos1, len);
754 else if(tag4cc->id==CODE_PIC) {
755 decode_id3v2_frame_pic_apic(c, d, f, pos1, len, tag4cc);
757 else if(tag4cc->id==CODE_POP) {
758 decode_id3v2_frame_pop_popm(c, d, f, pos1, len);
761 else if(d->version_code>=3) {
762 // "All text frame identifiers begin with "T". Only text frame identifiers
763 // begin with "T", with the exception of the "TXXX" frame."
764 if(tag4cc->id==CODE_TXXX || tag4cc->id==CODE_WXXX) {
765 decode_id3v2_frame_txxx_etc(c, d, f, pos1, len, tag4cc);
767 else if(tag4cc->bytes[0]=='T') {
768 decode_id3v2_frame_text(c, d, f, pos1, len, tag4cc);
770 else if(tag4cc->bytes[0]=='W') {
771 decode_id3v2_frame_urllink(c, d, f, pos1, len, tag4cc);
773 else if(tag4cc->id==CODE_COMM) {
774 decode_id3v2_frame_comm(c, d, f, pos1, len);
776 else if(tag4cc->id==CODE_GEOB) {
777 decode_id3v2_frame_geob(c, d, f, pos1, len);
779 else if(tag4cc->id==CODE_PRIV) {
780 decode_id3v2_frame_priv(c, d, f, pos1, len);
782 else if(tag4cc->id==CODE_APIC) {
783 decode_id3v2_frame_pic_apic(c, d, f, pos1, len, tag4cc);
785 else if(tag4cc->id==CODE_POPM) {
786 decode_id3v2_frame_pop_popm(c, d, f, pos1, len);
791 static void decode_id3v2_frame(deark *c, id3v2ctx *d, dbuf *f,
792 i64 pos1, i64 len,
793 struct de_fourcc *tag4cc, unsigned int flags1, unsigned int flags2)
795 u8 frame_level_unsynch = d->global_frame_level_unsync;
796 dbuf *unescaped_frame = NULL;
798 if(d->version_code==3) {
799 if(flags2&0x80) { // 'i'
800 de_dbg(c, "[compressed frame not supported]");
801 goto done;
803 if(flags2&0x40) { // 'j'
804 de_dbg(c, "[encrypted frame not supported]");
805 goto done;
807 if(flags2&0x20) { // 'k'
808 de_dbg(c, "[grouped frame not supported]");
809 goto done;
812 if(d->version_code==4) {
813 if(flags2&0x40) { // 'h'
814 de_dbg(c, "[grouped frame not supported]");
815 goto done;
817 if(flags2&0x08) { // 'k'
818 de_dbg(c, "[compressed frame not supported]");
819 goto done;
821 if(flags2&0x04) { // 'm'
822 de_dbg(c, "[encrypted frame not supported]");
823 goto done;
825 if(flags2&0x02) { // 'n'
826 // If the global 'unsynch' flag is set, but a frame's local flag
827 // is not, evidence suggests the global flag has priority.
828 // So if this flag makes a change, it will only be 0->1, never 1->0.
829 frame_level_unsynch = 1;
831 if(flags2&0x01) { // 'p';
832 de_dbg(c, "[frame with data-length-indicator not supported]");
833 goto done;
837 if(frame_level_unsynch) {
838 unescaped_frame = dbuf_create_membuf(c, 0, 0);
839 unescape_id3v2_data(c, f, pos1, len, unescaped_frame);
840 decode_id3v2_frame_internal(c, d, unescaped_frame, 0, unescaped_frame->len, tag4cc);
842 else {
843 decode_id3v2_frame_internal(c, d, f, pos1, len, tag4cc);
846 done:
847 dbuf_close(unescaped_frame);
850 static const char *get_id3v2_frame_name(id3v2ctx *d, u32 id)
852 struct frame_list_entry {
853 u32 threecc, fourcc;
854 const char *name;
856 static const struct frame_list_entry frame_list[] = {
857 // This is a partial list, of some of the common frame types.
858 {0x54414cU, 0x54414c42U, "Album/Movie/Show title"},
859 {CODE_PIC, CODE_APIC, "Attached picture"},
860 {0x545032U, 0x54504532U, "Band/orchestra/accompaniment"},
861 {0x544250U, 0x5442504dU, "Beats per minute"},
862 {CODE_COM, CODE_COMM, "Comments"},
863 {0x57434dU, 0x57434f4dU, "Commercial information"},
864 {0x54434dU, 0x54434f4dU, "Composer"},
865 {0x545033U, 0x54504533U, "Conductor"},
866 {0x545431U, 0x54495431U, "Content group description"},
867 {0x54434fU, 0x54434f4eU, "Content type"},
868 {0x544352U, 0x54434f50U, "Copyright message"},
869 {0x544441U, 0x54444154U, "Date"},
870 {0x54454eU, 0x54454e43U, "Encoded by"},
871 {CODE_GEO, CODE_GEOB, "General encapsulated object"},
872 {0x544b45U, 0x544b4559U, "Initial key"},
873 {0, 0x54434d50U, "iTunes Compilation Flag"}, // TCMP
874 {0x544c41U, 0x544c414eU, "Language"},
875 {0x545031U, 0x54504531U, "Lead artist/Performing group"}, // TP1,TPE1
876 {0x544c45U, 0x544c454eU, "Length"},
877 {0x545854U, 0x54455854U, "Lyricist"},
878 {0x4d4349U, 0x4d434449U, "Music CD identifier"},
879 {0x574152U, 0x574f4152U, "Official artist/performer webpage"},
880 {0x574146U, 0x574f4146U, "Official audio file webpage"},
881 {0x574153U, 0x574f4153U, "Official audio source webpage"},
882 {0x544f54U, 0x544f414cU, "Original album/movie/show title"},
883 {0x544f41U, 0x544f5045U, "Original artist/performer"},
884 {0x544f4cU, 0x544f4c59U, "Original lyricist"},
885 {0x545041U, 0x54504f53U, "Part of a set"}, // TPA,TPOS
886 {CODE_POP, CODE_POPM, "Popularimeter"},
887 {0, CODE_PRIV, "Private frame"},
888 {0x545042U, 0x54505542U, "Publisher"},
889 {0, 0x54445243U, "Recording time"},
890 {0x525641U, 0x52564144U, "Relative volume adjustment"},
891 {0x545353U, 0x54535345U, "Software/Hardware and settings used for encoding"},
892 {0x54494dU, 0x54494d45U, "Time"},
893 {0x545432U, 0x54495432U, "Title"},
894 {0x54524bU, 0x5452434bU, "Track number"},
895 {0x554c54U, 0x55534c54U, "Unsynchronized lyric transcription"},
896 {CODE_TXX, CODE_TXXX, "User defined text information"},
897 {CODE_WXX, CODE_WXXX, "User defined URL link"},
898 {0x545945U, 0x54594552U, "Year"}
900 size_t k;
902 for(k=0; k<DE_ARRAYCOUNT(frame_list); k++) {
903 if(d->version_code==2) {
904 if(id==frame_list[k].threecc)
905 return frame_list[k].name;
907 else {
908 if(id==frame_list[k].fourcc)
909 return frame_list[k].name;
912 return "?";
915 static void do_id3v2_frames(deark *c, id3v2ctx *d,
916 dbuf *f, i64 pos1, i64 len, i64 orig_pos)
918 i64 pos = pos1;
919 struct de_fourcc tag4cc;
920 int saved_indent_level;
921 i64 frame_idx = 0;
922 i64 frame_header_len;
924 de_zeromem(&tag4cc, sizeof(struct de_fourcc));
925 if(d->version_code<=2) frame_header_len = 6;
926 else frame_header_len = 10;
928 de_dbg_indent_save(c, &saved_indent_level);
930 de_dbg(c, "ID3v2 frames at %d", (int)orig_pos);
931 de_dbg_indent(c, 1);
933 while(1) {
934 i64 frame_dlen;
935 u8 flags1, flags2;
936 u8 b;
937 char *flg2name;
939 if(pos+frame_header_len > pos1+len) break;
941 // Peek at the next byte
942 b = dbuf_getbyte(f, pos);
943 if(b==0x00) {
944 d->has_padding = 1;
945 d->approx_padding_pos = orig_pos+pos;
946 break;
949 // The offset we print might not be exact, because of (pre-v2.4.x)
950 // unsynchronisation.
951 // (We have no efficient way to map the position in the unescaped data
952 // back to the corresponding position in the original file.)
953 de_dbg(c, "frame #%d at %s%d", (int)frame_idx, d->approx_mark, (int)(orig_pos+pos));
954 de_dbg_indent(c, 1);
956 if(d->version_code<=2) {
957 // Version 2.2.x uses a "THREECC".
958 dbuf_read_fourcc(f, pos, &tag4cc, 3, 0x0);
959 pos += 3;
961 else {
962 dbuf_read_fourcc(f, pos, &tag4cc, 4, 0x0);
963 pos += 4;
966 de_dbg(c, "tag: '%s' (%s)", tag4cc.id_dbgstr,
967 get_id3v2_frame_name(d, tag4cc.id));
969 if(d->version_code<=2) {
970 frame_dlen = dbuf_getint_ext(f, pos, 3, 0, 0); // read 24-bit BE uint
971 pos += 3;
973 else if(d->version_code==3) {
974 frame_dlen = dbuf_getu32be(f, pos);
975 pos += 4;
977 else {
978 frame_dlen = get_synchsafe_int(f, pos);
979 pos += 4;
981 de_dbg(c, "size: %d", (int)frame_dlen);
983 if(d->version_code<=2) {
984 flags1 = 0;
985 flags2 = 0;
987 else {
988 flags1 = dbuf_getbyte(f, pos++);
989 flags2 = dbuf_getbyte(f, pos++);
990 if(d->version_code<=3) flg2name = "encoding";
991 else flg2name = "format_description";
992 de_dbg(c, "flags: status_messages=0x%02x, %s=0x%02x",
993 (unsigned int)flags1, flg2name, (unsigned int)flags2);
996 if(pos+frame_dlen > pos1+len) goto done;
997 decode_id3v2_frame(c, d, f, pos, frame_dlen, &tag4cc, flags1, flags2);
999 pos += frame_dlen;
1000 frame_idx++;
1001 de_dbg_indent(c, -1);
1004 done:
1005 de_dbg_indent_restore(c, saved_indent_level);
1008 // WM/Picture a metadata element that occurs in ASF, and maybe other, Microsoft
1009 // formats. Microsoft says
1010 // "This attribute is compatible with the ID3 frame, APIC."
1011 // That's slightly misleading. It contains the same information, but formatted
1012 // in an incompatible way.
1013 // It seems to be a serialization of the WM_PICTURE struct, with the fields in
1014 // a different order.
1015 static void do_wmpicture(deark *c, dbuf *f, i64 pos, i64 len)
1017 id3v2ctx *d = NULL;
1019 d = de_malloc(c, sizeof(id3v2ctx));
1020 d->wmpicture_mode = 1;
1021 decode_id3v2_frame_wmpicture(c, d, f, pos, len);
1022 de_free(c, d);
1025 static void do_flacpicture(deark *c, dbuf *f, i64 pos, i64 len)
1027 id3v2ctx *d = NULL;
1029 d = de_malloc(c, sizeof(id3v2ctx));
1030 decode_flacpicture(c, d, f, pos, len);
1031 de_free(c, d);
1034 static void do_id3v2(deark *c, dbuf *f, i64 pos, i64 bytes_avail,
1035 i64 *bytes_consumed)
1037 id3v2ctx *d = NULL;
1038 dbuf *unescaped_data = NULL;
1039 i64 ext_header_size = 0;
1040 int saved_indent_level;
1042 de_dbg_indent_save(c, &saved_indent_level);
1043 *bytes_consumed = 0;
1044 d = de_malloc(c, sizeof(id3v2ctx));
1045 if(!do_id3v2_header(c, f, d)) goto done;
1046 if(!d->has_id3v2) goto done;
1048 if(d->has_ext_header) {
1049 de_dbg(c, "ID3v2 extended header at %d", (int)d->data_start);
1050 de_dbg_indent(c, 1);
1051 if(d->version_code==3 && !d->global_level_unsync) {
1052 ext_header_size = 4 + dbuf_getu32be(f, d->data_start);
1053 de_dbg(c, "extended header size: %d", (int)ext_header_size);
1054 // TODO: Decode the rest of the extended header
1056 else if(d->version_code==4) {
1057 u8 ext_flags;
1058 ext_header_size = get_synchsafe_int(f, d->data_start);
1059 de_dbg(c, "extended header size: %d", (int)ext_header_size);
1060 // [d->data_start+5] = flag byte count that should always be 1
1061 ext_flags = dbuf_getbyte(f, d->data_start+5);
1062 de_dbg(c, "extended flags: 0x%02x", (unsigned int)ext_flags);
1063 // TODO: Decode the rest of the extended header
1065 else {
1066 de_warn(c, "Extended header not supported");
1067 goto done; // TODO: v2.3.x w/ unsynch
1069 de_dbg_indent(c, -1);
1070 if(ext_header_size > d->data_len) goto done;
1073 if(d->global_level_unsync) {
1074 unescaped_data = dbuf_create_membuf(c, 0, 0);
1075 unescape_id3v2_data(c, f, d->data_start,
1076 d->data_len, unescaped_data);
1078 else {
1079 unescaped_data = dbuf_open_input_subfile(f,
1080 d->data_start + ext_header_size,
1081 d->data_len - ext_header_size);
1084 do_id3v2_frames(c, d, unescaped_data, 0, unescaped_data->len,
1085 d->data_start + ext_header_size);
1087 if(d->has_padding) {
1088 de_dbg(c, "ID3v2 padding at %s%d", d->approx_mark, (int)d->approx_padding_pos);
1091 *bytes_consumed = d->total_len;
1093 done:
1094 de_dbg_indent_restore(c, saved_indent_level);
1095 dbuf_close(unescaped_data);
1096 de_free(c, d);
1099 // **************************************************************************
1100 // ID3v1
1101 // **************************************************************************
1103 static const char *get_id3v1_genre_name(u8 g)
1105 struct genre_list_entry {
1106 u8 id;
1107 const char *name;
1109 static const struct genre_list_entry genre_list[] = {
1110 {0, "Blues"}, {1, "Classic Rock"}, {2, "Country"}, {3, "Dance"}, {4, "Disco"},
1111 {5, "Funk"}, {6, "Grunge"}, {7, "Hip-Hop"}, {8, "Jazz"}, {9, "Metal"},
1112 {10, "New Age"}, {11, "Oldies"}, {12, "Other"}, {13, "Pop"}, {14, "R&B"},
1113 {15, "Rap"}, {16, "Reggae"}, {17, "Rock"}, {18, "Techno"}, {19, "Industrial"},
1114 {20, "Alternative"}, {21, "Ska"}, {22, "Death Metal"}, {23, "Pranks"}, {24, "Soundtrack"},
1115 {25, "Euro-Techno"}, {26, "Ambient"}, {27, "Trip-Hop"}, {28, "Vocal"}, {29, "Jazz+Funk"},
1116 {30, "Fusion"}, {31, "Trance"}, {32, "Classical"}, {33, "Instrumental"}, {34, "Acid"},
1117 {35, "House"}, {36, "Game"}, {37, "Sound Clip"}, {38, "Gospel"}, {39, "Noise"},
1118 {40, "Altern. Rock"}, {41, "Bass"}, {42, "Soul"}, {43, "Punk"}, {44, "Space"},
1119 {45, "Meditative"}, {46, "Instrumental Pop"}, {47, "Instrumental Rock"}, {48, "Ethnic"}, {49, "Gothic"},
1120 {50, "Darkwave"}, {51, "Techno-Industrial"}, {52, "Electronic"}, {53, "Pop-Folk"}, {54, "Eurodance"},
1121 {55, "Dream"}, {56, "Southern Rock"}, {57, "Comedy"}, {58, "Cult"}, {59, "Gangsta"},
1122 {60, "Top 40"}, {61, "Christian Rap"}, {62, "Pop/Funk"}, {63, "Jungle"}, {64, "Native American"},
1123 {65, "Cabaret"}, {66, "New Wave"}, {67, "Psychedelic"}, {68, "Rave"}, {69, "Showtunes"},
1124 {70, "Trailer"}, {71, "Lo-Fi"}, {72, "Tribal"}, {73, "Acid Punk"}, {74, "Acid Jazz"},
1125 {75, "Polka"}, {76, "Retro"}, {77, "Musical"}, {78, "Rock & Roll"}, {79, "Hard Rock"},
1126 {80, "Folk"}, {81, "Folk-Rock"}, {82, "National Folk"}, {83, "Swing"}, {84, "Fast Fusion"},
1127 {85, "Bebob"}, {86, "Latin"}, {87, "Revival"}, {88, "Celtic"}, {89, "Bluegrass"},
1128 {90, "Avantgarde"}, {91, "Gothic Rock"}, {92, "Progressive Rock"}, {93, "Psychedelic Rock"}, {94, "Symphonic Rock"},
1129 {95, "Slow Rock"}, {96, "Big Band"}, {97, "Chorus"}, {98, "Easy Listening"}, {99, "Acoustic"},
1130 {100, "Humour"}, {101, "Speech"}, {102, "Chanson"}, {103, "Opera"}, {104, "Chamber Music"},
1131 {105, "Sonata"}, {106, "Symphony"}, {107, "Booty Brass"}, {108, "Primus"}, {109, "Porn Groove"},
1132 {110, "Satire"}, {111, "Slow Jam"}, {112, "Club"}, {113, "Tango"}, {114, "Samba"},
1133 {115, "Folklore"}, {116, "Ballad"}, {117, "Power Ballad"}, {118, "Rhythmic Soul"}, {119, "Freestyle"},
1134 {120, "Duet"}, {121, "Punk Rock"}, {122, "Drum Solo"}, {123, "A Cappella"}, {124, "Euro-House"},
1135 {125, "Dance Hall"},
1136 // TODO?: More IDs have been defined, by various versions of Winamp.
1137 {255, "unspecified"} };
1138 size_t k;
1140 for(k=0; k<DE_ARRAYCOUNT(genre_list); k++) {
1141 if(genre_list[k].id==g) {
1142 return genre_list[k].name;
1145 return "unknown";
1148 static void do_id3v1(deark *c, i64 pos1)
1150 i64 pos = pos1;
1151 de_ucstring *s = NULL;
1152 u8 genre;
1154 s = ucstring_create(c);
1155 pos += 3;
1157 dbuf_read_to_ucstring(c->infile, pos, 30, s, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
1158 ucstring_strip_trailing_spaces(s);
1159 de_dbg(c, "song title: \"%s\"", ucstring_getpsz(s));
1160 pos += 30;
1162 ucstring_empty(s);
1163 dbuf_read_to_ucstring(c->infile, pos, 30, s, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
1164 ucstring_strip_trailing_spaces(s);
1165 de_dbg(c, "artist: \"%s\"", ucstring_getpsz(s));
1166 pos += 30;
1168 ucstring_empty(s);
1169 dbuf_read_to_ucstring(c->infile, pos, 30, s, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
1170 ucstring_strip_trailing_spaces(s);
1171 de_dbg(c, "album: \"%s\"", ucstring_getpsz(s));
1172 pos += 30;
1174 ucstring_empty(s);
1175 dbuf_read_to_ucstring(c->infile, pos, 4, s, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
1176 de_dbg(c, "year: \"%s\"", ucstring_getpsz(s));
1177 pos += 4;
1179 ucstring_empty(s);
1180 dbuf_read_to_ucstring(c->infile, pos, 30, s, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
1181 ucstring_strip_trailing_spaces(s);
1182 de_dbg(c, "comment: \"%s\"", ucstring_getpsz(s));
1183 pos += 28;
1184 if(de_getbyte(pos)==0) {
1185 u8 trknum;
1186 trknum = de_getbyte(pos+1);
1187 if(trknum!=0) {
1188 // Looks like ID3v1.1
1189 de_dbg(c, "track number: %d", (int)trknum);
1192 pos += 2;
1194 genre = de_getbyte(pos);
1195 de_dbg(c, "genre: %d (%s)", (int)genre, get_id3v1_genre_name(genre));
1197 ucstring_destroy(s);
1200 // **************************************************************************
1202 static void de_run_id3(deark *c, de_module_params *mparams)
1204 if(de_havemodcode(c, mparams, 'I')) { // raw ID3v2
1205 i64 bytes_consumed_id3v2 = 0;
1206 do_id3v2(c, c->infile, 0, c->infile->len, &bytes_consumed_id3v2);
1207 if(mparams) {
1208 mparams->out_params.int64_1 = bytes_consumed_id3v2;
1210 goto done;
1212 if(de_havemodcode(c, mparams, '1')) { // raw ID3v1
1213 do_id3v1(c, 0);
1214 goto done;
1216 if(de_havemodcode(c, mparams, 'P')) { // Windows WM/Picture
1217 do_wmpicture(c, c->infile, 0, c->infile->len);
1218 goto done;
1220 if(de_havemodcode(c, mparams, 'F')) { // FLAC PICTURE
1221 do_flacpicture(c, c->infile, 0, c->infile->len);
1222 goto done;
1225 done:
1229 // Figure out the size of the ID3V2 segment at the beginning of the file.
1230 // Sets c->detection_data.id3.bytes_at_start, etc.
1231 // Note code duplication with do_id3v2_header().
1232 // Note code duplication with de_fmtutil_handle_id3().
1233 static int de_identify_id3(deark *c)
1235 u8 flags;
1236 u8 version_code;
1237 u8 has_footer = 0;
1238 i64 total_len;
1240 c->detection_data->id3.detection_attempted = 1;
1241 if(dbuf_memcmp(c->infile, 0, "ID3", 3)) return 0;
1242 c->detection_data->id3.has_id3v2 = 1;
1244 version_code = dbuf_getbyte(c->infile, 3);
1245 flags = dbuf_getbyte(c->infile, 5);
1246 if(version_code >= 4) {
1247 has_footer = (flags&0x10)?1:0;
1250 total_len = 10;
1251 total_len += get_synchsafe_int(c->infile, 6);
1252 if(has_footer) total_len += 10;
1254 de_dbg2(c, "[id3detect] calculated end of ID3v2 data: %u", (unsigned int)total_len);
1255 c->detection_data->id3.bytes_at_start = (u32)total_len;
1257 // This module is never "detected". It's only used for its side effects,
1258 // and as a submodule.
1259 return 0;
1262 void de_module_id3(deark *c, struct deark_module_info *mi)
1264 mi->id = "id3";
1265 mi->desc = "ID3 metadata";
1266 mi->run_fn = de_run_id3;
1267 mi->identify_fn = de_identify_id3;
1268 mi->flags |= DE_MODFLAG_HIDDEN | DE_MODFLAG_SHAREDDETECTION;