1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Ogg multimedia format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
// Declares de_module_ogg (defined at the end of this file) through the
// DE_DECLARE_MODULE macro.
10 DE_DECLARE_MODULE(de_module_ogg
);
// Forward declaration of this module's private context. "lctx" is the
// short alias used in the function signatures below.
12 struct localctx_struct
;
13 typedef struct localctx_struct lctx
;
// Signature of a per-stream-type page handler. It is given the global
// context (c), the module context (d), the page being processed (pgi),
// and the bitstream that the page belongs to (si).
17 typedef void (*page_handler_fn_type
)(deark
*c
, lctx
*d
,
18 struct page_info
*pgi
, struct stream_info
*si
);
// Stream type codes. These are the values stored in the stream_type fields
// and listed in stream_type_info_arr. The value 0 is not defined here; it
// is used to mean "unknown stream type".
20 #define STREAMTYPE_VORBIS 1
21 #define STREAMTYPE_THEORA 2
22 #define STREAMTYPE_SKELETON 3
23 #define STREAMTYPE_FLAC 4
24 #define STREAMTYPE_SPEEX 5
25 #define STREAMTYPE_OPUS 6
26 #define STREAMTYPE_DIRAC 7
27 #define STREAMTYPE_KATE 8
28 #define STREAMTYPE_MIDI 9
29 #define STREAMTYPE_PCM 10
30 #define STREAMTYPE_YUV4MPEG 11
// OGM stream types. Their entries in stream_type_info_arr carry the 0x1 flag.
31 #define STREAMTYPE_OGM_V 20
32 #define STREAMTYPE_OGM_A 21
33 #define STREAMTYPE_OGM_T 22
34 #define STREAMTYPE_OGM_S 23
// Describes one recognized bitstream type.
// NOTE(review): only part of this definition is visible in this view. Other
// code in this file also reads stream_type, flags, magic, magic_len and name
// through stream_type_info pointers/elements.
36 struct stream_type_info
{
// Handler invoked for each page of a stream of this type. May be NULL, in
// which case no type-specific processing is done.
42 page_handler_fn_type page_fn
;
// NOTE(review): the members below are accessed through "pgi->" (struct
// page_info) or "si->" (struct stream_info) elsewhere in this file, so the
// boundaries between those struct definitions appear to be missing from
// this view. Confirm against the full source before relying on the layout.
48 int is_first_page
; // Is this the first page of some bitstream?
57 int stream_type
; // 0 if unknown
58 const struct stream_type_info
*sti
; // NULL if stream type is unknown
62 // Number of pages we've counted for this stream so far.
63 // Expected to equal page_info::page_seq_num.
66 // Private use data, owned by the stream type.
69 // 0 = Headers have not been processed.
70 // 1 = Headers have been processed.
71 unsigned int stream_state
;
73 // Theora: A copy of the Comment and Setup streams.
// Module-wide state for one run of the Ogg module.
// NOTE(review): the code also uses d->format_declared, d->bitstream_count,
// d->total_page_count and d->always_hexdump, which are not visible in this
// view of the definition.
77 struct localctx_struct
{
// Maps a bitstream serial number to that stream's struct stream_info.
81 struct de_inthashtable
*streamtable
;
// Flags recording which kinds of streams have been seen. They are consulted
// by declare_ogg_format() when choosing a name for the format.
84 u8 found_skeleton
, found_ogm
;
85 u8 found_vorbis
, found_theora
;
// Type of the first bitstream in the file. first_stream_type and
// first_stream_sti are meaningful only if first_stream_type_valid is nonzero.
87 int first_stream_type_valid
;
88 int first_stream_type
;
// Set when some stream is of unknown type, or differs in type from the
// first stream.
89 int has_unknown_or_multiple_stream_types
;
// Set when some stream is neither Vorbis nor Theora.
90 int has_non_vorbis_non_theora_stream
;
91 const struct stream_type_info
*first_stream_sti
;
// Reads a 3-byte integer from f at offset *ppos, using dbuf_getint_ext().
// NOTE(review): judging by the name, this is an unsigned big-endian read
// that also advances *ppos past the field, but the lines that would update
// *ppos and return the value are not visible in this view. Confirm.
94 static unsigned int getu24be_p(dbuf
*f
, i64
*ppos
)
97 u
= (unsigned int)dbuf_getint_ext(f
, *ppos
, 3, 0, 0);
// Chooses a descriptive name for the file's contents, and declares the
// format as "Ogg <name>", or "Ogg (other)" if no name was chosen.
// Does its work at most once per file, guarded by d->format_declared.
102 // To be called when we encounter a page that is not the first page of
103 // its bitstream (or at EOF).
104 static void declare_ogg_format(deark
*c
, lctx
*d
)
107 const char *name
= NULL
;
// Only the first call has any effect.
109 if(d
->format_declared
) return;
110 d
->format_declared
= 1;
112 // There's no nice way, that I know of, to characterize the contents of an Ogg
113 // file. But I think it's worth trying.
// The tests below are tried in priority order; the first match wins.
115 if(d
->bitstream_count
<1) {
116 // If there are zero streams : "other"
118 else if(d
->found_ogm
) {
119 // else if there's an OGM stream of any kind...
122 else if(d
->found_skeleton
) {
123 // else If there's a Skeleton stream...
126 else if(d
->first_stream_type_valid
&& d
->first_stream_sti
&&
127 !d
->has_unknown_or_multiple_stream_types
)
129 // else if all streams are the same known type: that stream type
130 name
= d
->first_stream_sti
->name
;
132 else if(d
->found_theora
&& d
->found_vorbis
&& !d
->has_non_vorbis_non_theora_stream
) {
133 // else if there are Theora and Vorbis streams and nothing else...
134 name
="Theora+Vorbis";
136 else if(d
->found_theora
) {
137 // else if there's a Theora stream...
// A NULL name falls back to "(other)".
142 de_snprintf(tmps
, sizeof(tmps
), "Ogg %s", name
?name
:"(other)");
143 de_declare_fmt(c
, tmps
);
// Writes a human-readable description of an Ogg page's header-type flags
// into buf (capacity buflen).
// Recognized bits: 0x01 = continuation page, 0x02 = first page,
// 0x04 = last page. When a description is produced, it has the form
// " (item...)", with a leading space, so it can be appended to a message.
// NOTE(review): the return statement is not visible here; the caller uses
// the result as a "%s" argument, so presumably buf is returned.
146 static char *get_hdrtype_descr(deark
*c
, char *buf
, size_t buflen
, u8 hdr_type
)
// Start with an empty string.
149 de_strlcpy(buf
, "", buflen
);
152 de_ucstring
*s
= NULL
;
153 s
= ucstring_create(c
);
// Append one item per flag bit that is set.
154 if(hdr_type
&0x01) ucstring_append_flags_item(s
, "continuation page");
155 if(hdr_type
&0x02) ucstring_append_flags_item(s
, "first page");
156 if(hdr_type
&0x04) ucstring_append_flags_item(s
, "last page");
157 de_snprintf(buf
, buflen
, " (%s)", ucstring_getpsz(s
));
// Decodes the fields of a Vorbis Identification header, and prints them as
// debug messages. Fields are read in this order, after a 7-byte signature:
// version (u32le), channels (byte), sample rate (u32le), then the max,
// nominal, and min bitrates (each a signed 32-bit little-endian integer).
// NOTE(review): the initialization of pos is not visible in this view.
163 static void do_vorbis_id_header(deark
*c
, lctx
*d
, struct page_info
*pgi
, struct stream_info
*si
)
169 pos
+= 7; // Skip signature
170 u1
= (unsigned int)de_getu32le_p(&pos
);
171 de_dbg(c
, "version: %u", u1
);
172 u1
= (unsigned int)de_getbyte_p(&pos
);
173 de_dbg(c
, "channels: %u", u1
);
174 u1
= (unsigned int)de_getu32le_p(&pos
);
175 de_dbg(c
, "sample rate: %u", u1
);
// The bitrate fields are signed, so they are read with de_geti32le() and
// the position is advanced by hand.
176 x
= de_geti32le(pos
); pos
+= 4;
177 de_dbg(c
, "max bitrate: %d", (int)x
);
178 x
= de_geti32le(pos
); pos
+= 4;
179 de_dbg(c
, "nominal bitrate: %d", (int)x
);
180 x
= de_geti32le(pos
); pos
+= 4;
181 de_dbg(c
, "min bitrate: %d", (int)x
);
// Decodes the fields of a Theora Identification header, and prints them as
// debug messages. Multi-byte fields are read as big-endian; several of them
// are 24-bit values, read with getu24be_p().
// NOTE(review): the initialization of pos is not visible in this view.
184 static void do_theora_id_header(deark
*c
, lctx
*d
, struct page_info
*pgi
, struct stream_info
*si
)
191 pos
+= 7; // Skip signature
// Three one-byte version components.
192 vmaj
= de_getbyte_p(&pos
);
193 vmin
= de_getbyte_p(&pos
);
194 vrev
= de_getbyte_p(&pos
);
195 de_dbg(c
, "version: %u.%u.%u", (unsigned int)vmaj
, (unsigned int)vmin
,
// Frame size: two 16-bit values, reported in units of macroblocks.
197 x1
= de_getu16be_p(&pos
);
198 x2
= de_getu16be_p(&pos
);
199 de_dbg(c
, "frame dimensions: %d"DE_CHAR_TIMES
"%d macroblocks", (int)x1
, (int)x2
);
// Picture size: two 24-bit values, reported in pixels.
201 u1
= getu24be_p(c
->infile
, &pos
);
202 u2
= getu24be_p(c
->infile
, &pos
);
203 de_dbg(c
, "picture dimensions: %u"DE_CHAR_TIMES
"%u pixels", u1
, u2
);
// Picture region offset: two one-byte values.
205 u1
= (unsigned int)de_getbyte_p(&pos
);
206 u2
= (unsigned int)de_getbyte_p(&pos
);
207 de_dbg(c
, "picture region offset: %u,%u pixels", u1
, u2
);
// Frame rate, as a fraction of two 32-bit values.
209 u1
= (unsigned int)de_getu32be_p(&pos
);
210 u2
= (unsigned int)de_getu32be_p(&pos
);
211 de_dbg(c
, "frame rate: %u/%u", u1
, u2
);
// Aspect ratio, as a fraction of two 24-bit values.
213 u1
= getu24be_p(c
->infile
, &pos
);
214 u2
= getu24be_p(c
->infile
, &pos
);
215 de_dbg(c
, "aspect ratio: %u/%u", u1
, u2
);
217 u1
= (unsigned int)de_getbyte_p(&pos
);
218 de_dbg(c
, "color space: %u", u1
);
220 u1
= getu24be_p(c
->infile
, &pos
);
221 de_dbg(c
, "nominal bitrate: %u bits/sec", u1
);
// Decodes a Vorbis-style comment block from f, and prints its contents as
// debug messages. The layout read here is: a length-prefixed vendor string,
// a comment count, then that many length-prefixed comment strings. All
// lengths and the count are 32-bit little-endian integers, and strings are
// interpreted as UTF-8.
// Parsing stops (goto done) as soon as a length would run past the end of f.
// NOTE(review): pos is presumably initialized from pos1, and advanced past
// each string after it is read; those lines are not visible in this view.
224 static void do_vorbis_comment_block(deark
*c
, lctx
*d
, dbuf
*f
, i64 pos1
)
230 de_ucstring
*s
= NULL
;
// Vendor string
232 n
= dbuf_getu32le_p(f
, &pos
);
233 if(pos
+n
> f
->len
) goto done
;
234 s
= ucstring_create(c
);
235 dbuf_read_to_ucstring_n(f
, pos
, n
, DE_DBG_MAX_STRLEN
, s
, 0, DE_ENCODING_UTF8
);
236 de_dbg(c
, "vendor: \"%s\"", ucstring_getpsz_d(s
));
// User comments
239 ncomments
= dbuf_getu32le_p(f
, &pos
);
240 de_dbg(c
, "number of comments: %d", (int)ncomments
);
242 for(k
=0; k
<ncomments
; k
++) {
// Make sure the 4-byte length field, and then the string itself, fit
// within the buffer.
243 if(pos
+4 > f
->len
) goto done
;
244 n
= dbuf_getu32le_p(f
, &pos
);
245 if(pos
+n
> f
->len
) goto done
;
247 dbuf_read_to_ucstring_n(f
, pos
, n
, DE_DBG_MAX_STRLEN
, s
, 0, DE_ENCODING_UTF8
);
248 de_dbg(c
, "comment[%d]: \"%s\"", (int)k
, ucstring_getpsz_d(s
));
// Processes the Comment/Setup header data that was accumulated in
// si->header_stream for a Vorbis or Theora stream.
// Decoding is skipped if the headers were already processed
// (stream_state != 0), if no header data was saved, or if the stream type
// is unknown.
// NOTE(review): the "done" label is not visible in this view; confirm which
// of the cleanup statements at the end also run on the early-exit paths.
256 static void do_theora_vorbis_after_headers(deark
*c
, lctx
*d
, struct stream_info
*si
)
259 int saved_indent_level
;
262 de_dbg_indent_save(c
, &saved_indent_level
);
263 if(si
->stream_state
!=0) goto done
;
264 if(!si
->header_stream
) goto done
;
265 if(!si
->sti
) goto done
;
266 f
= si
->header_stream
;
268 de_dbg(c
, "[decoding %s comment/setup headers, %d bytes]",
269 si
->sti
->name
, (int)si
->header_stream
->len
);
272 // Make sure the comment header signature is present.
// dbuf_memcmp() returns nonzero on a mismatch. The expected signatures are
// byte 0x03 + "vorbis" for Vorbis, and byte 0x81 + "theora" for Theora.
273 if(si
->stream_type
==STREAMTYPE_VORBIS
) {
274 if(dbuf_memcmp(f
, 0, "\x03" "vorbis", 7)) {
279 else if(si
->stream_type
==STREAMTYPE_THEORA
) {
280 if(dbuf_memcmp(f
, 0, "\x81" "theora", 7)) {
286 do_vorbis_comment_block(c
, d
, f
, pos
);
288 // TODO: "Setup" header
// Mark the headers as processed, and release the saved header data.
291 si
->stream_state
= 1;
292 dbuf_close(si
->header_stream
);
293 si
->header_stream
= NULL
;
294 de_dbg_indent_restore(c
, saved_indent_level
);
// Page handler for Vorbis streams.
// The first page of the stream is decoded directly as the Identification
// header. Later header pages (first data byte has its low bit set) are
// copied into si->header_stream. The first page whose first byte has its
// low bit clear triggers do_theora_vorbis_after_headers().
297 static void do_vorbis_page(deark
*c
, lctx
*d
, struct page_info
*pgi
, struct stream_info
*si
)
301 if(pgi
->is_first_page
) {
302 // The first Ogg page of a bitstream usually contains enough data to be
303 // useful. So, we'll try to process it directly, without reconstructing
304 // the codec bitstream.
305 do_vorbis_id_header(c
, d
, pgi
, si
);
308 // We want to save a copy of the Comment and Setup header data,
309 // but not the Identification header which is handled elsewhere.
311 if(si
->stream_state
!=0) {
312 // We've already handled the Comment & Setup headers.
// An empty page has no first byte to examine.
316 if(pgi
->dlen
<1) goto done
;
318 firstbyte
= de_getbyte(pgi
->dpos
);
320 if(si
->page_count
==0 || pgi
->page_seq_num
==0 || (pgi
->hdr_type
&0x02)) {
321 // This appears to be the Identification header page. Skip it.
325 if(pgi
->page_seq_num
>=1 && (firstbyte
&0x01)) {
326 // This appears to be one of the pages we care about.
327 // Save its data for later.
// The buffer is created on demand, the first time it is needed.
328 if(!si
->header_stream
) {
329 si
->header_stream
= dbuf_create_membuf(c
, 1048576, 1);
331 dbuf_copy(c
->infile
, pgi
->dpos
, pgi
->dlen
, si
->header_stream
);
333 else if((firstbyte
&0x01)==0) {
334 // Reached the end of headers (by encountering a non-header page).
335 // (There is required to be an Ogg page break immediately after the
336 // Vorbis headers, so the start of the first data *page* must correspond
337 // to the start of the first data *packet*. A Vorbis data packet always
338 // begins with a byte whose low bit is 0.)
339 do_theora_vorbis_after_headers(c
, d
, si
);
// Page handler for Theora streams. It has the same structure as
// do_vorbis_page(), but a header page is recognized by its first data byte
// being >= 0x80, and a data page by its first byte being < 0x80.
346 static void do_theora_page(deark
*c
, lctx
*d
, struct page_info
*pgi
, struct stream_info
*si
)
// The first page of the stream is decoded as the Identification header.
350 if(pgi
->is_first_page
) {
351 do_theora_id_header(c
, d
, pgi
, si
);
354 // We want to save a copy of the Comment and Setup header data,
355 // but not the Identification header which is handled elsewhere.
357 if(si
->stream_state
!=0) {
358 // We've already handled the Comment & Setup headers.
// An empty page has no first byte to examine.
362 if(pgi
->dlen
<1) goto done
;
364 firstbyte
= de_getbyte(pgi
->dpos
);
366 if(si
->page_count
==0 || pgi
->page_seq_num
==0 || (pgi
->hdr_type
&0x02)) {
367 // This appears to be the Identification header page. Skip it.
371 if(pgi
->page_seq_num
>=1 && firstbyte
>=0x80) {
372 // This appears to be one of the pages we care about.
373 // Save its data for later.
// The buffer is created on demand, the first time it is needed.
374 if(!si
->header_stream
) {
375 si
->header_stream
= dbuf_create_membuf(c
, 1048576, 1);
377 dbuf_copy(c
->infile
, pgi
->dpos
, pgi
->dlen
, si
->header_stream
);
379 else if(firstbyte
<0x80) {
380 // Reached the end of headers (by encountering a non-header page).
381 // (There is required to be an Ogg page break immediately after the
382 // Theora headers, so the start of the first data *page* must correspond
383 // to the start of the first data *packet*. A Theora data packet always
384 // begins with a byte < 0x80.)
385 do_theora_vorbis_after_headers(c
, d
, si
);
// Table of the bitstream types this module can recognize. A stream is
// identified by comparing the start of its first page to each entry's magic
// bytes, in table order (see do_identify_bitstream()).
// Each entry appears to be: { stream type code, flags, magic length,
// magic bytes, display name, page handler (or NULL) }.
// NOTE(review): as shown here, the "Speex " and "PCM " literals are shorter
// than their stated magic length of 8. Confirm that trailing padding spaces
// were not lost, since the comparison reads magic_len bytes of the literal.
392 // .flags: 0x1=An OGM stream type
393 const struct stream_type_info stream_type_info_arr
[] = {
394 { STREAMTYPE_VORBIS
, 0, 7, (const u8
*)"\x01" "vorbis", "Vorbis", do_vorbis_page
},
395 { STREAMTYPE_THEORA
, 0, 7, (const u8
*)"\x80" "theora", "Theora", do_theora_page
},
396 { STREAMTYPE_SKELETON
, 0, 8, (const u8
*)"fishead\0", "Skeleton", NULL
},
397 { STREAMTYPE_FLAC
, 0, 5, (const u8
*)"\x7f" "FLAC", "FLAC", NULL
},
398 { STREAMTYPE_SPEEX
, 0, 8, (const u8
*)"Speex ", "Speex", NULL
},
399 { STREAMTYPE_OPUS
, 0, 8, (const u8
*)"OpusHead", "Opus", NULL
},
400 { STREAMTYPE_DIRAC
, 0, 5, (const u8
*)"BBCD\0", "Dirac", NULL
},
401 { STREAMTYPE_KATE
, 0, 8, (const u8
*)"\x80" "kate\0\0\0", "Kate", NULL
},
402 { STREAMTYPE_MIDI
, 0, 8, (const u8
*)"OggMIDI\0", "MIDI", NULL
},
403 { STREAMTYPE_PCM
, 0, 8, (const u8
*)"PCM ", "PCM", NULL
},
404 { STREAMTYPE_YUV4MPEG
, 0, 8, (const u8
*)"YUV4MPEG", "YUV4MPEG", NULL
},
// OGM types (flags = 0x1)
405 { STREAMTYPE_OGM_V
, 0x1, 6, (const u8
*)"\x01" "video", "OGM video", NULL
},
406 { STREAMTYPE_OGM_A
, 0x1, 6, (const u8
*)"\x01" "audio", "OGM audio", NULL
},
407 { STREAMTYPE_OGM_T
, 0x1, 5, (const u8
*)"\x01" "text", "OGM text", NULL
},
408 { STREAMTYPE_OGM_S
, 0x1, 16, (const u8
*)"\x01" "Direct Show Sam", "OGM DS samples", NULL
}
// Tries to identify the type of a bitstream from the data of its first
// page (len bytes at pos), by comparing it to the magic bytes of each entry
// in stream_type_info_arr. On a match, si->sti and si->stream_type are set.
// Also updates the module-level flags that track which stream types have
// been seen, and prints the detected type.
411 static void do_identify_bitstream(deark
*c
, lctx
*d
, struct stream_info
*si
, i64 pos
, i64 len
)
414 size_t bytes_to_scan
;
// Read at most sizeof(idbuf) bytes from the start of the page data.
417 bytes_to_scan
= (size_t)len
;
418 if(bytes_to_scan
> sizeof(idbuf
)) {
419 bytes_to_scan
= sizeof(idbuf
);
422 de_read(idbuf
, pos
, bytes_to_scan
);
// NOTE(review): each comparison covers magic_len bytes, regardless of how
// many bytes were read above. If len is shorter than an entry's magic_len,
// this examines bytes of idbuf that de_read() was not asked to fill.
// Confirm that idbuf is initialized (its declaration is not visible here).
424 for(k
=0; k
<DE_ARRAYCOUNT(stream_type_info_arr
); k
++) {
425 if(!de_memcmp(idbuf
, stream_type_info_arr
[k
].magic
,
426 stream_type_info_arr
[k
].magic_len
))
428 si
->sti
= &stream_type_info_arr
[k
];
429 si
->stream_type
= si
->sti
->stream_type
;
// Record what kind of stream this is, for declare_ogg_format().
434 if(si
->stream_type
==STREAMTYPE_VORBIS
) {
437 else if(si
->stream_type
==STREAMTYPE_THEORA
) {
440 else if(si
->stream_type
==STREAMTYPE_SKELETON
) {
441 d
->found_skeleton
= 1;
// Flag 0x1 marks an OGM stream type.
443 else if(si
->sti
&& (si
->sti
->flags
&0x1)) {
447 if(si
->stream_type
!=STREAMTYPE_VORBIS
&& si
->stream_type
!=STREAMTYPE_THEORA
) {
448 d
->has_non_vorbis_non_theora_stream
= 1;
451 de_dbg(c
, "bitstream type: %s", si
->sti
?si
->sti
->name
:"unknown");
454 // This function is a continuation of do_ogg_page(). Here we dig
455 // a little deeper, and look at the bitstream type and contents.
// For the first page of a bitstream, this identifies the stream type, and
// updates the bookkeeping used to name the file format. For other pages,
// it makes sure the format has been declared. In both cases the stream
// type's page handler, if it has one, is then called.
456 static void do_bitstream_page(deark
*c
, lctx
*d
, struct page_info
*pgi
,
457 struct stream_info
*si
)
// Hex dump up to 256 bytes of the page data, either for every page (if the
// hexdump option is set), or for first pages at debug level 2 and higher.
459 if(d
->always_hexdump
|| (pgi
->is_first_page
&& (c
->debug_level
>=2))) {
460 de_dbg_hexdump(c
, c
->infile
, pgi
->dpos
, pgi
->dlen
, 256, NULL
, 0x1);
463 if(pgi
->is_first_page
) {
464 do_identify_bitstream(c
, d
, si
, pgi
->dpos
, pgi
->dlen
);
466 if(d
->total_page_count
==0) {
467 // This is the first stream in the file.
468 if(si
->sti
&& si
->stream_type
!=0) {
469 // This stream's type is known. Remember it, for format declaration purpose.
470 d
->first_stream_type
= si
->stream_type
;
471 d
->first_stream_sti
= si
->sti
;
472 d
->first_stream_type_valid
= 1;
476 // Not the first stream in the file
477 if(si
->stream_type
!=0 && d
->first_stream_type_valid
&&
478 (si
->stream_type
==d
->first_stream_type
))
480 // This stream is the same type as the first stream. Do nothing.
483 d
->has_unknown_or_multiple_stream_types
= 1;
// A stream of unknown type also sets the flag.
487 if(si
->stream_type
==0) {
488 d
->has_unknown_or_multiple_stream_types
= 1;
492 if(!d
->format_declared
) declare_ogg_format(c
, d
);
// Dispatch to the type-specific page handler, if there is one.
495 if(si
->sti
&& si
->sti
->page_fn
) {
496 si
->sti
->page_fn(c
, d
, pgi
, si
);
// Parses the Ogg page that starts at pos1.
// Reads the page header fields (version, header type, granule position,
// bitstream serial number, page sequence number, CRC, segment table), finds
// or creates the stream_info record for the page's bitstream, then hands
// the page to do_bitstream_page().
// The size of the page is reported via *bytes_consumed.
// The caller stops processing if the return value is zero.
500 static int do_ogg_page(deark
*c
, lctx
*d
, i64 pos1
, i64
*bytes_consumed
)
504 i64 num_page_segments
;
510 struct stream_info
*si
= NULL
;
511 struct page_info
*pgi
= NULL
;
513 pgi
= de_malloc(c
, sizeof(struct page_info
));
514 pos
+= 4; // signature, already read
516 pgi
->version
= de_getbyte_p(&pos
);
517 de_dbg(c
, "version: %d", (int)pgi
->version
);
519 pgi
->hdr_type
= de_getbyte_p(&pos
);
520 de_dbg(c
, "header type: 0x%02x%s", (unsigned int)pgi
->hdr_type
,
521 get_hdrtype_descr(c
, buf
, sizeof(buf
), pgi
->hdr_type
));
// Signed 64-bit little-endian field; the position is advanced by hand.
523 pgi
->granule_pos
= de_geti64le(pos
); pos
+= 8;
524 de_dbg(c
, "granule position: %"I64_FMT
, pgi
->granule_pos
);
526 pgi
->stream_serialno
= de_getu32le_p(&pos
);
527 de_dbg(c
, "bitstream serial number: %"I64_FMT
, pgi
->stream_serialno
);
// Look up the bitstream by its serial number.
529 ret
= de_inthashtable_get_item(c
, d
->streamtable
, pgi
->stream_serialno
, &item
);
531 si
= (struct stream_info
*)item
;
532 // We've seen this stream before.
534 de_dbg(c
, "bitstream %"I64_FMT
" type: %s", pgi
->stream_serialno
,
535 si
->sti
?si
->sti
->name
:"unknown");
536 de_dbg_indent(c
, -1);
539 // This is the first page we've encountered of this stream.
// Create a record for the stream, and register it in the table.
540 si
= de_malloc(c
, sizeof(struct stream_info
));
541 de_inthashtable_add_item(c
, d
->streamtable
, pgi
->stream_serialno
, (void*)si
);
542 d
->bitstream_count
++;
544 si
->serialno
= pgi
->stream_serialno
;
546 pgi
->page_seq_num
= de_getu32le_p(&pos
);
547 de_dbg(c
, "page sequence number: %"I64_FMT
, pgi
->page_seq_num
);
549 x
= de_getu32le_p(&pos
);
550 de_dbg(c
, "crc (reported): 0x%08x", (unsigned int)x
);
552 num_page_segments
= (i64
)de_getbyte_p(&pos
);
553 de_dbg(c
, "number of page segments: %d", (int)num_page_segments
);
// Read the segment table: one byte per segment.
557 for(k
=0; k
<num_page_segments
; k
++) {
558 x
= (i64
)de_getbyte_p(&pos
);
564 // Apparently we have 3 ways to identify the first page of a bitstream.
565 // We'll require them all to be consistent.
566 pgi
->is_first_page
= (si
->page_count
==0) && (pgi
->page_seq_num
==0) && ((pgi
->hdr_type
&0x02)!=0);
569 de_dbg(c
, "[%"I64_FMT
" total bytes of page data, at %"I64_FMT
"]", pgi
->dlen
, pgi
->dpos
);
571 do_bitstream_page(c
, d
, pgi
, si
);
572 de_dbg_indent(c
, -1);
// Report the number of bytes from the start of the page to the current
// position.
577 *bytes_consumed
= pos
- pos1
;
// Releases the resources held by one bitstream record.
// If the stream is Vorbis or Theora and its saved headers were never
// processed (for example, because no data page was ever encountered), they
// are processed now, before cleanup.
584 static void destroy_bitstream(deark
*c
, lctx
*d
, struct stream_info
*si
)
588 if((si
->stream_type
==STREAMTYPE_VORBIS
|| si
->stream_type
==STREAMTYPE_THEORA
)
589 && si
->stream_state
==0)
591 do_theora_vorbis_after_headers(c
, d
, si
);
// Close the saved header data, if it is still open.
594 if(si
->header_stream
) {
595 dbuf_close(si
->header_stream
);
// Empties and destroys d->streamtable. Items are removed from the table
// and passed to destroy_bitstream(); then the table itself is destroyed
// and the pointer is cleared.
// NOTE(review): the enclosing loop is not visible in this view. Presumably
// items are removed until de_inthashtable_remove_any_item() returns zero.
601 static void destroy_streamtable(deark
*c
, lctx
*d
)
605 void *removed_item
= NULL
;
606 struct stream_info
*si
;
608 if(!de_inthashtable_remove_any_item(c
, d
->streamtable
, &key
, &removed_item
)) {
611 si
= (struct stream_info
*)removed_item
;
612 destroy_bitstream(c
, d
, si
);
615 de_inthashtable_destroy(c
, d
->streamtable
);
616 d
->streamtable
= NULL
;
// Main processing routine: walks through the file one Ogg page at a time.
// ID3 data is handled first, and only the region between id3i.main_start
// and id3i.main_end is scanned for pages.
619 static void run_ogg_internal(deark
*c
, lctx
*d
)
623 struct de_id3info id3i
;
// The "ogg:hexdump" option requests a hex dump of every page.
625 d
->always_hexdump
= de_get_ext_option(c
, "ogg:hexdump")?1:0;
626 d
->streamtable
= de_inthashtable_create(c
);
628 fmtutil_handle_id3(c
, c
->infile
, &id3i
, 0);
629 pos
= id3i
.main_start
;
630 ogg_end
= id3i
.main_end
;
635 i64 bytes_consumed
= 0;
637 if(pos
>= ogg_end
) break;
638 sig
= (u32
)de_getu32be(pos
);
// 0x4f676753 is the page signature "OggS", in ASCII.
639 if(sig
!=0x04f676753U
) {
640 de_err(c
, "Ogg page signature not found at %"I64_FMT
, pos
);
643 de_dbg(c
, "page at %"I64_FMT
, pos
);
645 ret
= do_ogg_page(c
, d
, pos
, &bytes_consumed
);
646 de_dbg_indent(c
, -1);
// Stop on failure. The bytes_consumed test also guarantees that pos always
// moves forward.
647 if(!ret
|| bytes_consumed
<=4) break;
648 pos
+= bytes_consumed
;
649 d
->total_page_count
++;
// Make sure the format gets declared, even if no page triggered it.
652 if(!d
->format_declared
) declare_ogg_format(c
, d
);
654 de_dbg(c
, "number of bitstreams: %d", (int)d
->bitstream_count
);
// The module's "run" function.
// If the module was invoked with mode code 'C', the input file is decoded
// as a bare Vorbis comment block starting at offset 0. The regular Ogg
// processing is done by run_ogg_internal().
// NOTE(review): the control flow between the two calls is not visible in
// this view; presumably the 'C' mode skips run_ogg_internal().
657 static void de_run_ogg(deark
*c
, de_module_params
*mparams
)
661 d
= de_malloc(c
, sizeof(lctx
));
663 if(de_havemodcode(c
, mparams
, 'C')) {
664 do_vorbis_comment_block(c
, d
, c
->infile
, 0);
668 run_ogg_internal(c
, d
);
// Cleanup: destroy the stream table, if one was created.
671 if(d
&& d
->streamtable
) {
672 destroy_streamtable(c
, d
);
// The module's "identify" function. Tests for the "OggS" signature, at the
// offset given by id3.bytes_at_start if ID3v2 data was detected.
// NOTE(review): the initial value of pos and the returned confidence values
// are not visible in this view.
677 static int de_identify_ogg(deark
*c
)
// This relies on ID3 detection having been attempted already.
681 if(!c
->detection_data
->id3
.detection_attempted
) {
682 de_err(c
, "ogg detection requires id3 module");
686 if(c
->detection_data
->id3
.has_id3v2
) {
687 pos
= (i64
)c
->detection_data
->id3
.bytes_at_start
;
// dbuf_memcmp() returns 0 if the bytes match.
690 if(!dbuf_memcmp(c
->infile
, pos
, "OggS", 4))
// The module's "help" function: prints a description of the module-specific
// "ogg:hexdump" option.
696 static void de_help_ogg(deark
*c
)
698 de_msg(c
, "-opt ogg:hexdump : Hex dump the first part of all segments");
// Module registration function: fills in the module's description, and its
// run, identify, and help callbacks.
// NOTE(review): any other fields set here (such as the module id) are not
// visible in this view.
701 void de_module_ogg(deark
*c
, struct deark_module_info
*mi
)
704 mi
->desc
= "Ogg multimedia";
705 mi
->run_fn
= de_run_ogg
;
706 mi
->identify_fn
= de_identify_ogg
;
707 mi
->help_fn
= de_help_ogg
;