1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/formats/webm/webm_cluster_parser.h"
9 #include "base/logging.h"
10 #include "base/sys_byteorder.h"
11 #include "media/base/buffers.h"
12 #include "media/base/decrypt_config.h"
13 #include "media/filters/webvtt_util.h"
14 #include "media/formats/webm/webm_constants.h"
15 #include "media/formats/webm/webm_crypto_helpers.h"
16 #include "media/formats/webm/webm_webvtt_parser.h"
20 WebMClusterParser::WebMClusterParser(
23 base::TimeDelta audio_default_duration
,
25 base::TimeDelta video_default_duration
,
26 const WebMTracksParser::TextTracks
& text_tracks
,
27 const std::set
<int64
>& ignored_tracks
,
28 const std::string
& audio_encryption_key_id
,
29 const std::string
& video_encryption_key_id
,
31 : timecode_multiplier_(timecode_scale
/ 1000.0),
32 ignored_tracks_(ignored_tracks
),
33 audio_encryption_key_id_(audio_encryption_key_id
),
34 video_encryption_key_id_(video_encryption_key_id
),
35 parser_(kWebMIdCluster
, this),
36 last_block_timecode_(-1),
40 block_additional_data_size_(-1),
42 cluster_timecode_(-1),
43 cluster_start_time_(kNoTimestamp()),
44 cluster_ended_(false),
45 audio_(audio_track_num
, false, audio_default_duration
, log_cb
),
46 video_(video_track_num
, true, video_default_duration
, log_cb
),
47 ready_buffer_upper_bound_(kNoDecodeTimestamp()),
49 for (WebMTracksParser::TextTracks::const_iterator it
= text_tracks
.begin();
50 it
!= text_tracks
.end();
52 text_track_map_
.insert(std::make_pair(
53 it
->first
, Track(it
->first
, false, kNoTimestamp(), log_cb_
)));
57 WebMClusterParser::~WebMClusterParser() {}
59 void WebMClusterParser::Reset() {
60 last_block_timecode_
= -1;
61 cluster_timecode_
= -1;
62 cluster_start_time_
= kNoTimestamp();
63 cluster_ended_
= false;
68 ready_buffer_upper_bound_
= kNoDecodeTimestamp();
71 int WebMClusterParser::Parse(const uint8
* buf
, int size
) {
72 audio_
.ClearReadyBuffers();
73 video_
.ClearReadyBuffers();
74 ClearTextTrackReadyBuffers();
75 ready_buffer_upper_bound_
= kNoDecodeTimestamp();
77 int result
= parser_
.Parse(buf
, size
);
80 cluster_ended_
= false;
84 cluster_ended_
= parser_
.IsParsingComplete();
86 // If there were no buffers in this cluster, set the cluster start time to
87 // be the |cluster_timecode_|.
88 if (cluster_start_time_
== kNoTimestamp()) {
89 // If the cluster did not even have a |cluster_timecode_|, signal parse
91 if (cluster_timecode_
< 0)
94 cluster_start_time_
= base::TimeDelta::FromMicroseconds(
95 cluster_timecode_
* timecode_multiplier_
);
98 // Reset the parser if we're done parsing so that
99 // it is ready to accept another cluster on the next
103 last_block_timecode_
= -1;
104 cluster_timecode_
= -1;
110 const WebMClusterParser::BufferQueue
& WebMClusterParser::GetAudioBuffers() {
111 if (ready_buffer_upper_bound_
== kNoDecodeTimestamp())
112 UpdateReadyBuffers();
114 return audio_
.ready_buffers();
117 const WebMClusterParser::BufferQueue
& WebMClusterParser::GetVideoBuffers() {
118 if (ready_buffer_upper_bound_
== kNoDecodeTimestamp())
119 UpdateReadyBuffers();
121 return video_
.ready_buffers();
124 const WebMClusterParser::TextBufferQueueMap
&
125 WebMClusterParser::GetTextBuffers() {
126 if (ready_buffer_upper_bound_
== kNoDecodeTimestamp())
127 UpdateReadyBuffers();
129 // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in
130 // the output only for non-empty ready_buffer() queues in |text_track_map_|.
131 text_buffers_map_
.clear();
132 for (TextTrackMap::const_iterator itr
= text_track_map_
.begin();
133 itr
!= text_track_map_
.end();
135 const BufferQueue
& text_buffers
= itr
->second
.ready_buffers();
136 if (!text_buffers
.empty())
137 text_buffers_map_
.insert(std::make_pair(itr
->first
, text_buffers
));
140 return text_buffers_map_
;
143 WebMParserClient
* WebMClusterParser::OnListStart(int id
) {
144 if (id
== kWebMIdCluster
) {
145 cluster_timecode_
= -1;
146 cluster_start_time_
= kNoTimestamp();
147 } else if (id
== kWebMIdBlockGroup
) {
149 block_data_size_
= -1;
150 block_duration_
= -1;
151 discard_padding_
= -1;
152 discard_padding_set_
= false;
153 } else if (id
== kWebMIdBlockAdditions
) {
155 block_additional_data_
.reset();
156 block_additional_data_size_
= -1;
162 bool WebMClusterParser::OnListEnd(int id
) {
163 if (id
!= kWebMIdBlockGroup
)
166 // Make sure the BlockGroup actually had a Block.
167 if (block_data_size_
== -1) {
168 MEDIA_LOG(log_cb_
) << "Block missing from BlockGroup.";
172 bool result
= ParseBlock(false, block_data_
.get(), block_data_size_
,
173 block_additional_data_
.get(),
174 block_additional_data_size_
, block_duration_
,
175 discard_padding_set_
? discard_padding_
: 0);
177 block_data_size_
= -1;
178 block_duration_
= -1;
180 block_additional_data_
.reset();
181 block_additional_data_size_
= -1;
182 discard_padding_
= -1;
183 discard_padding_set_
= false;
187 bool WebMClusterParser::OnUInt(int id
, int64 val
) {
190 case kWebMIdTimecode
:
191 dst
= &cluster_timecode_
;
193 case kWebMIdBlockDuration
:
194 dst
= &block_duration_
;
196 case kWebMIdBlockAddID
:
197 dst
= &block_add_id_
;
208 bool WebMClusterParser::ParseBlock(bool is_simple_block
, const uint8
* buf
,
209 int size
, const uint8
* additional
,
210 int additional_size
, int duration
,
211 int64 discard_padding
) {
215 // Return an error if the trackNum > 127. We just aren't
216 // going to support large track numbers right now.
217 if (!(buf
[0] & 0x80)) {
218 MEDIA_LOG(log_cb_
) << "TrackNumber over 127 not supported";
222 int track_num
= buf
[0] & 0x7f;
223 int timecode
= buf
[1] << 8 | buf
[2];
224 int flags
= buf
[3] & 0xff;
225 int lacing
= (flags
>> 1) & 0x3;
228 MEDIA_LOG(log_cb_
) << "Lacing " << lacing
<< " is not supported yet.";
232 // Sign extend negative timecode offsets.
233 if (timecode
& 0x8000)
236 const uint8
* frame_data
= buf
+ 4;
237 int frame_size
= size
- (frame_data
- buf
);
238 return OnBlock(is_simple_block
, track_num
, timecode
, duration
, flags
,
239 frame_data
, frame_size
, additional
, additional_size
,
243 bool WebMClusterParser::OnBinary(int id
, const uint8
* data
, int size
) {
245 case kWebMIdSimpleBlock
:
246 return ParseBlock(true, data
, size
, NULL
, -1, -1, 0);
250 MEDIA_LOG(log_cb_
) << "More than 1 Block in a BlockGroup is not "
254 block_data_
.reset(new uint8
[size
]);
255 memcpy(block_data_
.get(), data
, size
);
256 block_data_size_
= size
;
259 case kWebMIdBlockAdditional
: {
260 uint64 block_add_id
= base::HostToNet64(block_add_id_
);
261 if (block_additional_data_
) {
262 // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
263 // as per matroska spec. But for now we don't have a use case to
264 // support parsing of such files. Take a look at this again when such a
266 MEDIA_LOG(log_cb_
) << "More than 1 BlockAdditional in a BlockGroup is "
270 // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
271 // element's value in Big Endian format. This is done to mimic ffmpeg
272 // demuxer's behavior.
273 block_additional_data_size_
= size
+ sizeof(block_add_id
);
274 block_additional_data_
.reset(new uint8
[block_additional_data_size_
]);
275 memcpy(block_additional_data_
.get(), &block_add_id
,
276 sizeof(block_add_id
));
277 memcpy(block_additional_data_
.get() + 8, data
, size
);
280 case kWebMIdDiscardPadding
: {
281 if (discard_padding_set_
|| size
<= 0 || size
> 8)
283 discard_padding_set_
= true;
285 // Read in the big-endian integer.
286 discard_padding_
= static_cast<int8
>(data
[0]);
287 for (int i
= 1; i
< size
; ++i
)
288 discard_padding_
= (discard_padding_
<< 8) | data
[i
];
297 bool WebMClusterParser::OnBlock(bool is_simple_block
, int track_num
,
301 const uint8
* data
, int size
,
302 const uint8
* additional
, int additional_size
,
303 int64 discard_padding
) {
305 if (cluster_timecode_
== -1) {
306 MEDIA_LOG(log_cb_
) << "Got a block before cluster timecode.";
310 // TODO(acolwell): Should relative negative timecode offsets be rejected? Or
311 // only when the absolute timecode is negative? See http://crbug.com/271794
313 MEDIA_LOG(log_cb_
) << "Got a block with negative timecode offset "
318 if (last_block_timecode_
!= -1 && timecode
< last_block_timecode_
) {
320 << "Got a block with a timecode before the previous block.";
325 StreamParserBuffer::Type buffer_type
= DemuxerStream::AUDIO
;
326 std::string encryption_key_id
;
327 if (track_num
== audio_
.track_num()) {
329 encryption_key_id
= audio_encryption_key_id_
;
330 } else if (track_num
== video_
.track_num()) {
332 encryption_key_id
= video_encryption_key_id_
;
333 buffer_type
= DemuxerStream::VIDEO
;
334 } else if (ignored_tracks_
.find(track_num
) != ignored_tracks_
.end()) {
336 } else if (Track
* const text_track
= FindTextTrack(track_num
)) {
337 if (is_simple_block
) // BlockGroup is required for WebVTT cues
339 if (block_duration
< 0) // not specified
342 buffer_type
= DemuxerStream::TEXT
;
344 MEDIA_LOG(log_cb_
) << "Unexpected track number " << track_num
;
348 last_block_timecode_
= timecode
;
350 base::TimeDelta timestamp
= base::TimeDelta::FromMicroseconds(
351 (cluster_timecode_
+ timecode
) * timecode_multiplier_
);
353 scoped_refptr
<StreamParserBuffer
> buffer
;
354 if (buffer_type
!= DemuxerStream::TEXT
) {
355 // The first bit of the flags is set when a SimpleBlock contains only
356 // keyframes. If this is a Block, then inspection of the payload is
357 // necessary to determine whether it contains a keyframe or not.
358 // http://www.matroska.org/technical/specs/index.html
360 is_simple_block
? (flags
& 0x80) != 0 : track
->IsKeyframe(data
, size
);
362 // Every encrypted Block has a signal byte and IV prepended to it. Current
363 // encrypted WebM request for comments specification is here
364 // http://wiki.webmproject.org/encryption/webm-encryption-rfc
365 scoped_ptr
<DecryptConfig
> decrypt_config
;
367 if (!encryption_key_id
.empty() &&
368 !WebMCreateDecryptConfig(
370 reinterpret_cast<const uint8
*>(encryption_key_id
.data()),
371 encryption_key_id
.size(),
372 &decrypt_config
, &data_offset
)) {
376 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
377 // type with remapped bytestream track numbers and allow multiple tracks as
378 // applicable. See https://crbug.com/341581.
379 buffer
= StreamParserBuffer::CopyFrom(
380 data
+ data_offset
, size
- data_offset
,
381 additional
, additional_size
,
382 is_keyframe
, buffer_type
, track_num
);
385 buffer
->set_decrypt_config(decrypt_config
.Pass());
387 std::string id
, settings
, content
;
388 WebMWebVTTParser::Parse(data
, size
, &id
, &settings
, &content
);
390 std::vector
<uint8
> side_data
;
391 MakeSideData(id
.begin(), id
.end(),
392 settings
.begin(), settings
.end(),
395 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
396 // type with remapped bytestream track numbers and allow multiple tracks as
397 // applicable. See https://crbug.com/341581.
398 buffer
= StreamParserBuffer::CopyFrom(
399 reinterpret_cast<const uint8
*>(content
.data()),
403 true, buffer_type
, track_num
);
406 buffer
->set_timestamp(timestamp
);
407 if (cluster_start_time_
== kNoTimestamp())
408 cluster_start_time_
= timestamp
;
410 if (block_duration
>= 0) {
411 buffer
->set_duration(base::TimeDelta::FromMicroseconds(
412 block_duration
* timecode_multiplier_
));
414 DCHECK_NE(buffer_type
, DemuxerStream::TEXT
);
415 buffer
->set_duration(track
->default_duration());
418 if (discard_padding
!= 0) {
419 buffer
->set_discard_padding(std::make_pair(
421 base::TimeDelta::FromMicroseconds(discard_padding
/ 1000)));
424 return track
->AddBuffer(buffer
);
427 WebMClusterParser::Track::Track(int track_num
,
429 base::TimeDelta default_duration
,
431 : track_num_(track_num
),
433 default_duration_(default_duration
),
434 estimated_next_frame_duration_(kNoTimestamp()),
436 DCHECK(default_duration_
== kNoTimestamp() ||
437 default_duration_
> base::TimeDelta());
440 WebMClusterParser::Track::~Track() {}
442 DecodeTimestamp
WebMClusterParser::Track::GetReadyUpperBound() {
443 DCHECK(ready_buffers_
.empty());
444 if (last_added_buffer_missing_duration_
.get())
445 return last_added_buffer_missing_duration_
->GetDecodeTimestamp();
447 return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
450 void WebMClusterParser::Track::ExtractReadyBuffers(
451 const DecodeTimestamp before_timestamp
) {
452 DCHECK(ready_buffers_
.empty());
453 DCHECK(DecodeTimestamp() <= before_timestamp
);
454 DCHECK(kNoDecodeTimestamp() != before_timestamp
);
456 if (buffers_
.empty())
459 if (buffers_
.back()->GetDecodeTimestamp() < before_timestamp
) {
460 // All of |buffers_| are ready.
461 ready_buffers_
.swap(buffers_
);
462 DVLOG(3) << __FUNCTION__
<< " : " << track_num_
<< " All "
463 << ready_buffers_
.size() << " are ready: before upper bound ts "
464 << before_timestamp
.InSecondsF();
468 // Not all of |buffers_| are ready yet. Move any that are ready to
471 const scoped_refptr
<StreamParserBuffer
>& buffer
= buffers_
.front();
472 if (buffer
->GetDecodeTimestamp() >= before_timestamp
)
474 ready_buffers_
.push_back(buffer
);
475 buffers_
.pop_front();
476 DCHECK(!buffers_
.empty());
479 DVLOG(3) << __FUNCTION__
<< " : " << track_num_
<< " Only "
480 << ready_buffers_
.size() << " ready, " << buffers_
.size()
481 << " at or after upper bound ts " << before_timestamp
.InSecondsF();
484 bool WebMClusterParser::Track::AddBuffer(
485 const scoped_refptr
<StreamParserBuffer
>& buffer
) {
486 DVLOG(2) << "AddBuffer() : " << track_num_
487 << " ts " << buffer
->timestamp().InSecondsF()
488 << " dur " << buffer
->duration().InSecondsF()
489 << " kf " << buffer
->is_key_frame()
490 << " size " << buffer
->data_size();
492 if (last_added_buffer_missing_duration_
.get()) {
493 base::TimeDelta derived_duration
=
494 buffer
->timestamp() - last_added_buffer_missing_duration_
->timestamp();
495 last_added_buffer_missing_duration_
->set_duration(derived_duration
);
497 DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
499 << last_added_buffer_missing_duration_
->timestamp().InSecondsF()
501 << last_added_buffer_missing_duration_
->duration().InSecondsF()
502 << " kf " << last_added_buffer_missing_duration_
->is_key_frame()
503 << " size " << last_added_buffer_missing_duration_
->data_size();
504 scoped_refptr
<StreamParserBuffer
> updated_buffer
=
505 last_added_buffer_missing_duration_
;
506 last_added_buffer_missing_duration_
= NULL
;
507 if (!QueueBuffer(updated_buffer
))
511 if (buffer
->duration() == kNoTimestamp()) {
512 last_added_buffer_missing_duration_
= buffer
;
513 DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
517 return QueueBuffer(buffer
);
520 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
521 if (!last_added_buffer_missing_duration_
.get())
524 last_added_buffer_missing_duration_
->set_duration(GetDurationEstimate());
526 DVLOG(2) << "ApplyDurationEstimateIfNeeded() : new dur : "
528 << last_added_buffer_missing_duration_
->timestamp().InSecondsF()
530 << last_added_buffer_missing_duration_
->duration().InSecondsF()
531 << " kf " << last_added_buffer_missing_duration_
->is_key_frame()
532 << " size " << last_added_buffer_missing_duration_
->data_size();
534 // Don't use the applied duration as a future estimation (don't use
535 // QueueBuffer() here.)
536 buffers_
.push_back(last_added_buffer_missing_duration_
);
537 last_added_buffer_missing_duration_
= NULL
;
540 void WebMClusterParser::Track::ClearReadyBuffers() {
541 // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
543 ready_buffers_
.clear();
546 void WebMClusterParser::Track::Reset() {
549 last_added_buffer_missing_duration_
= NULL
;
552 bool WebMClusterParser::Track::IsKeyframe(const uint8
* data
, int size
) const {
553 // For now, assume that all blocks are keyframes for datatypes other than
554 // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
558 // Make sure the block is big enough for the minimal keyframe header size.
562 // The LSb of the first byte must be a 0 for a keyframe.
563 // http://tools.ietf.org/html/rfc6386 Section 19.1
564 if ((data
[0] & 0x01) != 0)
567 // Verify VP8 keyframe startcode.
568 // http://tools.ietf.org/html/rfc6386 Section 19.1
569 if (data
[3] != 0x9d || data
[4] != 0x01 || data
[5] != 0x2a)
575 bool WebMClusterParser::Track::QueueBuffer(
576 const scoped_refptr
<StreamParserBuffer
>& buffer
) {
577 DCHECK(!last_added_buffer_missing_duration_
.get());
579 // WebMClusterParser::OnBlock() gives MEDIA_LOG and parse error on decreasing
580 // block timecode detection within a cluster. Therefore, we should not see
582 DecodeTimestamp previous_buffers_timestamp
= buffers_
.empty() ?
583 DecodeTimestamp() : buffers_
.back()->GetDecodeTimestamp();
584 CHECK(previous_buffers_timestamp
<= buffer
->GetDecodeTimestamp());
586 base::TimeDelta duration
= buffer
->duration();
587 if (duration
< base::TimeDelta() || duration
== kNoTimestamp()) {
588 MEDIA_LOG(log_cb_
) << "Invalid buffer duration: " << duration
.InSecondsF();
592 // The estimated frame duration is the minimum non-zero duration since the
593 // last initialization segment. The minimum is used to ensure frame durations
594 // aren't overestimated.
595 if (duration
> base::TimeDelta()) {
596 if (estimated_next_frame_duration_
== kNoTimestamp()) {
597 estimated_next_frame_duration_
= duration
;
599 estimated_next_frame_duration_
=
600 std::min(duration
, estimated_next_frame_duration_
);
604 buffers_
.push_back(buffer
);
608 base::TimeDelta
WebMClusterParser::Track::GetDurationEstimate() {
609 base::TimeDelta duration
= estimated_next_frame_duration_
;
610 if (duration
!= kNoTimestamp()) {
611 DVLOG(3) << __FUNCTION__
<< " : using estimated duration";
613 DVLOG(3) << __FUNCTION__
<< " : using hardcoded default duration";
615 duration
= base::TimeDelta::FromMilliseconds(
616 kDefaultVideoBufferDurationInMs
);
618 duration
= base::TimeDelta::FromMilliseconds(
619 kDefaultAudioBufferDurationInMs
);
623 DCHECK(duration
> base::TimeDelta());
624 DCHECK(duration
!= kNoTimestamp());
628 void WebMClusterParser::ClearTextTrackReadyBuffers() {
629 text_buffers_map_
.clear();
630 for (TextTrackMap::iterator it
= text_track_map_
.begin();
631 it
!= text_track_map_
.end();
633 it
->second
.ClearReadyBuffers();
637 void WebMClusterParser::ResetTextTracks() {
638 ClearTextTrackReadyBuffers();
639 for (TextTrackMap::iterator it
= text_track_map_
.begin();
640 it
!= text_track_map_
.end();
646 void WebMClusterParser::UpdateReadyBuffers() {
647 DCHECK(ready_buffer_upper_bound_
== kNoDecodeTimestamp());
648 DCHECK(text_buffers_map_
.empty());
650 if (cluster_ended_
) {
651 audio_
.ApplyDurationEstimateIfNeeded();
652 video_
.ApplyDurationEstimateIfNeeded();
653 // Per OnBlock(), all text buffers should already have valid durations, so
654 // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
656 ready_buffer_upper_bound_
=
657 DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
658 DCHECK(ready_buffer_upper_bound_
== audio_
.GetReadyUpperBound());
659 DCHECK(ready_buffer_upper_bound_
== video_
.GetReadyUpperBound());
661 ready_buffer_upper_bound_
= std::min(audio_
.GetReadyUpperBound(),
662 video_
.GetReadyUpperBound());
663 DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_
);
664 DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_
);
667 // Prepare each track's ready buffers for retrieval.
668 audio_
.ExtractReadyBuffers(ready_buffer_upper_bound_
);
669 video_
.ExtractReadyBuffers(ready_buffer_upper_bound_
);
670 for (TextTrackMap::iterator itr
= text_track_map_
.begin();
671 itr
!= text_track_map_
.end();
673 itr
->second
.ExtractReadyBuffers(ready_buffer_upper_bound_
);
677 WebMClusterParser::Track
*
678 WebMClusterParser::FindTextTrack(int track_num
) {
679 const TextTrackMap::iterator it
= text_track_map_
.find(track_num
);
681 if (it
== text_track_map_
.end())