Pin Chrome's shortcut to the Win10 Start menu on install and OS upgrade.
[chromium-blink-merge.git] / media / formats / webm / webm_cluster_parser.cc
blobd530f0ae7e4a9f2b1ddc56aa21a91fa5a2202d63
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/formats/webm/webm_cluster_parser.h"
7 #include <vector>
9 #include "base/logging.h"
10 #include "base/sys_byteorder.h"
11 #include "media/base/buffers.h"
12 #include "media/base/decrypt_config.h"
13 #include "media/filters/webvtt_util.h"
14 #include "media/formats/webm/webm_constants.h"
15 #include "media/formats/webm/webm_crypto_helpers.h"
16 #include "media/formats/webm/webm_webvtt_parser.h"
18 namespace media {
20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
// Caps on how many times the rate-limited MEDIA_LOG() call sites in this file
// may emit output.
enum {
  // Upper bound on log entries emitted while reading encoded durations, so
  // that corrupted data cannot flood the log.
  kMaxDurationErrorLogs = 10,

  // Upper bound on log entries telling the user that a buffer duration had to
  // be estimated.
  kMaxDurationEstimateLogs = 10,
};
34 WebMClusterParser::WebMClusterParser(
35 int64 timecode_scale,
36 int audio_track_num,
37 base::TimeDelta audio_default_duration,
38 int video_track_num,
39 base::TimeDelta video_default_duration,
40 const WebMTracksParser::TextTracks& text_tracks,
41 const std::set<int64>& ignored_tracks,
42 const std::string& audio_encryption_key_id,
43 const std::string& video_encryption_key_id,
44 const AudioCodec audio_codec,
45 const scoped_refptr<MediaLog>& media_log)
46 : num_duration_errors_(0),
47 timecode_multiplier_(timecode_scale / 1000.0),
48 ignored_tracks_(ignored_tracks),
49 audio_encryption_key_id_(audio_encryption_key_id),
50 video_encryption_key_id_(video_encryption_key_id),
51 audio_codec_(audio_codec),
52 parser_(kWebMIdCluster, this),
53 last_block_timecode_(-1),
54 block_data_size_(-1),
55 block_duration_(-1),
56 block_add_id_(-1),
57 block_additional_data_size_(0),
58 discard_padding_(-1),
59 cluster_timecode_(-1),
60 cluster_start_time_(kNoTimestamp()),
61 cluster_ended_(false),
62 audio_(audio_track_num, false, audio_default_duration, media_log),
63 video_(video_track_num, true, video_default_duration, media_log),
64 ready_buffer_upper_bound_(kNoDecodeTimestamp()),
65 media_log_(media_log) {
66 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
67 it != text_tracks.end();
68 ++it) {
69 text_track_map_.insert(std::make_pair(
70 it->first, Track(it->first, false, kNoTimestamp(), media_log_)));
74 WebMClusterParser::~WebMClusterParser() {}
76 void WebMClusterParser::Reset() {
77 last_block_timecode_ = -1;
78 cluster_timecode_ = -1;
79 cluster_start_time_ = kNoTimestamp();
80 cluster_ended_ = false;
81 parser_.Reset();
82 audio_.Reset();
83 video_.Reset();
84 ResetTextTracks();
85 ready_buffer_upper_bound_ = kNoDecodeTimestamp();
88 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
89 audio_.ClearReadyBuffers();
90 video_.ClearReadyBuffers();
91 ClearTextTrackReadyBuffers();
92 ready_buffer_upper_bound_ = kNoDecodeTimestamp();
94 int result = parser_.Parse(buf, size);
96 if (result < 0) {
97 cluster_ended_ = false;
98 return result;
101 cluster_ended_ = parser_.IsParsingComplete();
102 if (cluster_ended_) {
103 // If there were no buffers in this cluster, set the cluster start time to
104 // be the |cluster_timecode_|.
105 if (cluster_start_time_ == kNoTimestamp()) {
106 // If the cluster did not even have a |cluster_timecode_|, signal parse
107 // error.
108 if (cluster_timecode_ < 0)
109 return -1;
111 cluster_start_time_ = base::TimeDelta::FromMicroseconds(
112 cluster_timecode_ * timecode_multiplier_);
115 // Reset the parser if we're done parsing so that
116 // it is ready to accept another cluster on the next
117 // call.
118 parser_.Reset();
120 last_block_timecode_ = -1;
121 cluster_timecode_ = -1;
124 return result;
127 const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() {
128 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
129 UpdateReadyBuffers();
131 return audio_.ready_buffers();
134 const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() {
135 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
136 UpdateReadyBuffers();
138 return video_.ready_buffers();
141 const WebMClusterParser::TextBufferQueueMap&
142 WebMClusterParser::GetTextBuffers() {
143 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
144 UpdateReadyBuffers();
146 // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in
147 // the output only for non-empty ready_buffer() queues in |text_track_map_|.
148 text_buffers_map_.clear();
149 for (TextTrackMap::const_iterator itr = text_track_map_.begin();
150 itr != text_track_map_.end();
151 ++itr) {
152 const BufferQueue& text_buffers = itr->second.ready_buffers();
153 if (!text_buffers.empty())
154 text_buffers_map_.insert(std::make_pair(itr->first, text_buffers));
157 return text_buffers_map_;
160 base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
161 const uint8_t* data,
162 int size) {
164 // Duration is currently read assuming the *entire* stream is unencrypted.
165 // The special "Signal Byte" prepended to Blocks in encrypted streams is
166 // assumed to not be present.
167 // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams
168 // to return duration for any unencrypted blocks.
170 if (audio_codec_ == kCodecOpus) {
171 return ReadOpusDuration(data, size);
174 // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
175 // motivations in http://crbug.com/396634.
177 return kNoTimestamp();
180 base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
181 int size) {
182 // Masks and constants for Opus packets. See
183 // https://tools.ietf.org/html/rfc6716#page-14
184 static const uint8_t kTocConfigMask = 0xf8;
185 static const uint8_t kTocFrameCountCodeMask = 0x03;
186 static const uint8_t kFrameCountMask = 0x3f;
187 static const base::TimeDelta kPacketDurationMax =
188 base::TimeDelta::FromMilliseconds(120);
190 if (size < 1) {
191 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
192 kMaxDurationErrorLogs)
193 << "Invalid zero-byte Opus packet; demuxed block duration may be "
194 "imprecise.";
195 return kNoTimestamp();
198 // Frame count type described by last 2 bits of Opus TOC byte.
199 int frame_count_type = data[0] & kTocFrameCountCodeMask;
201 int frame_count = 0;
202 switch (frame_count_type) {
203 case 0:
204 frame_count = 1;
205 break;
206 case 1:
207 case 2:
208 frame_count = 2;
209 break;
210 case 3:
211 // Type 3 indicates an arbitrary frame count described in the next byte.
212 if (size < 2) {
213 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
214 kMaxDurationErrorLogs)
215 << "Second byte missing from 'Code 3' Opus packet; demuxed block "
216 "duration may be imprecise.";
217 return kNoTimestamp();
220 frame_count = data[1] & kFrameCountMask;
222 if (frame_count == 0) {
223 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
224 kMaxDurationErrorLogs)
225 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
226 "block duration may be imprecise.";
227 return kNoTimestamp();
230 break;
231 default:
232 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
233 kMaxDurationErrorLogs)
234 << "Unexpected Opus frame count type: " << frame_count_type << "; "
235 << "demuxed block duration may be imprecise.";
236 return kNoTimestamp();
239 int opusConfig = (data[0] & kTocConfigMask) >> 3;
240 CHECK_GE(opusConfig, 0);
241 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
243 DCHECK_GT(frame_count, 0);
244 base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
245 kOpusFrameDurationsMu[opusConfig] * frame_count);
247 if (duration > kPacketDurationMax) {
248 // Intentionally allowing packet to pass through for now. Decoder should
249 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
250 // things go sideways.
251 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
252 kMaxDurationErrorLogs)
253 << "Warning, demuxed Opus packet with encoded duration: " << duration
254 << ". Should be no greater than " << kPacketDurationMax;
257 return duration;
260 WebMParserClient* WebMClusterParser::OnListStart(int id) {
261 if (id == kWebMIdCluster) {
262 cluster_timecode_ = -1;
263 cluster_start_time_ = kNoTimestamp();
264 } else if (id == kWebMIdBlockGroup) {
265 block_data_.reset();
266 block_data_size_ = -1;
267 block_duration_ = -1;
268 discard_padding_ = -1;
269 discard_padding_set_ = false;
270 } else if (id == kWebMIdBlockAdditions) {
271 block_add_id_ = -1;
272 block_additional_data_.reset();
273 block_additional_data_size_ = 0;
276 return this;
279 bool WebMClusterParser::OnListEnd(int id) {
280 if (id != kWebMIdBlockGroup)
281 return true;
283 // Make sure the BlockGroup actually had a Block.
284 if (block_data_size_ == -1) {
285 MEDIA_LOG(ERROR, media_log_) << "Block missing from BlockGroup.";
286 return false;
289 bool result = ParseBlock(false, block_data_.get(), block_data_size_,
290 block_additional_data_.get(),
291 block_additional_data_size_, block_duration_,
292 discard_padding_set_ ? discard_padding_ : 0);
293 block_data_.reset();
294 block_data_size_ = -1;
295 block_duration_ = -1;
296 block_add_id_ = -1;
297 block_additional_data_.reset();
298 block_additional_data_size_ = 0;
299 discard_padding_ = -1;
300 discard_padding_set_ = false;
301 return result;
304 bool WebMClusterParser::OnUInt(int id, int64 val) {
305 int64* dst;
306 switch (id) {
307 case kWebMIdTimecode:
308 dst = &cluster_timecode_;
309 break;
310 case kWebMIdBlockDuration:
311 dst = &block_duration_;
312 break;
313 case kWebMIdBlockAddID:
314 dst = &block_add_id_;
315 break;
316 default:
317 return true;
319 if (*dst != -1)
320 return false;
321 *dst = val;
322 return true;
325 bool WebMClusterParser::ParseBlock(bool is_simple_block,
326 const uint8_t* buf,
327 int size,
328 const uint8_t* additional,
329 int additional_size,
330 int duration,
331 int64 discard_padding) {
332 if (size < 4)
333 return false;
335 // Return an error if the trackNum > 127. We just aren't
336 // going to support large track numbers right now.
337 if (!(buf[0] & 0x80)) {
338 MEDIA_LOG(ERROR, media_log_) << "TrackNumber over 127 not supported";
339 return false;
342 int track_num = buf[0] & 0x7f;
343 int timecode = buf[1] << 8 | buf[2];
344 int flags = buf[3] & 0xff;
345 int lacing = (flags >> 1) & 0x3;
347 if (lacing) {
348 MEDIA_LOG(ERROR, media_log_) << "Lacing " << lacing
349 << " is not supported yet.";
350 return false;
353 // Sign extend negative timecode offsets.
354 if (timecode & 0x8000)
355 timecode |= ~0xffff;
357 const uint8_t* frame_data = buf + 4;
358 int frame_size = size - (frame_data - buf);
359 return OnBlock(is_simple_block, track_num, timecode, duration, flags,
360 frame_data, frame_size, additional, additional_size,
361 discard_padding);
364 bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
365 switch (id) {
366 case kWebMIdSimpleBlock:
367 return ParseBlock(true, data, size, NULL, 0, -1, 0);
369 case kWebMIdBlock:
370 if (block_data_) {
371 MEDIA_LOG(ERROR, media_log_)
372 << "More than 1 Block in a BlockGroup is not "
373 "supported.";
374 return false;
376 block_data_.reset(new uint8_t[size]);
377 memcpy(block_data_.get(), data, size);
378 block_data_size_ = size;
379 return true;
381 case kWebMIdBlockAdditional: {
382 uint64 block_add_id = base::HostToNet64(block_add_id_);
383 if (block_additional_data_) {
384 // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
385 // as per matroska spec. But for now we don't have a use case to
386 // support parsing of such files. Take a look at this again when such a
387 // case arises.
388 MEDIA_LOG(ERROR, media_log_) << "More than 1 BlockAdditional in a "
389 "BlockGroup is not supported.";
390 return false;
392 // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
393 // element's value in Big Endian format. This is done to mimic ffmpeg
394 // demuxer's behavior.
395 block_additional_data_size_ = size + sizeof(block_add_id);
396 block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
397 memcpy(block_additional_data_.get(), &block_add_id,
398 sizeof(block_add_id));
399 memcpy(block_additional_data_.get() + 8, data, size);
400 return true;
402 case kWebMIdDiscardPadding: {
403 if (discard_padding_set_ || size <= 0 || size > 8)
404 return false;
405 discard_padding_set_ = true;
407 // Read in the big-endian integer.
408 discard_padding_ = static_cast<int8>(data[0]);
409 for (int i = 1; i < size; ++i)
410 discard_padding_ = (discard_padding_ << 8) | data[i];
412 return true;
414 default:
415 return true;
419 bool WebMClusterParser::OnBlock(bool is_simple_block,
420 int track_num,
421 int timecode,
422 int block_duration,
423 int flags,
424 const uint8_t* data,
425 int size,
426 const uint8_t* additional,
427 int additional_size,
428 int64 discard_padding) {
429 DCHECK_GE(size, 0);
430 if (cluster_timecode_ == -1) {
431 MEDIA_LOG(ERROR, media_log_) << "Got a block before cluster timecode.";
432 return false;
435 // TODO(acolwell): Should relative negative timecode offsets be rejected? Or
436 // only when the absolute timecode is negative? See http://crbug.com/271794
437 if (timecode < 0) {
438 MEDIA_LOG(ERROR, media_log_) << "Got a block with negative timecode offset "
439 << timecode;
440 return false;
443 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
444 MEDIA_LOG(ERROR, media_log_)
445 << "Got a block with a timecode before the previous block.";
446 return false;
449 Track* track = NULL;
450 StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
451 std::string encryption_key_id;
452 base::TimeDelta encoded_duration = kNoTimestamp();
453 if (track_num == audio_.track_num()) {
454 track = &audio_;
455 encryption_key_id = audio_encryption_key_id_;
456 if (encryption_key_id.empty()) {
457 encoded_duration = TryGetEncodedAudioDuration(data, size);
459 } else if (track_num == video_.track_num()) {
460 track = &video_;
461 encryption_key_id = video_encryption_key_id_;
462 buffer_type = DemuxerStream::VIDEO;
463 } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
464 return true;
465 } else if (Track* const text_track = FindTextTrack(track_num)) {
466 if (is_simple_block) // BlockGroup is required for WebVTT cues
467 return false;
468 if (block_duration < 0) // not specified
469 return false;
470 track = text_track;
471 buffer_type = DemuxerStream::TEXT;
472 } else {
473 MEDIA_LOG(ERROR, media_log_) << "Unexpected track number " << track_num;
474 return false;
477 last_block_timecode_ = timecode;
479 base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
480 (cluster_timecode_ + timecode) * timecode_multiplier_);
482 scoped_refptr<StreamParserBuffer> buffer;
483 if (buffer_type != DemuxerStream::TEXT) {
484 // The first bit of the flags is set when a SimpleBlock contains only
485 // keyframes. If this is a Block, then inspection of the payload is
486 // necessary to determine whether it contains a keyframe or not.
487 // http://www.matroska.org/technical/specs/index.html
488 bool is_keyframe =
489 is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
491 // Every encrypted Block has a signal byte and IV prepended to it. Current
492 // encrypted WebM request for comments specification is here
493 // http://wiki.webmproject.org/encryption/webm-encryption-rfc
494 scoped_ptr<DecryptConfig> decrypt_config;
495 int data_offset = 0;
496 if (!encryption_key_id.empty() &&
497 !WebMCreateDecryptConfig(
498 data, size,
499 reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
500 encryption_key_id.size(),
501 &decrypt_config, &data_offset)) {
502 return false;
505 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
506 // type with remapped bytestream track numbers and allow multiple tracks as
507 // applicable. See https://crbug.com/341581.
508 buffer = StreamParserBuffer::CopyFrom(
509 data + data_offset, size - data_offset,
510 additional, additional_size,
511 is_keyframe, buffer_type, track_num);
513 if (decrypt_config)
514 buffer->set_decrypt_config(decrypt_config.Pass());
515 } else {
516 std::string id, settings, content;
517 WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
519 std::vector<uint8_t> side_data;
520 MakeSideData(id.begin(), id.end(),
521 settings.begin(), settings.end(),
522 &side_data);
524 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
525 // type with remapped bytestream track numbers and allow multiple tracks as
526 // applicable. See https://crbug.com/341581.
527 buffer = StreamParserBuffer::CopyFrom(
528 reinterpret_cast<const uint8_t*>(content.data()),
529 content.length(),
530 &side_data[0],
531 side_data.size(),
532 true, buffer_type, track_num);
535 buffer->set_timestamp(timestamp);
536 if (cluster_start_time_ == kNoTimestamp())
537 cluster_start_time_ = timestamp;
539 base::TimeDelta block_duration_time_delta = kNoTimestamp();
540 if (block_duration >= 0) {
541 block_duration_time_delta = base::TimeDelta::FromMicroseconds(
542 block_duration * timecode_multiplier_);
545 // Prefer encoded duration over BlockGroup->BlockDuration or
546 // TrackEntry->DefaultDuration when available. This layering violation is a
547 // workaround for http://crbug.com/396634, decreasing the likelihood of
548 // fall-back to rough estimation techniques for Blocks that lack a
549 // BlockDuration at the end of a cluster. Cross cluster durations are not
550 // feasible given flexibility of cluster ordering and MSE APIs. Duration
551 // estimation may still apply in cases of encryption and codecs for which
552 // we do not extract encoded duration. Within a cluster, estimates are applied
553 // as Block Timecode deltas, or once the whole cluster is parsed in the case
554 // of the last Block in the cluster. See Track::AddBuffer and
555 // ApplyDurationEstimateIfNeeded().
556 if (encoded_duration != kNoTimestamp()) {
557 DCHECK(encoded_duration != kInfiniteDuration());
558 DCHECK(encoded_duration > base::TimeDelta());
559 buffer->set_duration(encoded_duration);
561 DVLOG(3) << __FUNCTION__ << " : "
562 << "Using encoded duration " << encoded_duration.InSecondsF();
564 if (block_duration_time_delta != kNoTimestamp()) {
565 base::TimeDelta duration_difference =
566 block_duration_time_delta - encoded_duration;
568 const auto kWarnDurationDiff =
569 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
570 if (duration_difference.magnitude() > kWarnDurationDiff) {
571 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
572 kMaxDurationErrorLogs)
573 << "BlockDuration "
574 << "(" << block_duration_time_delta << ") "
575 << "differs significantly from encoded duration "
576 << "(" << encoded_duration << ").";
579 } else if (block_duration_time_delta != kNoTimestamp()) {
580 buffer->set_duration(block_duration_time_delta);
581 } else {
582 DCHECK_NE(buffer_type, DemuxerStream::TEXT);
583 buffer->set_duration(track->default_duration());
586 if (discard_padding != 0) {
587 buffer->set_discard_padding(std::make_pair(
588 base::TimeDelta(),
589 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
592 return track->AddBuffer(buffer);
595 WebMClusterParser::Track::Track(int track_num,
596 bool is_video,
597 base::TimeDelta default_duration,
598 const scoped_refptr<MediaLog>& media_log)
599 : num_duration_estimates_(0),
600 track_num_(track_num),
601 is_video_(is_video),
602 default_duration_(default_duration),
603 estimated_next_frame_duration_(kNoTimestamp()),
604 media_log_(media_log) {
605 DCHECK(default_duration_ == kNoTimestamp() ||
606 default_duration_ > base::TimeDelta());
609 WebMClusterParser::Track::~Track() {}
611 DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() {
612 DCHECK(ready_buffers_.empty());
613 if (last_added_buffer_missing_duration_.get())
614 return last_added_buffer_missing_duration_->GetDecodeTimestamp();
616 return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
619 void WebMClusterParser::Track::ExtractReadyBuffers(
620 const DecodeTimestamp before_timestamp) {
621 DCHECK(ready_buffers_.empty());
622 DCHECK(DecodeTimestamp() <= before_timestamp);
623 DCHECK(kNoDecodeTimestamp() != before_timestamp);
625 if (buffers_.empty())
626 return;
628 if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) {
629 // All of |buffers_| are ready.
630 ready_buffers_.swap(buffers_);
631 DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All "
632 << ready_buffers_.size() << " are ready: before upper bound ts "
633 << before_timestamp.InSecondsF();
634 return;
637 // Not all of |buffers_| are ready yet. Move any that are ready to
638 // |ready_buffers_|.
639 while (true) {
640 const scoped_refptr<StreamParserBuffer>& buffer = buffers_.front();
641 if (buffer->GetDecodeTimestamp() >= before_timestamp)
642 break;
643 ready_buffers_.push_back(buffer);
644 buffers_.pop_front();
645 DCHECK(!buffers_.empty());
648 DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only "
649 << ready_buffers_.size() << " ready, " << buffers_.size()
650 << " at or after upper bound ts " << before_timestamp.InSecondsF();
653 bool WebMClusterParser::Track::AddBuffer(
654 const scoped_refptr<StreamParserBuffer>& buffer) {
655 DVLOG(2) << "AddBuffer() : " << track_num_
656 << " ts " << buffer->timestamp().InSecondsF()
657 << " dur " << buffer->duration().InSecondsF()
658 << " kf " << buffer->is_key_frame()
659 << " size " << buffer->data_size();
661 if (last_added_buffer_missing_duration_.get()) {
662 base::TimeDelta derived_duration =
663 buffer->timestamp() - last_added_buffer_missing_duration_->timestamp();
664 last_added_buffer_missing_duration_->set_duration(derived_duration);
666 DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
667 << " ts "
668 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
669 << " dur "
670 << last_added_buffer_missing_duration_->duration().InSecondsF()
671 << " kf " << last_added_buffer_missing_duration_->is_key_frame()
672 << " size " << last_added_buffer_missing_duration_->data_size();
673 scoped_refptr<StreamParserBuffer> updated_buffer =
674 last_added_buffer_missing_duration_;
675 last_added_buffer_missing_duration_ = NULL;
676 if (!QueueBuffer(updated_buffer))
677 return false;
680 if (buffer->duration() == kNoTimestamp()) {
681 last_added_buffer_missing_duration_ = buffer;
682 DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
683 return true;
686 return QueueBuffer(buffer);
689 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
690 if (!last_added_buffer_missing_duration_.get())
691 return;
693 base::TimeDelta estimated_duration = GetDurationEstimate();
694 last_added_buffer_missing_duration_->set_duration(estimated_duration);
696 if (is_video_) {
697 // Exposing estimation so splicing/overlap frame processing can make
698 // informed decisions downstream.
699 // TODO(chcunningham): Set this for audio as well in later change where
700 // audio is switched to max estimation and splicing is disabled.
701 last_added_buffer_missing_duration_->set_is_duration_estimated(true);
704 LIMITED_MEDIA_LOG(INFO, media_log_, num_duration_estimates_,
705 kMaxDurationEstimateLogs)
706 << "Estimating WebM block duration to be " << estimated_duration << " "
707 << "for the last (Simple)Block in the Cluster for this Track. Use "
708 << "BlockGroups with BlockDurations at the end of each Track in a "
709 << "Cluster to avoid estimation.";
711 DVLOG(2) << __FUNCTION__ << " new dur : ts "
712 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
713 << " dur "
714 << last_added_buffer_missing_duration_->duration().InSecondsF()
715 << " kf " << last_added_buffer_missing_duration_->is_key_frame()
716 << " size " << last_added_buffer_missing_duration_->data_size();
718 // Don't use the applied duration as a future estimation (don't use
719 // QueueBuffer() here.)
720 buffers_.push_back(last_added_buffer_missing_duration_);
721 last_added_buffer_missing_duration_ = NULL;
724 void WebMClusterParser::Track::ClearReadyBuffers() {
725 // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
726 // reset here.
727 ready_buffers_.clear();
730 void WebMClusterParser::Track::Reset() {
731 ClearReadyBuffers();
732 buffers_.clear();
733 last_added_buffer_missing_duration_ = NULL;
736 bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
737 // For now, assume that all blocks are keyframes for datatypes other than
738 // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
739 if (!is_video_)
740 return true;
742 // Make sure the block is big enough for the minimal keyframe header size.
743 if (size < 7)
744 return false;
746 // The LSb of the first byte must be a 0 for a keyframe.
747 // http://tools.ietf.org/html/rfc6386 Section 19.1
748 if ((data[0] & 0x01) != 0)
749 return false;
751 // Verify VP8 keyframe startcode.
752 // http://tools.ietf.org/html/rfc6386 Section 19.1
753 if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
754 return false;
756 return true;
759 bool WebMClusterParser::Track::QueueBuffer(
760 const scoped_refptr<StreamParserBuffer>& buffer) {
761 DCHECK(!last_added_buffer_missing_duration_.get());
763 // WebMClusterParser::OnBlock() gives MEDIA_LOG and parse error on decreasing
764 // block timecode detection within a cluster. Therefore, we should not see
765 // those here.
766 DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
767 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
768 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
770 base::TimeDelta duration = buffer->duration();
771 if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
772 MEDIA_LOG(ERROR, media_log_)
773 << "Invalid buffer duration: " << duration.InSecondsF();
774 return false;
777 // The estimated frame duration is the minimum (for audio) or the maximum
778 // (for video) non-zero duration since the last initialization segment. The
779 // minimum is used for audio to ensure frame durations aren't overestimated,
780 // triggering unnecessary frame splicing. For video, splicing does not apply,
781 // so maximum is used and overlap is simply resolved by showing the
782 // later of the overlapping frames at its given PTS, effectively trimming down
783 // the over-estimated duration of the previous frame.
784 // TODO(chcunningham): Use max for audio and disable splicing whenever
785 // estimated buffers are encountered.
786 if (duration > base::TimeDelta()) {
787 base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
788 if (estimated_next_frame_duration_ == kNoTimestamp()) {
789 estimated_next_frame_duration_ = duration;
790 } else if (is_video_) {
791 estimated_next_frame_duration_ =
792 std::max(duration, estimated_next_frame_duration_);
793 } else {
794 estimated_next_frame_duration_ =
795 std::min(duration, estimated_next_frame_duration_);
798 if (orig_duration_estimate != estimated_next_frame_duration_) {
799 DVLOG(3) << "Updated duration estimate:"
800 << orig_duration_estimate
801 << " -> "
802 << estimated_next_frame_duration_
803 << " at timestamp: "
804 << buffer->GetDecodeTimestamp().InSecondsF();
808 buffers_.push_back(buffer);
809 return true;
812 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
813 base::TimeDelta duration = estimated_next_frame_duration_;
814 if (duration != kNoTimestamp()) {
815 DVLOG(3) << __FUNCTION__ << " : using estimated duration";
816 } else {
817 DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
818 if (is_video_) {
819 duration = base::TimeDelta::FromMilliseconds(
820 kDefaultVideoBufferDurationInMs);
821 } else {
822 duration = base::TimeDelta::FromMilliseconds(
823 kDefaultAudioBufferDurationInMs);
827 DCHECK(duration > base::TimeDelta());
828 DCHECK(duration != kNoTimestamp());
829 return duration;
832 void WebMClusterParser::ClearTextTrackReadyBuffers() {
833 text_buffers_map_.clear();
834 for (TextTrackMap::iterator it = text_track_map_.begin();
835 it != text_track_map_.end();
836 ++it) {
837 it->second.ClearReadyBuffers();
841 void WebMClusterParser::ResetTextTracks() {
842 ClearTextTrackReadyBuffers();
843 for (TextTrackMap::iterator it = text_track_map_.begin();
844 it != text_track_map_.end();
845 ++it) {
846 it->second.Reset();
850 void WebMClusterParser::UpdateReadyBuffers() {
851 DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp());
852 DCHECK(text_buffers_map_.empty());
854 if (cluster_ended_) {
855 audio_.ApplyDurationEstimateIfNeeded();
856 video_.ApplyDurationEstimateIfNeeded();
857 // Per OnBlock(), all text buffers should already have valid durations, so
858 // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
859 // here.
860 ready_buffer_upper_bound_ =
861 DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
862 DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound());
863 DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound());
864 } else {
865 ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(),
866 video_.GetReadyUpperBound());
867 DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_);
868 DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_);
871 // Prepare each track's ready buffers for retrieval.
872 audio_.ExtractReadyBuffers(ready_buffer_upper_bound_);
873 video_.ExtractReadyBuffers(ready_buffer_upper_bound_);
874 for (TextTrackMap::iterator itr = text_track_map_.begin();
875 itr != text_track_map_.end();
876 ++itr) {
877 itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_);
881 WebMClusterParser::Track*
882 WebMClusterParser::FindTextTrack(int track_num) {
883 const TextTrackMap::iterator it = text_track_map_.find(track_num);
885 if (it == text_track_map_.end())
886 return NULL;
888 return &it->second;
891 } // namespace media