Update V8 to version 4.7.56.
[chromium-blink-merge.git] / media / formats / webm / webm_cluster_parser.cc
blobee491371d3cd116c148142b30d3797fc1c418504
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/formats/webm/webm_cluster_parser.h"
7 #include <vector>
9 #include "base/logging.h"
10 #include "base/sys_byteorder.h"
11 #include "media/base/decrypt_config.h"
12 #include "media/base/timestamp_constants.h"
13 #include "media/filters/webvtt_util.h"
14 #include "media/formats/webm/webm_constants.h"
15 #include "media/formats/webm/webm_crypto_helpers.h"
16 #include "media/formats/webm/webm_webvtt_parser.h"
18 namespace media {
20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
// Caps on how often particular MEDIA_LOG() messages may be emitted.
enum {
  // Upper bound on MEDIA_LOG() calls made while reading encoded durations, so
  // that corrupted data cannot flood the log.
  kMaxDurationErrorLogs = 10,

  // Upper bound on MEDIA_LOG() calls warning the user that buffer durations
  // have been estimated.
  kMaxDurationEstimateLogs = 10,
};
34 WebMClusterParser::WebMClusterParser(
35 int64 timecode_scale,
36 int audio_track_num,
37 base::TimeDelta audio_default_duration,
38 int video_track_num,
39 base::TimeDelta video_default_duration,
40 const WebMTracksParser::TextTracks& text_tracks,
41 const std::set<int64>& ignored_tracks,
42 const std::string& audio_encryption_key_id,
43 const std::string& video_encryption_key_id,
44 const AudioCodec audio_codec,
45 const scoped_refptr<MediaLog>& media_log)
46 : timecode_multiplier_(timecode_scale / 1000.0),
47 ignored_tracks_(ignored_tracks),
48 audio_encryption_key_id_(audio_encryption_key_id),
49 video_encryption_key_id_(video_encryption_key_id),
50 audio_codec_(audio_codec),
51 parser_(kWebMIdCluster, this),
52 cluster_start_time_(kNoTimestamp()),
53 audio_(audio_track_num, false, audio_default_duration, media_log),
54 video_(video_track_num, true, video_default_duration, media_log),
55 ready_buffer_upper_bound_(kNoDecodeTimestamp()),
56 media_log_(media_log) {
57 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
58 it != text_tracks.end();
59 ++it) {
60 text_track_map_.insert(std::make_pair(
61 it->first, Track(it->first, false, kNoTimestamp(), media_log_)));
65 WebMClusterParser::~WebMClusterParser() {}
67 void WebMClusterParser::Reset() {
68 last_block_timecode_ = -1;
69 cluster_timecode_ = -1;
70 cluster_start_time_ = kNoTimestamp();
71 cluster_ended_ = false;
72 parser_.Reset();
73 audio_.Reset();
74 video_.Reset();
75 ResetTextTracks();
76 ready_buffer_upper_bound_ = kNoDecodeTimestamp();
79 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
80 audio_.ClearReadyBuffers();
81 video_.ClearReadyBuffers();
82 ClearTextTrackReadyBuffers();
83 ready_buffer_upper_bound_ = kNoDecodeTimestamp();
85 int result = parser_.Parse(buf, size);
87 if (result < 0) {
88 cluster_ended_ = false;
89 return result;
92 cluster_ended_ = parser_.IsParsingComplete();
93 if (cluster_ended_) {
94 // If there were no buffers in this cluster, set the cluster start time to
95 // be the |cluster_timecode_|.
96 if (cluster_start_time_ == kNoTimestamp()) {
97 // If the cluster did not even have a |cluster_timecode_|, signal parse
98 // error.
99 if (cluster_timecode_ < 0)
100 return -1;
102 cluster_start_time_ = base::TimeDelta::FromMicroseconds(
103 cluster_timecode_ * timecode_multiplier_);
106 // Reset the parser if we're done parsing so that
107 // it is ready to accept another cluster on the next
108 // call.
109 parser_.Reset();
111 last_block_timecode_ = -1;
112 cluster_timecode_ = -1;
115 return result;
118 const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() {
119 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
120 UpdateReadyBuffers();
122 return audio_.ready_buffers();
125 const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() {
126 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
127 UpdateReadyBuffers();
129 return video_.ready_buffers();
132 const WebMClusterParser::TextBufferQueueMap&
133 WebMClusterParser::GetTextBuffers() {
134 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
135 UpdateReadyBuffers();
137 // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in
138 // the output only for non-empty ready_buffer() queues in |text_track_map_|.
139 text_buffers_map_.clear();
140 for (TextTrackMap::const_iterator itr = text_track_map_.begin();
141 itr != text_track_map_.end();
142 ++itr) {
143 const BufferQueue& text_buffers = itr->second.ready_buffers();
144 if (!text_buffers.empty())
145 text_buffers_map_.insert(std::make_pair(itr->first, text_buffers));
148 return text_buffers_map_;
151 base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
152 const uint8_t* data,
153 int size) {
155 // Duration is currently read assuming the *entire* stream is unencrypted.
156 // The special "Signal Byte" prepended to Blocks in encrypted streams is
157 // assumed to not be present.
158 // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams
159 // to return duration for any unencrypted blocks.
161 if (audio_codec_ == kCodecOpus) {
162 return ReadOpusDuration(data, size);
165 // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
166 // motivations in http://crbug.com/396634.
168 return kNoTimestamp();
171 base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
172 int size) {
173 // Masks and constants for Opus packets. See
174 // https://tools.ietf.org/html/rfc6716#page-14
175 static const uint8_t kTocConfigMask = 0xf8;
176 static const uint8_t kTocFrameCountCodeMask = 0x03;
177 static const uint8_t kFrameCountMask = 0x3f;
178 static const base::TimeDelta kPacketDurationMax =
179 base::TimeDelta::FromMilliseconds(120);
181 if (size < 1) {
182 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
183 kMaxDurationErrorLogs)
184 << "Invalid zero-byte Opus packet; demuxed block duration may be "
185 "imprecise.";
186 return kNoTimestamp();
189 // Frame count type described by last 2 bits of Opus TOC byte.
190 int frame_count_type = data[0] & kTocFrameCountCodeMask;
192 int frame_count = 0;
193 switch (frame_count_type) {
194 case 0:
195 frame_count = 1;
196 break;
197 case 1:
198 case 2:
199 frame_count = 2;
200 break;
201 case 3:
202 // Type 3 indicates an arbitrary frame count described in the next byte.
203 if (size < 2) {
204 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
205 kMaxDurationErrorLogs)
206 << "Second byte missing from 'Code 3' Opus packet; demuxed block "
207 "duration may be imprecise.";
208 return kNoTimestamp();
211 frame_count = data[1] & kFrameCountMask;
213 if (frame_count == 0) {
214 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
215 kMaxDurationErrorLogs)
216 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
217 "block duration may be imprecise.";
218 return kNoTimestamp();
221 break;
222 default:
223 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
224 kMaxDurationErrorLogs)
225 << "Unexpected Opus frame count type: " << frame_count_type << "; "
226 << "demuxed block duration may be imprecise.";
227 return kNoTimestamp();
230 int opusConfig = (data[0] & kTocConfigMask) >> 3;
231 CHECK_GE(opusConfig, 0);
232 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
234 DCHECK_GT(frame_count, 0);
235 base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
236 kOpusFrameDurationsMu[opusConfig] * frame_count);
238 if (duration > kPacketDurationMax) {
239 // Intentionally allowing packet to pass through for now. Decoder should
240 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
241 // things go sideways.
242 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
243 kMaxDurationErrorLogs)
244 << "Warning, demuxed Opus packet with encoded duration: "
245 << duration.InMilliseconds() << "ms. Should be no greater than "
246 << kPacketDurationMax.InMilliseconds() << "ms.";
249 return duration;
252 WebMParserClient* WebMClusterParser::OnListStart(int id) {
253 if (id == kWebMIdCluster) {
254 cluster_timecode_ = -1;
255 cluster_start_time_ = kNoTimestamp();
256 } else if (id == kWebMIdBlockGroup) {
257 block_data_.reset();
258 block_data_size_ = -1;
259 block_duration_ = -1;
260 discard_padding_ = -1;
261 discard_padding_set_ = false;
262 } else if (id == kWebMIdBlockAdditions) {
263 block_add_id_ = -1;
264 block_additional_data_.reset();
265 block_additional_data_size_ = 0;
268 return this;
271 bool WebMClusterParser::OnListEnd(int id) {
272 if (id != kWebMIdBlockGroup)
273 return true;
275 // Make sure the BlockGroup actually had a Block.
276 if (block_data_size_ == -1) {
277 MEDIA_LOG(ERROR, media_log_) << "Block missing from BlockGroup.";
278 return false;
281 bool result = ParseBlock(false, block_data_.get(), block_data_size_,
282 block_additional_data_.get(),
283 block_additional_data_size_, block_duration_,
284 discard_padding_set_ ? discard_padding_ : 0);
285 block_data_.reset();
286 block_data_size_ = -1;
287 block_duration_ = -1;
288 block_add_id_ = -1;
289 block_additional_data_.reset();
290 block_additional_data_size_ = 0;
291 discard_padding_ = -1;
292 discard_padding_set_ = false;
293 return result;
296 bool WebMClusterParser::OnUInt(int id, int64 val) {
297 int64* dst;
298 switch (id) {
299 case kWebMIdTimecode:
300 dst = &cluster_timecode_;
301 break;
302 case kWebMIdBlockDuration:
303 dst = &block_duration_;
304 break;
305 case kWebMIdBlockAddID:
306 dst = &block_add_id_;
307 break;
308 default:
309 return true;
311 if (*dst != -1)
312 return false;
313 *dst = val;
314 return true;
317 bool WebMClusterParser::ParseBlock(bool is_simple_block,
318 const uint8_t* buf,
319 int size,
320 const uint8_t* additional,
321 int additional_size,
322 int duration,
323 int64 discard_padding) {
324 if (size < 4)
325 return false;
327 // Return an error if the trackNum > 127. We just aren't
328 // going to support large track numbers right now.
329 if (!(buf[0] & 0x80)) {
330 MEDIA_LOG(ERROR, media_log_) << "TrackNumber over 127 not supported";
331 return false;
334 int track_num = buf[0] & 0x7f;
335 int timecode = buf[1] << 8 | buf[2];
336 int flags = buf[3] & 0xff;
337 int lacing = (flags >> 1) & 0x3;
339 if (lacing) {
340 MEDIA_LOG(ERROR, media_log_) << "Lacing " << lacing
341 << " is not supported yet.";
342 return false;
345 // Sign extend negative timecode offsets.
346 if (timecode & 0x8000)
347 timecode |= ~0xffff;
349 const uint8_t* frame_data = buf + 4;
350 int frame_size = size - (frame_data - buf);
351 return OnBlock(is_simple_block, track_num, timecode, duration, flags,
352 frame_data, frame_size, additional, additional_size,
353 discard_padding);
356 bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
357 switch (id) {
358 case kWebMIdSimpleBlock:
359 return ParseBlock(true, data, size, NULL, 0, -1, 0);
361 case kWebMIdBlock:
362 if (block_data_) {
363 MEDIA_LOG(ERROR, media_log_)
364 << "More than 1 Block in a BlockGroup is not "
365 "supported.";
366 return false;
368 block_data_.reset(new uint8_t[size]);
369 memcpy(block_data_.get(), data, size);
370 block_data_size_ = size;
371 return true;
373 case kWebMIdBlockAdditional: {
374 uint64 block_add_id = base::HostToNet64(block_add_id_);
375 if (block_additional_data_) {
376 // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
377 // as per matroska spec. But for now we don't have a use case to
378 // support parsing of such files. Take a look at this again when such a
379 // case arises.
380 MEDIA_LOG(ERROR, media_log_) << "More than 1 BlockAdditional in a "
381 "BlockGroup is not supported.";
382 return false;
384 // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
385 // element's value in Big Endian format. This is done to mimic ffmpeg
386 // demuxer's behavior.
387 block_additional_data_size_ = size + sizeof(block_add_id);
388 block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
389 memcpy(block_additional_data_.get(), &block_add_id,
390 sizeof(block_add_id));
391 memcpy(block_additional_data_.get() + 8, data, size);
392 return true;
394 case kWebMIdDiscardPadding: {
395 if (discard_padding_set_ || size <= 0 || size > 8)
396 return false;
397 discard_padding_set_ = true;
399 // Read in the big-endian integer.
400 discard_padding_ = static_cast<int8>(data[0]);
401 for (int i = 1; i < size; ++i)
402 discard_padding_ = (discard_padding_ << 8) | data[i];
404 return true;
406 default:
407 return true;
411 bool WebMClusterParser::OnBlock(bool is_simple_block,
412 int track_num,
413 int timecode,
414 int block_duration,
415 int flags,
416 const uint8_t* data,
417 int size,
418 const uint8_t* additional,
419 int additional_size,
420 int64 discard_padding) {
421 DCHECK_GE(size, 0);
422 if (cluster_timecode_ == -1) {
423 MEDIA_LOG(ERROR, media_log_) << "Got a block before cluster timecode.";
424 return false;
427 // TODO(acolwell): Should relative negative timecode offsets be rejected? Or
428 // only when the absolute timecode is negative? See http://crbug.com/271794
429 if (timecode < 0) {
430 MEDIA_LOG(ERROR, media_log_) << "Got a block with negative timecode offset "
431 << timecode;
432 return false;
435 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
436 MEDIA_LOG(ERROR, media_log_)
437 << "Got a block with a timecode before the previous block.";
438 return false;
441 Track* track = NULL;
442 StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
443 std::string encryption_key_id;
444 base::TimeDelta encoded_duration = kNoTimestamp();
445 if (track_num == audio_.track_num()) {
446 track = &audio_;
447 encryption_key_id = audio_encryption_key_id_;
448 if (encryption_key_id.empty()) {
449 encoded_duration = TryGetEncodedAudioDuration(data, size);
451 } else if (track_num == video_.track_num()) {
452 track = &video_;
453 encryption_key_id = video_encryption_key_id_;
454 buffer_type = DemuxerStream::VIDEO;
455 } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
456 return true;
457 } else if (Track* const text_track = FindTextTrack(track_num)) {
458 if (is_simple_block) // BlockGroup is required for WebVTT cues
459 return false;
460 if (block_duration < 0) // not specified
461 return false;
462 track = text_track;
463 buffer_type = DemuxerStream::TEXT;
464 } else {
465 MEDIA_LOG(ERROR, media_log_) << "Unexpected track number " << track_num;
466 return false;
469 last_block_timecode_ = timecode;
471 base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
472 (cluster_timecode_ + timecode) * timecode_multiplier_);
474 scoped_refptr<StreamParserBuffer> buffer;
475 if (buffer_type != DemuxerStream::TEXT) {
476 // The first bit of the flags is set when a SimpleBlock contains only
477 // keyframes. If this is a Block, then inspection of the payload is
478 // necessary to determine whether it contains a keyframe or not.
479 // http://www.matroska.org/technical/specs/index.html
480 bool is_keyframe =
481 is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
483 // Every encrypted Block has a signal byte and IV prepended to it. Current
484 // encrypted WebM request for comments specification is here
485 // http://wiki.webmproject.org/encryption/webm-encryption-rfc
486 scoped_ptr<DecryptConfig> decrypt_config;
487 int data_offset = 0;
488 if (!encryption_key_id.empty() &&
489 !WebMCreateDecryptConfig(
490 data, size,
491 reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
492 encryption_key_id.size(),
493 &decrypt_config, &data_offset)) {
494 return false;
497 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
498 // type with remapped bytestream track numbers and allow multiple tracks as
499 // applicable. See https://crbug.com/341581.
500 buffer = StreamParserBuffer::CopyFrom(
501 data + data_offset, size - data_offset,
502 additional, additional_size,
503 is_keyframe, buffer_type, track_num);
505 if (decrypt_config)
506 buffer->set_decrypt_config(decrypt_config.Pass());
507 } else {
508 std::string id, settings, content;
509 WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
511 std::vector<uint8_t> side_data;
512 MakeSideData(id.begin(), id.end(),
513 settings.begin(), settings.end(),
514 &side_data);
516 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
517 // type with remapped bytestream track numbers and allow multiple tracks as
518 // applicable. See https://crbug.com/341581.
519 buffer = StreamParserBuffer::CopyFrom(
520 reinterpret_cast<const uint8_t*>(content.data()),
521 content.length(),
522 &side_data[0],
523 side_data.size(),
524 true, buffer_type, track_num);
527 buffer->set_timestamp(timestamp);
528 if (cluster_start_time_ == kNoTimestamp())
529 cluster_start_time_ = timestamp;
531 base::TimeDelta block_duration_time_delta = kNoTimestamp();
532 if (block_duration >= 0) {
533 block_duration_time_delta = base::TimeDelta::FromMicroseconds(
534 block_duration * timecode_multiplier_);
537 // Prefer encoded duration over BlockGroup->BlockDuration or
538 // TrackEntry->DefaultDuration when available. This layering violation is a
539 // workaround for http://crbug.com/396634, decreasing the likelihood of
540 // fall-back to rough estimation techniques for Blocks that lack a
541 // BlockDuration at the end of a cluster. Cross cluster durations are not
542 // feasible given flexibility of cluster ordering and MSE APIs. Duration
543 // estimation may still apply in cases of encryption and codecs for which
544 // we do not extract encoded duration. Within a cluster, estimates are applied
545 // as Block Timecode deltas, or once the whole cluster is parsed in the case
546 // of the last Block in the cluster. See Track::AddBuffer and
547 // ApplyDurationEstimateIfNeeded().
548 if (encoded_duration != kNoTimestamp()) {
549 DCHECK(encoded_duration != kInfiniteDuration());
550 DCHECK(encoded_duration > base::TimeDelta());
551 buffer->set_duration(encoded_duration);
553 DVLOG(3) << __FUNCTION__ << " : "
554 << "Using encoded duration " << encoded_duration.InSecondsF();
556 if (block_duration_time_delta != kNoTimestamp()) {
557 base::TimeDelta duration_difference =
558 block_duration_time_delta - encoded_duration;
560 const auto kWarnDurationDiff =
561 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
562 if (duration_difference.magnitude() > kWarnDurationDiff) {
563 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
564 kMaxDurationErrorLogs)
565 << "BlockDuration (" << block_duration_time_delta.InMilliseconds()
566 << "ms) differs significantly from encoded duration ("
567 << encoded_duration.InMilliseconds() << "ms).";
570 } else if (block_duration_time_delta != kNoTimestamp()) {
571 buffer->set_duration(block_duration_time_delta);
572 } else {
573 DCHECK_NE(buffer_type, DemuxerStream::TEXT);
574 buffer->set_duration(track->default_duration());
577 if (discard_padding != 0) {
578 buffer->set_discard_padding(std::make_pair(
579 base::TimeDelta(),
580 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
583 return track->AddBuffer(buffer);
586 WebMClusterParser::Track::Track(int track_num,
587 bool is_video,
588 base::TimeDelta default_duration,
589 const scoped_refptr<MediaLog>& media_log)
590 : track_num_(track_num),
591 is_video_(is_video),
592 default_duration_(default_duration),
593 estimated_next_frame_duration_(kNoTimestamp()),
594 media_log_(media_log) {
595 DCHECK(default_duration_ == kNoTimestamp() ||
596 default_duration_ > base::TimeDelta());
599 WebMClusterParser::Track::~Track() {}
601 DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() {
602 DCHECK(ready_buffers_.empty());
603 if (last_added_buffer_missing_duration_.get())
604 return last_added_buffer_missing_duration_->GetDecodeTimestamp();
606 return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
609 void WebMClusterParser::Track::ExtractReadyBuffers(
610 const DecodeTimestamp before_timestamp) {
611 DCHECK(ready_buffers_.empty());
612 DCHECK(DecodeTimestamp() <= before_timestamp);
613 DCHECK(kNoDecodeTimestamp() != before_timestamp);
615 if (buffers_.empty())
616 return;
618 if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) {
619 // All of |buffers_| are ready.
620 ready_buffers_.swap(buffers_);
621 DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All "
622 << ready_buffers_.size() << " are ready: before upper bound ts "
623 << before_timestamp.InSecondsF();
624 return;
627 // Not all of |buffers_| are ready yet. Move any that are ready to
628 // |ready_buffers_|.
629 while (true) {
630 const scoped_refptr<StreamParserBuffer>& buffer = buffers_.front();
631 if (buffer->GetDecodeTimestamp() >= before_timestamp)
632 break;
633 ready_buffers_.push_back(buffer);
634 buffers_.pop_front();
635 DCHECK(!buffers_.empty());
638 DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only "
639 << ready_buffers_.size() << " ready, " << buffers_.size()
640 << " at or after upper bound ts " << before_timestamp.InSecondsF();
643 bool WebMClusterParser::Track::AddBuffer(
644 const scoped_refptr<StreamParserBuffer>& buffer) {
645 DVLOG(2) << "AddBuffer() : " << track_num_
646 << " ts " << buffer->timestamp().InSecondsF()
647 << " dur " << buffer->duration().InSecondsF()
648 << " kf " << buffer->is_key_frame()
649 << " size " << buffer->data_size();
651 if (last_added_buffer_missing_duration_.get()) {
652 base::TimeDelta derived_duration =
653 buffer->timestamp() - last_added_buffer_missing_duration_->timestamp();
654 last_added_buffer_missing_duration_->set_duration(derived_duration);
656 DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
657 << " ts "
658 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
659 << " dur "
660 << last_added_buffer_missing_duration_->duration().InSecondsF()
661 << " kf " << last_added_buffer_missing_duration_->is_key_frame()
662 << " size " << last_added_buffer_missing_duration_->data_size();
663 scoped_refptr<StreamParserBuffer> updated_buffer =
664 last_added_buffer_missing_duration_;
665 last_added_buffer_missing_duration_ = NULL;
666 if (!QueueBuffer(updated_buffer))
667 return false;
670 if (buffer->duration() == kNoTimestamp()) {
671 last_added_buffer_missing_duration_ = buffer;
672 DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
673 return true;
676 return QueueBuffer(buffer);
679 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
680 if (!last_added_buffer_missing_duration_.get())
681 return;
683 base::TimeDelta estimated_duration = GetDurationEstimate();
684 last_added_buffer_missing_duration_->set_duration(estimated_duration);
686 if (is_video_) {
687 // Exposing estimation so splicing/overlap frame processing can make
688 // informed decisions downstream.
689 // TODO(chcunningham): Set this for audio as well in later change where
690 // audio is switched to max estimation and splicing is disabled.
691 last_added_buffer_missing_duration_->set_is_duration_estimated(true);
694 LIMITED_MEDIA_LOG(INFO, media_log_, num_duration_estimates_,
695 kMaxDurationEstimateLogs)
696 << "Estimating WebM block duration to be "
697 << estimated_duration.InMilliseconds()
698 << "ms for the last (Simple)Block in the Cluster for this Track. Use "
699 "BlockGroups with BlockDurations at the end of each Track in a "
700 "Cluster to avoid estimation.";
702 DVLOG(2) << __FUNCTION__ << " new dur : ts "
703 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
704 << " dur "
705 << last_added_buffer_missing_duration_->duration().InSecondsF()
706 << " kf " << last_added_buffer_missing_duration_->is_key_frame()
707 << " size " << last_added_buffer_missing_duration_->data_size();
709 // Don't use the applied duration as a future estimation (don't use
710 // QueueBuffer() here.)
711 buffers_.push_back(last_added_buffer_missing_duration_);
712 last_added_buffer_missing_duration_ = NULL;
715 void WebMClusterParser::Track::ClearReadyBuffers() {
716 // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
717 // reset here.
718 ready_buffers_.clear();
721 void WebMClusterParser::Track::Reset() {
722 ClearReadyBuffers();
723 buffers_.clear();
724 last_added_buffer_missing_duration_ = NULL;
727 bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
728 // For now, assume that all blocks are keyframes for datatypes other than
729 // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
730 if (!is_video_)
731 return true;
733 // Make sure the block is big enough for the minimal keyframe header size.
734 if (size < 7)
735 return false;
737 // The LSb of the first byte must be a 0 for a keyframe.
738 // http://tools.ietf.org/html/rfc6386 Section 19.1
739 if ((data[0] & 0x01) != 0)
740 return false;
742 // Verify VP8 keyframe startcode.
743 // http://tools.ietf.org/html/rfc6386 Section 19.1
744 if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
745 return false;
747 return true;
750 bool WebMClusterParser::Track::QueueBuffer(
751 const scoped_refptr<StreamParserBuffer>& buffer) {
752 DCHECK(!last_added_buffer_missing_duration_.get());
754 // WebMClusterParser::OnBlock() gives MEDIA_LOG and parse error on decreasing
755 // block timecode detection within a cluster. Therefore, we should not see
756 // those here.
757 DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
758 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
759 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
761 base::TimeDelta duration = buffer->duration();
762 if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
763 MEDIA_LOG(ERROR, media_log_)
764 << "Invalid buffer duration: " << duration.InSecondsF();
765 return false;
768 // The estimated frame duration is the minimum (for audio) or the maximum
769 // (for video) non-zero duration since the last initialization segment. The
770 // minimum is used for audio to ensure frame durations aren't overestimated,
771 // triggering unnecessary frame splicing. For video, splicing does not apply,
772 // so maximum is used and overlap is simply resolved by showing the
773 // later of the overlapping frames at its given PTS, effectively trimming down
774 // the over-estimated duration of the previous frame.
775 // TODO(chcunningham): Use max for audio and disable splicing whenever
776 // estimated buffers are encountered.
777 if (duration > base::TimeDelta()) {
778 base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
779 if (estimated_next_frame_duration_ == kNoTimestamp()) {
780 estimated_next_frame_duration_ = duration;
781 } else if (is_video_) {
782 estimated_next_frame_duration_ =
783 std::max(duration, estimated_next_frame_duration_);
784 } else {
785 estimated_next_frame_duration_ =
786 std::min(duration, estimated_next_frame_duration_);
789 if (orig_duration_estimate != estimated_next_frame_duration_) {
790 DVLOG(3) << "Updated duration estimate:"
791 << orig_duration_estimate
792 << " -> "
793 << estimated_next_frame_duration_
794 << " at timestamp: "
795 << buffer->GetDecodeTimestamp().InSecondsF();
799 buffers_.push_back(buffer);
800 return true;
803 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
804 base::TimeDelta duration = estimated_next_frame_duration_;
805 if (duration != kNoTimestamp()) {
806 DVLOG(3) << __FUNCTION__ << " : using estimated duration";
807 } else {
808 DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
809 if (is_video_) {
810 duration = base::TimeDelta::FromMilliseconds(
811 kDefaultVideoBufferDurationInMs);
812 } else {
813 duration = base::TimeDelta::FromMilliseconds(
814 kDefaultAudioBufferDurationInMs);
818 DCHECK(duration > base::TimeDelta());
819 DCHECK(duration != kNoTimestamp());
820 return duration;
823 void WebMClusterParser::ClearTextTrackReadyBuffers() {
824 text_buffers_map_.clear();
825 for (TextTrackMap::iterator it = text_track_map_.begin();
826 it != text_track_map_.end();
827 ++it) {
828 it->second.ClearReadyBuffers();
832 void WebMClusterParser::ResetTextTracks() {
833 ClearTextTrackReadyBuffers();
834 for (TextTrackMap::iterator it = text_track_map_.begin();
835 it != text_track_map_.end();
836 ++it) {
837 it->second.Reset();
841 void WebMClusterParser::UpdateReadyBuffers() {
842 DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp());
843 DCHECK(text_buffers_map_.empty());
845 if (cluster_ended_) {
846 audio_.ApplyDurationEstimateIfNeeded();
847 video_.ApplyDurationEstimateIfNeeded();
848 // Per OnBlock(), all text buffers should already have valid durations, so
849 // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
850 // here.
851 ready_buffer_upper_bound_ =
852 DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
853 DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound());
854 DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound());
855 } else {
856 ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(),
857 video_.GetReadyUpperBound());
858 DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_);
859 DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_);
862 // Prepare each track's ready buffers for retrieval.
863 audio_.ExtractReadyBuffers(ready_buffer_upper_bound_);
864 video_.ExtractReadyBuffers(ready_buffer_upper_bound_);
865 for (TextTrackMap::iterator itr = text_track_map_.begin();
866 itr != text_track_map_.end();
867 ++itr) {
868 itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_);
872 WebMClusterParser::Track*
873 WebMClusterParser::FindTextTrack(int track_num) {
874 const TextTrackMap::iterator it = text_track_map_.find(track_num);
876 if (it == text_track_map_.end())
877 return NULL;
879 return &it->second;
882 } // namespace media