media/formats/webm/webm_cluster_parser.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/formats/webm/webm_cluster_parser.h"
   6
   7 #include <vector>
   8
   9 #include "base/logging.h"
  10 #include "base/sys_byteorder.h"
  11 #include "media/base/decrypt_config.h"
  12 #include "media/base/timestamp_constants.h"
  13 #include "media/filters/webvtt_util.h"
  14 #include "media/formats/webm/webm_constants.h"
  15 #include "media/formats/webm/webm_crypto_helpers.h"
  16 #include "media/formats/webm/webm_webvtt_parser.h"
  17
  18 namespace media {
  19
  20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
  21     10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
  22     60000, 10000, 20000, 10000, 20000, 2500,  5000,  10000, 20000, 2500,  5000,
  23     10000, 20000, 2500,  5000,  10000, 20000, 2500,  5000,  10000, 20000};
  24
  25 enum {
  26   // Limits the number of MEDIA_LOG() calls in the path of reading encoded
  27   // duration to avoid spamming for corrupted data.
  28   kMaxDurationErrorLogs = 10,
  29   // Limits the number of MEDIA_LOG() calls warning the user that buffer
  30   // durations have been estimated.
  31   kMaxDurationEstimateLogs = 10,
  32 };
  33
  34 WebMClusterParser::WebMClusterParser(
  35     int64 timecode_scale,
  36     int audio_track_num,
  37     base::TimeDelta audio_default_duration,
  38     int video_track_num,
  39     base::TimeDelta video_default_duration,
  40     const WebMTracksParser::TextTracks& text_tracks,
  41     const std::set<int64>& ignored_tracks,
  42     const std::string& audio_encryption_key_id,
  43     const std::string& video_encryption_key_id,
  44     const AudioCodec audio_codec,
  45     const scoped_refptr<MediaLog>& media_log)
  46     : timecode_multiplier_(timecode_scale / 1000.0),
  47       ignored_tracks_(ignored_tracks),
  48       audio_encryption_key_id_(audio_encryption_key_id),
  49       video_encryption_key_id_(video_encryption_key_id),
  50       audio_codec_(audio_codec),
  51       parser_(kWebMIdCluster, this),
  52       cluster_start_time_(kNoTimestamp()),
  53       audio_(audio_track_num, false, audio_default_duration, media_log),
  54       video_(video_track_num, true, video_default_duration, media_log),
  55       ready_buffer_upper_bound_(kNoDecodeTimestamp()),
  56       media_log_(media_log) {
  57   for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
  58        it != text_tracks.end();
  59        ++it) {
  60     text_track_map_.insert(std::make_pair(
  61         it->first, Track(it->first, false, kNoTimestamp(), media_log_)));
  62   }
  63 }
  64
  65 WebMClusterParser::~WebMClusterParser() {}
  66
  67 void WebMClusterParser::Reset() {
  68   last_block_timecode_ = -1;
  69   cluster_timecode_ = -1;
  70   cluster_start_time_ = kNoTimestamp();
  71   cluster_ended_ = false;
  72   parser_.Reset();
  73   audio_.Reset();
  74   video_.Reset();
  75   ResetTextTracks();
  76   ready_buffer_upper_bound_ = kNoDecodeTimestamp();
  77 }
  78
  79 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
  80   audio_.ClearReadyBuffers();
  81   video_.ClearReadyBuffers();
  82   ClearTextTrackReadyBuffers();
  83   ready_buffer_upper_bound_ = kNoDecodeTimestamp();
  84
  85   int result = parser_.Parse(buf, size);
  86
  87   if (result < 0) {
  88     cluster_ended_ = false;
  89     return result;
  90   }
  91
  92   cluster_ended_ = parser_.IsParsingComplete();
  93   if (cluster_ended_) {
  94     // If there were no buffers in this cluster, set the cluster start time to
  95     // be the |cluster_timecode_|.
  96     if (cluster_start_time_ == kNoTimestamp()) {
  97       // If the cluster did not even have a |cluster_timecode_|, signal parse
  98       // error.
  99       if (cluster_timecode_ < 0)
 100         return -1;
 101
 102       cluster_start_time_ = base::TimeDelta::FromMicroseconds(
 103           cluster_timecode_ * timecode_multiplier_);
 104     }
 105
 106     // Reset the parser if we're done parsing so that
 107     // it is ready to accept another cluster on the next
 108     // call.
 109     parser_.Reset();
 110
 111     last_block_timecode_ = -1;
 112     cluster_timecode_ = -1;
 113   }
 114
 115   return result;
 116 }
 117
 118 const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() {
 119   if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
 120     UpdateReadyBuffers();
 121
 122   return audio_.ready_buffers();
 123 }
 124
 125 const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() {
 126   if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
 127     UpdateReadyBuffers();
 128
 129   return video_.ready_buffers();
 130 }
 131
 132 const WebMClusterParser::TextBufferQueueMap&
 133 WebMClusterParser::GetTextBuffers() {
 134   if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
 135     UpdateReadyBuffers();
 136
 137   // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in
 138   // the output only for non-empty ready_buffer() queues in |text_track_map_|.
 139   text_buffers_map_.clear();
 140   for (TextTrackMap::const_iterator itr = text_track_map_.begin();
 141        itr != text_track_map_.end();
 142        ++itr) {
 143     const BufferQueue& text_buffers = itr->second.ready_buffers();
 144     if (!text_buffers.empty())
 145       text_buffers_map_.insert(std::make_pair(itr->first, text_buffers));
 146   }
 147
 148   return text_buffers_map_;
 149 }
 150
 151 base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
 152     const uint8_t* data,
 153     int size) {
 154
 155   // Duration is currently read assuming the *entire* stream is unencrypted.
 156   // The special "Signal Byte" prepended to Blocks in encrypted streams is
 157   // assumed to not be present.
 158   // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams
 159   // to return duration for any unencrypted blocks.
 160
 161   if (audio_codec_ == kCodecOpus) {
 162     return ReadOpusDuration(data, size);
 163   }
 164
 165   // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
 166   // motivations in http://crbug.com/396634.
 167
 168   return kNoTimestamp();
 169 }
 170
 171 base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
 172                                                     int size) {
 173   // Masks and constants for Opus packets. See
 174   // https://tools.ietf.org/html/rfc6716#page-14
 175   static const uint8_t kTocConfigMask = 0xf8;
 176   static const uint8_t kTocFrameCountCodeMask = 0x03;
 177   static const uint8_t kFrameCountMask = 0x3f;
 178   static const base::TimeDelta kPacketDurationMax =
 179       base::TimeDelta::FromMilliseconds(120);
 180
 181   if (size < 1) {
 182     LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 183                       kMaxDurationErrorLogs)
 184         << "Invalid zero-byte Opus packet; demuxed block duration may be "
 185            "imprecise.";
 186     return kNoTimestamp();
 187   }
 188
 189   // Frame count type described by last 2 bits of Opus TOC byte.
 190   int frame_count_type = data[0] & kTocFrameCountCodeMask;
 191
 192   int frame_count = 0;
 193   switch (frame_count_type) {
 194     case 0:
 195       frame_count = 1;
 196       break;
 197     case 1:
 198     case 2:
 199       frame_count = 2;
 200       break;
 201     case 3:
 202       // Type 3 indicates an arbitrary frame count described in the next byte.
 203       if (size < 2) {
 204         LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 205                           kMaxDurationErrorLogs)
 206             << "Second byte missing from 'Code 3' Opus packet; demuxed block "
 207                "duration may be imprecise.";
 208         return kNoTimestamp();
 209       }
 210
 211       frame_count = data[1] & kFrameCountMask;
 212
 213       if (frame_count == 0) {
 214         LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 215                           kMaxDurationErrorLogs)
 216             << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
 217                "block duration may be imprecise.";
 218         return kNoTimestamp();
 219       }
 220
 221       break;
 222     default:
 223       LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 224                         kMaxDurationErrorLogs)
 225           << "Unexpected Opus frame count type: " << frame_count_type << "; "
 226           << "demuxed block duration may be imprecise.";
 227       return kNoTimestamp();
 228   }
 229
 230   int opusConfig = (data[0] & kTocConfigMask) >> 3;
 231   CHECK_GE(opusConfig, 0);
 232   CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
 233
 234   DCHECK_GT(frame_count, 0);
 235   base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
 236       kOpusFrameDurationsMu[opusConfig] * frame_count);
 237
 238   if (duration > kPacketDurationMax) {
 239     // Intentionally allowing packet to pass through for now. Decoder should
 240     // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
 241     // things go sideways.
 242     LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 243                       kMaxDurationErrorLogs)
 244         << "Warning, demuxed Opus packet with encoded duration: "
 245         << duration.InMilliseconds() << "ms. Should be no greater than "
 246         << kPacketDurationMax.InMilliseconds() << "ms.";
 247   }
 248
 249   return duration;
 250 }
 251
 252 WebMParserClient* WebMClusterParser::OnListStart(int id) {
 253   if (id == kWebMIdCluster) {
 254     cluster_timecode_ = -1;
 255     cluster_start_time_ = kNoTimestamp();
 256   } else if (id == kWebMIdBlockGroup) {
 257     block_data_.reset();
 258     block_data_size_ = -1;
 259     block_duration_ = -1;
 260     discard_padding_ = -1;
 261     discard_padding_set_ = false;
 262   } else if (id == kWebMIdBlockAdditions) {
 263     block_add_id_ = -1;
 264     block_additional_data_.reset();
 265     block_additional_data_size_ = 0;
 266   }
 267
 268   return this;
 269 }
 270
 271 bool WebMClusterParser::OnListEnd(int id) {
 272   if (id != kWebMIdBlockGroup)
 273     return true;
 274
 275   // Make sure the BlockGroup actually had a Block.
 276   if (block_data_size_ == -1) {
 277     MEDIA_LOG(ERROR, media_log_) << "Block missing from BlockGroup.";
 278     return false;
 279   }
 280
 281   bool result = ParseBlock(false, block_data_.get(), block_data_size_,
 282                            block_additional_data_.get(),
 283                            block_additional_data_size_, block_duration_,
 284                            discard_padding_set_ ? discard_padding_ : 0);
 285   block_data_.reset();
 286   block_data_size_ = -1;
 287   block_duration_ = -1;
 288   block_add_id_ = -1;
 289   block_additional_data_.reset();
 290   block_additional_data_size_ = 0;
 291   discard_padding_ = -1;
 292   discard_padding_set_ = false;
 293   return result;
 294 }
 295
 296 bool WebMClusterParser::OnUInt(int id, int64 val) {
 297   int64* dst;
 298   switch (id) {
 299     case kWebMIdTimecode:
 300       dst = &cluster_timecode_;
 301       break;
 302     case kWebMIdBlockDuration:
 303       dst = &block_duration_;
 304       break;
 305     case kWebMIdBlockAddID:
 306       dst = &block_add_id_;
 307       break;
 308     default:
 309       return true;
 310   }
 311   if (*dst != -1)
 312     return false;
 313   *dst = val;
 314   return true;
 315 }
 316
 317 bool WebMClusterParser::ParseBlock(bool is_simple_block,
 318                                    const uint8_t* buf,
 319                                    int size,
 320                                    const uint8_t* additional,
 321                                    int additional_size,
 322                                    int duration,
 323                                    int64 discard_padding) {
 324   if (size < 4)
 325     return false;
 326
 327   // Return an error if the trackNum > 127. We just aren't
 328   // going to support large track numbers right now.
 329   if (!(buf[0] & 0x80)) {
 330     MEDIA_LOG(ERROR, media_log_) << "TrackNumber over 127 not supported";
 331     return false;
 332   }
 333
 334   int track_num = buf[0] & 0x7f;
 335   int timecode = buf[1] << 8 | buf[2];
 336   int flags = buf[3] & 0xff;
 337   int lacing = (flags >> 1) & 0x3;
 338
 339   if (lacing) {
 340     MEDIA_LOG(ERROR, media_log_) << "Lacing " << lacing
 341                                  << " is not supported yet.";
 342     return false;
 343   }
 344
 345   // Sign extend negative timecode offsets.
 346   if (timecode & 0x8000)
 347     timecode |= ~0xffff;
 348
 349   const uint8_t* frame_data = buf + 4;
 350   int frame_size = size - (frame_data - buf);
 351   return OnBlock(is_simple_block, track_num, timecode, duration, flags,
 352                  frame_data, frame_size, additional, additional_size,
 353                  discard_padding);
 354 }
 355
 356 bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
 357   switch (id) {
 358     case kWebMIdSimpleBlock:
 359       return ParseBlock(true, data, size, NULL, 0, -1, 0);
 360
 361     case kWebMIdBlock:
 362       if (block_data_) {
 363         MEDIA_LOG(ERROR, media_log_)
 364             << "More than 1 Block in a BlockGroup is not "
 365                "supported.";
 366         return false;
 367       }
 368       block_data_.reset(new uint8_t[size]);
 369       memcpy(block_data_.get(), data, size);
 370       block_data_size_ = size;
 371       return true;
 372
 373     case kWebMIdBlockAdditional: {
 374       uint64 block_add_id = base::HostToNet64(block_add_id_);
 375       if (block_additional_data_) {
 376         // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
 377         // as per matroska spec. But for now we don't have a use case to
 378         // support parsing of such files. Take a look at this again when such a
 379         // case arises.
 380         MEDIA_LOG(ERROR, media_log_) << "More than 1 BlockAdditional in a "
 381                                         "BlockGroup is not supported.";
 382         return false;
 383       }
 384       // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
 385       // element's value in Big Endian format. This is done to mimic ffmpeg
 386       // demuxer's behavior.
 387       block_additional_data_size_ = size + sizeof(block_add_id);
 388       block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
 389       memcpy(block_additional_data_.get(), &block_add_id,
 390              sizeof(block_add_id));
 391       memcpy(block_additional_data_.get() + 8, data, size);
 392       return true;
 393     }
 394     case kWebMIdDiscardPadding: {
 395       if (discard_padding_set_ || size <= 0 || size > 8)
 396         return false;
 397       discard_padding_set_ = true;
 398
 399       // Read in the big-endian integer.
 400       discard_padding_ = static_cast<int8>(data[0]);
 401       for (int i = 1; i < size; ++i)
 402         discard_padding_ = (discard_padding_ << 8) | data[i];
 403
 404       return true;
 405     }
 406     default:
 407       return true;
 408   }
 409 }
 410
 411 bool WebMClusterParser::OnBlock(bool is_simple_block,
 412                                 int track_num,
 413                                 int timecode,
 414                                 int block_duration,
 415                                 int flags,
 416                                 const uint8_t* data,
 417                                 int size,
 418                                 const uint8_t* additional,
 419                                 int additional_size,
 420                                 int64 discard_padding) {
 421   DCHECK_GE(size, 0);
 422   if (cluster_timecode_ == -1) {
 423     MEDIA_LOG(ERROR, media_log_) << "Got a block before cluster timecode.";
 424     return false;
 425   }
 426
 427   // TODO(acolwell): Should relative negative timecode offsets be rejected?  Or
 428   // only when the absolute timecode is negative?  See http://crbug.com/271794
 429   if (timecode < 0) {
 430     MEDIA_LOG(ERROR, media_log_) << "Got a block with negative timecode offset "
 431                                  << timecode;
 432     return false;
 433   }
 434
 435   if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
 436     MEDIA_LOG(ERROR, media_log_)
 437         << "Got a block with a timecode before the previous block.";
 438     return false;
 439   }
 440
 441   Track* track = NULL;
 442   StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
 443   std::string encryption_key_id;
 444   base::TimeDelta encoded_duration = kNoTimestamp();
 445   if (track_num == audio_.track_num()) {
 446     track = &audio_;
 447     encryption_key_id = audio_encryption_key_id_;
 448     if (encryption_key_id.empty()) {
 449       encoded_duration = TryGetEncodedAudioDuration(data, size);
 450     }
 451   } else if (track_num == video_.track_num()) {
 452     track = &video_;
 453     encryption_key_id = video_encryption_key_id_;
 454     buffer_type = DemuxerStream::VIDEO;
 455   } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
 456     return true;
 457   } else if (Track* const text_track = FindTextTrack(track_num)) {
 458     if (is_simple_block)  // BlockGroup is required for WebVTT cues
 459       return false;
 460     if (block_duration < 0)  // not specified
 461       return false;
 462     track = text_track;
 463     buffer_type = DemuxerStream::TEXT;
 464   } else {
 465     MEDIA_LOG(ERROR, media_log_) << "Unexpected track number " << track_num;
 466     return false;
 467   }
 468
 469   last_block_timecode_ = timecode;
 470
 471   base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
 472       (cluster_timecode_ + timecode) * timecode_multiplier_);
 473
 474   scoped_refptr<StreamParserBuffer> buffer;
 475   if (buffer_type != DemuxerStream::TEXT) {
 476     // The first bit of the flags is set when a SimpleBlock contains only
 477     // keyframes. If this is a Block, then inspection of the payload is
 478     // necessary to determine whether it contains a keyframe or not.
 479     // http://www.matroska.org/technical/specs/index.html
 480     bool is_keyframe =
 481         is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
 482
 483     // Every encrypted Block has a signal byte and IV prepended to it. Current
 484     // encrypted WebM request for comments specification is here
 485     // http://wiki.webmproject.org/encryption/webm-encryption-rfc
 486     scoped_ptr<DecryptConfig> decrypt_config;
 487     int data_offset = 0;
 488     if (!encryption_key_id.empty() &&
 489         !WebMCreateDecryptConfig(
 490              data, size,
 491              reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
 492              encryption_key_id.size(),
 493              &decrypt_config, &data_offset)) {
 494       return false;
 495     }
 496
 497     // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
 498     // type with remapped bytestream track numbers and allow multiple tracks as
 499     // applicable. See https://crbug.com/341581.
 500     buffer = StreamParserBuffer::CopyFrom(
 501         data + data_offset, size - data_offset,
 502         additional, additional_size,
 503         is_keyframe, buffer_type, track_num);
 504
 505     if (decrypt_config)
 506       buffer->set_decrypt_config(decrypt_config.Pass());
 507   } else {
 508     std::string id, settings, content;
 509     WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
 510
 511     std::vector<uint8_t> side_data;
 512     MakeSideData(id.begin(), id.end(),
 513                  settings.begin(), settings.end(),
 514                  &side_data);
 515
 516     // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
 517     // type with remapped bytestream track numbers and allow multiple tracks as
 518     // applicable. See https://crbug.com/341581.
 519     buffer = StreamParserBuffer::CopyFrom(
 520         reinterpret_cast<const uint8_t*>(content.data()),
 521         content.length(),
 522         &side_data[0],
 523         side_data.size(),
 524         true, buffer_type, track_num);
 525   }
 526
 527   buffer->set_timestamp(timestamp);
 528   if (cluster_start_time_ == kNoTimestamp())
 529     cluster_start_time_ = timestamp;
 530
 531   base::TimeDelta block_duration_time_delta = kNoTimestamp();
 532   if (block_duration >= 0) {
 533     block_duration_time_delta = base::TimeDelta::FromMicroseconds(
 534         block_duration * timecode_multiplier_);
 535   }
 536
 537   // Prefer encoded duration over BlockGroup->BlockDuration or
 538   // TrackEntry->DefaultDuration when available. This layering violation is a
 539   // workaround for http://crbug.com/396634, decreasing the likelihood of
 540   // fall-back to rough estimation techniques for Blocks that lack a
 541   // BlockDuration at the end of a cluster. Cross cluster durations are not
 542   // feasible given flexibility of cluster ordering and MSE APIs. Duration
 543   // estimation may still apply in cases of encryption and codecs for which
 544   // we do not extract encoded duration. Within a cluster, estimates are applied
 545   // as Block Timecode deltas, or once the whole cluster is parsed in the case
 546   // of the last Block in the cluster. See Track::AddBuffer and
 547   // ApplyDurationEstimateIfNeeded().
 548   if (encoded_duration != kNoTimestamp()) {
 549     DCHECK(encoded_duration != kInfiniteDuration());
 550     DCHECK(encoded_duration > base::TimeDelta());
 551     buffer->set_duration(encoded_duration);
 552
 553     DVLOG(3) << __FUNCTION__ << " : "
 554              << "Using encoded duration " << encoded_duration.InSecondsF();
 555
 556     if (block_duration_time_delta != kNoTimestamp()) {
 557       base::TimeDelta duration_difference =
 558           block_duration_time_delta - encoded_duration;
 559
 560       const auto kWarnDurationDiff =
 561           base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
 562       if (duration_difference.magnitude() > kWarnDurationDiff) {
 563         LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 564                           kMaxDurationErrorLogs)
 565             << "BlockDuration (" << block_duration_time_delta.InMilliseconds()
 566             << "ms) differs significantly from encoded duration ("
 567             << encoded_duration.InMilliseconds() << "ms).";
 568       }
 569     }
 570   } else if (block_duration_time_delta != kNoTimestamp()) {
 571     buffer->set_duration(block_duration_time_delta);
 572   } else {
 573     DCHECK_NE(buffer_type, DemuxerStream::TEXT);
 574     buffer->set_duration(track->default_duration());
 575   }
 576
 577   if (discard_padding != 0) {
 578     buffer->set_discard_padding(std::make_pair(
 579         base::TimeDelta(),
 580         base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
 581   }
 582
 583   return track->AddBuffer(buffer);
 584 }
 585
 586 WebMClusterParser::Track::Track(int track_num,
 587                                 bool is_video,
 588                                 base::TimeDelta default_duration,
 589                                 const scoped_refptr<MediaLog>& media_log)
 590     : track_num_(track_num),
 591       is_video_(is_video),
 592       default_duration_(default_duration),
 593       estimated_next_frame_duration_(kNoTimestamp()),
 594       media_log_(media_log) {
 595   DCHECK(default_duration_ == kNoTimestamp() ||
 596          default_duration_ > base::TimeDelta());
 597 }
 598
 599 WebMClusterParser::Track::~Track() {}
 600
 601 DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() {
 602   DCHECK(ready_buffers_.empty());
 603   if (last_added_buffer_missing_duration_.get())
 604     return last_added_buffer_missing_duration_->GetDecodeTimestamp();
 605
 606   return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
 607 }
 608
 609 void WebMClusterParser::Track::ExtractReadyBuffers(
 610     const DecodeTimestamp before_timestamp) {
 611   DCHECK(ready_buffers_.empty());
 612   DCHECK(DecodeTimestamp() <= before_timestamp);
 613   DCHECK(kNoDecodeTimestamp() != before_timestamp);
 614
 615   if (buffers_.empty())
 616     return;
 617
 618   if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) {
 619     // All of |buffers_| are ready.
 620     ready_buffers_.swap(buffers_);
 621     DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All "
 622              << ready_buffers_.size() << " are ready: before upper bound ts "
 623              << before_timestamp.InSecondsF();
 624     return;
 625   }
 626
 627   // Not all of |buffers_| are ready yet. Move any that are ready to
 628   // |ready_buffers_|.
 629   while (true) {
 630     const scoped_refptr<StreamParserBuffer>& buffer = buffers_.front();
 631     if (buffer->GetDecodeTimestamp() >= before_timestamp)
 632       break;
 633     ready_buffers_.push_back(buffer);
 634     buffers_.pop_front();
 635     DCHECK(!buffers_.empty());
 636   }
 637
 638   DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only "
 639            << ready_buffers_.size() << " ready, " << buffers_.size()
 640            << " at or after upper bound ts " << before_timestamp.InSecondsF();
 641 }
 642
 643 bool WebMClusterParser::Track::AddBuffer(
 644     const scoped_refptr<StreamParserBuffer>& buffer) {
 645   DVLOG(2) << "AddBuffer() : " << track_num_
 646            << " ts " << buffer->timestamp().InSecondsF()
 647            << " dur " << buffer->duration().InSecondsF()
 648            << " kf " << buffer->is_key_frame()
 649            << " size " << buffer->data_size();
 650
 651   if (last_added_buffer_missing_duration_.get()) {
 652     base::TimeDelta derived_duration =
 653         buffer->timestamp() - last_added_buffer_missing_duration_->timestamp();
 654     last_added_buffer_missing_duration_->set_duration(derived_duration);
 655
 656     DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
 657              << " ts "
 658              << last_added_buffer_missing_duration_->timestamp().InSecondsF()
 659              << " dur "
 660              << last_added_buffer_missing_duration_->duration().InSecondsF()
 661              << " kf " << last_added_buffer_missing_duration_->is_key_frame()
 662              << " size " << last_added_buffer_missing_duration_->data_size();
 663     scoped_refptr<StreamParserBuffer> updated_buffer =
 664         last_added_buffer_missing_duration_;
 665     last_added_buffer_missing_duration_ = NULL;
 666     if (!QueueBuffer(updated_buffer))
 667       return false;
 668   }
 669
 670   if (buffer->duration() == kNoTimestamp()) {
 671     last_added_buffer_missing_duration_ = buffer;
 672     DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
 673     return true;
 674   }
 675
 676   return QueueBuffer(buffer);
 677 }
 678
 679 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
 680   if (!last_added_buffer_missing_duration_.get())
 681     return;
 682
 683   base::TimeDelta estimated_duration = GetDurationEstimate();
 684   last_added_buffer_missing_duration_->set_duration(estimated_duration);
 685
 686   if (is_video_) {
 687     // Exposing estimation so splicing/overlap frame processing can make
 688     // informed decisions downstream.
 689     // TODO(chcunningham): Set this for audio as well in later change where
 690     // audio is switched to max estimation and splicing is disabled.
 691     last_added_buffer_missing_duration_->set_is_duration_estimated(true);
 692   }
 693
 694   LIMITED_MEDIA_LOG(INFO, media_log_, num_duration_estimates_,
 695                     kMaxDurationEstimateLogs)
 696       << "Estimating WebM block duration to be "
 697       << estimated_duration.InMilliseconds()
 698       << "ms for the last (Simple)Block in the Cluster for this Track. Use "
 699          "BlockGroups with BlockDurations at the end of each Track in a "
 700          "Cluster to avoid estimation.";
 701
 702   DVLOG(2) << __FUNCTION__ << " new dur : ts "
 703            << last_added_buffer_missing_duration_->timestamp().InSecondsF()
 704            << " dur "
 705            << last_added_buffer_missing_duration_->duration().InSecondsF()
 706            << " kf " << last_added_buffer_missing_duration_->is_key_frame()
 707            << " size " << last_added_buffer_missing_duration_->data_size();
 708
 709   // Don't use the applied duration as a future estimation (don't use
 710   // QueueBuffer() here.)
 711   buffers_.push_back(last_added_buffer_missing_duration_);
 712   last_added_buffer_missing_duration_ = NULL;
 713 }
 714
 715 void WebMClusterParser::Track::ClearReadyBuffers() {
 716   // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
 717   // reset here.
 718   ready_buffers_.clear();
 719 }
 720
 721 void WebMClusterParser::Track::Reset() {
 722   ClearReadyBuffers();
 723   buffers_.clear();
 724   last_added_buffer_missing_duration_ = NULL;
 725 }
 726
 727 bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
 728   // For now, assume that all blocks are keyframes for datatypes other than
 729   // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
 730   if (!is_video_)
 731     return true;
 732
 733   // Make sure the block is big enough for the minimal keyframe header size.
 734   if (size < 7)
 735     return false;
 736
 737   // The LSb of the first byte must be a 0 for a keyframe.
 738   // http://tools.ietf.org/html/rfc6386 Section 19.1
 739   if ((data[0] & 0x01) != 0)
 740     return false;
 741
 742   // Verify VP8 keyframe startcode.
 743   // http://tools.ietf.org/html/rfc6386 Section 19.1
 744   if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
 745     return false;
 746
 747   return true;
 748 }
 749
 750 bool WebMClusterParser::Track::QueueBuffer(
 751     const scoped_refptr<StreamParserBuffer>& buffer) {
 752   DCHECK(!last_added_buffer_missing_duration_.get());
 753
 754   // WebMClusterParser::OnBlock() gives MEDIA_LOG and parse error on decreasing
 755   // block timecode detection within a cluster. Therefore, we should not see
 756   // those here.
 757   DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
 758       DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
 759   CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
 760
 761   base::TimeDelta duration = buffer->duration();
 762   if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
 763     MEDIA_LOG(ERROR, media_log_)
 764         << "Invalid buffer duration: " << duration.InSecondsF();
 765     return false;
 766   }
 767
 768   // The estimated frame duration is the minimum (for audio) or the maximum
 769   // (for video) non-zero duration since the last initialization segment. The
 770   // minimum is used for audio to ensure frame durations aren't overestimated,
 771   // triggering unnecessary frame splicing. For video, splicing does not apply,
 772   // so maximum is used and overlap is simply resolved by showing the
 773   // later of the overlapping frames at its given PTS, effectively trimming down
 774   // the over-estimated duration of the previous frame.
 775   // TODO(chcunningham): Use max for audio and disable splicing whenever
 776   // estimated buffers are encountered.
 777   if (duration > base::TimeDelta()) {
 778     base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
 779     if (estimated_next_frame_duration_ == kNoTimestamp()) {
 780       estimated_next_frame_duration_ = duration;
 781     } else if (is_video_) {
 782       estimated_next_frame_duration_ =
 783           std::max(duration, estimated_next_frame_duration_);
 784     } else {
 785       estimated_next_frame_duration_ =
 786           std::min(duration, estimated_next_frame_duration_);
 787     }
 788
 789     if (orig_duration_estimate != estimated_next_frame_duration_) {
 790       DVLOG(3) << "Updated duration estimate:"
 791                << orig_duration_estimate
 792                << " -> "
 793                << estimated_next_frame_duration_
 794                << " at timestamp: "
 795                << buffer->GetDecodeTimestamp().InSecondsF();
 796     }
 797   }
 798
 799   buffers_.push_back(buffer);
 800   return true;
 801 }
 802
 803 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
 804   base::TimeDelta duration = estimated_next_frame_duration_;
 805   if (duration != kNoTimestamp()) {
 806     DVLOG(3) << __FUNCTION__ << " : using estimated duration";
 807   } else {
 808     DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
 809     if (is_video_) {
 810       duration = base::TimeDelta::FromMilliseconds(
 811           kDefaultVideoBufferDurationInMs);
 812     } else {
 813       duration = base::TimeDelta::FromMilliseconds(
 814           kDefaultAudioBufferDurationInMs);
 815     }
 816   }
 817
 818   DCHECK(duration > base::TimeDelta());
 819   DCHECK(duration != kNoTimestamp());
 820   return duration;
 821 }
 822
 823 void WebMClusterParser::ClearTextTrackReadyBuffers() {
 824   text_buffers_map_.clear();
 825   for (TextTrackMap::iterator it = text_track_map_.begin();
 826        it != text_track_map_.end();
 827        ++it) {
 828     it->second.ClearReadyBuffers();
 829   }
 830 }
 831
 832 void WebMClusterParser::ResetTextTracks() {
 833   ClearTextTrackReadyBuffers();
 834   for (TextTrackMap::iterator it = text_track_map_.begin();
 835        it != text_track_map_.end();
 836        ++it) {
 837     it->second.Reset();
 838   }
 839 }
 840
 841 void WebMClusterParser::UpdateReadyBuffers() {
 842   DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp());
 843   DCHECK(text_buffers_map_.empty());
 844
 845   if (cluster_ended_) {
 846     audio_.ApplyDurationEstimateIfNeeded();
 847     video_.ApplyDurationEstimateIfNeeded();
 848     // Per OnBlock(), all text buffers should already have valid durations, so
 849     // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
 850     // here.
 851     ready_buffer_upper_bound_ =
 852         DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
 853     DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound());
 854     DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound());
 855   } else {
 856     ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(),
 857                                          video_.GetReadyUpperBound());
 858     DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_);
 859     DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_);
 860   }
 861
 862   // Prepare each track's ready buffers for retrieval.
 863   audio_.ExtractReadyBuffers(ready_buffer_upper_bound_);
 864   video_.ExtractReadyBuffers(ready_buffer_upper_bound_);
 865   for (TextTrackMap::iterator itr = text_track_map_.begin();
 866        itr != text_track_map_.end();
 867        ++itr) {
 868     itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_);
 869   }
 870 }
 871
 872 WebMClusterParser::Track*
 873 WebMClusterParser::FindTextTrack(int track_num) {
 874   const TextTrackMap::iterator it = text_track_map_.find(track_num);
 875
 876   if (it == text_track_map_.end())
 877     return NULL;
 878
 879   return &it->second;
 880 }
 881
 882 }  // namespace media