media/formats/webm/webm_cluster_parser.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/formats/webm/webm_cluster_parser.h"
   6
   7 #include <vector>
   8
   9 #include "base/logging.h"
  10 #include "base/sys_byteorder.h"
  11 #include "media/base/buffers.h"
  12 #include "media/base/decrypt_config.h"
  13 #include "media/filters/webvtt_util.h"
  14 #include "media/formats/webm/webm_constants.h"
  15 #include "media/formats/webm/webm_crypto_helpers.h"
  16 #include "media/formats/webm/webm_webvtt_parser.h"
  17
  18 namespace media {
  19
  20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
  21     10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
  22     60000, 10000, 20000, 10000, 20000, 2500,  5000,  10000, 20000, 2500,  5000,
  23     10000, 20000, 2500,  5000,  10000, 20000, 2500,  5000,  10000, 20000};
  24
  25 enum {
  26   // Limits the number of MEDIA_LOG() calls in the path of reading encoded
  27   // duration to avoid spamming for corrupted data.
  28   kMaxDurationErrorLogs = 10,
  29   // Limits the number of MEDIA_LOG() calls warning the user that buffer
  30   // durations have been estimated.
  31   kMaxDurationEstimateLogs = 10,
  32 };
  33
  34 WebMClusterParser::WebMClusterParser(
  35     int64 timecode_scale,
  36     int audio_track_num,
  37     base::TimeDelta audio_default_duration,
  38     int video_track_num,
  39     base::TimeDelta video_default_duration,
  40     const WebMTracksParser::TextTracks& text_tracks,
  41     const std::set<int64>& ignored_tracks,
  42     const std::string& audio_encryption_key_id,
  43     const std::string& video_encryption_key_id,
  44     const AudioCodec audio_codec,
  45     const scoped_refptr<MediaLog>& media_log)
  46     : num_duration_errors_(0),
  47       timecode_multiplier_(timecode_scale / 1000.0),
  48       ignored_tracks_(ignored_tracks),
  49       audio_encryption_key_id_(audio_encryption_key_id),
  50       video_encryption_key_id_(video_encryption_key_id),
  51       audio_codec_(audio_codec),
  52       parser_(kWebMIdCluster, this),
  53       last_block_timecode_(-1),
  54       block_data_size_(-1),
  55       block_duration_(-1),
  56       block_add_id_(-1),
  57       block_additional_data_size_(0),
  58       discard_padding_(-1),
  59       cluster_timecode_(-1),
  60       cluster_start_time_(kNoTimestamp()),
  61       cluster_ended_(false),
  62       audio_(audio_track_num, false, audio_default_duration, media_log),
  63       video_(video_track_num, true, video_default_duration, media_log),
  64       ready_buffer_upper_bound_(kNoDecodeTimestamp()),
  65       media_log_(media_log) {
  66   for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
  67        it != text_tracks.end();
  68        ++it) {
  69     text_track_map_.insert(std::make_pair(
  70         it->first, Track(it->first, false, kNoTimestamp(), media_log_)));
  71   }
  72 }
  73
  74 WebMClusterParser::~WebMClusterParser() {}
  75
  76 void WebMClusterParser::Reset() {
  77   last_block_timecode_ = -1;
  78   cluster_timecode_ = -1;
  79   cluster_start_time_ = kNoTimestamp();
  80   cluster_ended_ = false;
  81   parser_.Reset();
  82   audio_.Reset();
  83   video_.Reset();
  84   ResetTextTracks();
  85   ready_buffer_upper_bound_ = kNoDecodeTimestamp();
  86 }
  87
  88 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
  89   audio_.ClearReadyBuffers();
  90   video_.ClearReadyBuffers();
  91   ClearTextTrackReadyBuffers();
  92   ready_buffer_upper_bound_ = kNoDecodeTimestamp();
  93
  94   int result = parser_.Parse(buf, size);
  95
  96   if (result < 0) {
  97     cluster_ended_ = false;
  98     return result;
  99   }
 100
 101   cluster_ended_ = parser_.IsParsingComplete();
 102   if (cluster_ended_) {
 103     // If there were no buffers in this cluster, set the cluster start time to
 104     // be the |cluster_timecode_|.
 105     if (cluster_start_time_ == kNoTimestamp()) {
 106       // If the cluster did not even have a |cluster_timecode_|, signal parse
 107       // error.
 108       if (cluster_timecode_ < 0)
 109         return -1;
 110
 111       cluster_start_time_ = base::TimeDelta::FromMicroseconds(
 112           cluster_timecode_ * timecode_multiplier_);
 113     }
 114
 115     // Reset the parser if we're done parsing so that
 116     // it is ready to accept another cluster on the next
 117     // call.
 118     parser_.Reset();
 119
 120     last_block_timecode_ = -1;
 121     cluster_timecode_ = -1;
 122   }
 123
 124   return result;
 125 }
 126
 127 const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() {
 128   if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
 129     UpdateReadyBuffers();
 130
 131   return audio_.ready_buffers();
 132 }
 133
 134 const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() {
 135   if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
 136     UpdateReadyBuffers();
 137
 138   return video_.ready_buffers();
 139 }
 140
 141 const WebMClusterParser::TextBufferQueueMap&
 142 WebMClusterParser::GetTextBuffers() {
 143   if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
 144     UpdateReadyBuffers();
 145
 146   // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in
 147   // the output only for non-empty ready_buffer() queues in |text_track_map_|.
 148   text_buffers_map_.clear();
 149   for (TextTrackMap::const_iterator itr = text_track_map_.begin();
 150        itr != text_track_map_.end();
 151        ++itr) {
 152     const BufferQueue& text_buffers = itr->second.ready_buffers();
 153     if (!text_buffers.empty())
 154       text_buffers_map_.insert(std::make_pair(itr->first, text_buffers));
 155   }
 156
 157   return text_buffers_map_;
 158 }
 159
 160 base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
 161     const uint8_t* data,
 162     int size) {
 163
 164   // Duration is currently read assuming the *entire* stream is unencrypted.
 165   // The special "Signal Byte" prepended to Blocks in encrypted streams is
 166   // assumed to not be present.
 167   // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams
 168   // to return duration for any unencrypted blocks.
 169
 170   if (audio_codec_ == kCodecOpus) {
 171     return ReadOpusDuration(data, size);
 172   }
 173
 174   // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
 175   // motivations in http://crbug.com/396634.
 176
 177   return kNoTimestamp();
 178 }
 179
 180 base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
 181                                                     int size) {
 182   // Masks and constants for Opus packets. See
 183   // https://tools.ietf.org/html/rfc6716#page-14
 184   static const uint8_t kTocConfigMask = 0xf8;
 185   static const uint8_t kTocFrameCountCodeMask = 0x03;
 186   static const uint8_t kFrameCountMask = 0x3f;
 187   static const base::TimeDelta kPacketDurationMax =
 188       base::TimeDelta::FromMilliseconds(120);
 189
 190   if (size < 1) {
 191     LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 192                       kMaxDurationErrorLogs)
 193         << "Invalid zero-byte Opus packet; demuxed block duration may be "
 194            "imprecise.";
 195     return kNoTimestamp();
 196   }
 197
 198   // Frame count type described by last 2 bits of Opus TOC byte.
 199   int frame_count_type = data[0] & kTocFrameCountCodeMask;
 200
 201   int frame_count = 0;
 202   switch (frame_count_type) {
 203     case 0:
 204       frame_count = 1;
 205       break;
 206     case 1:
 207     case 2:
 208       frame_count = 2;
 209       break;
 210     case 3:
 211       // Type 3 indicates an arbitrary frame count described in the next byte.
 212       if (size < 2) {
 213         LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 214                           kMaxDurationErrorLogs)
 215             << "Second byte missing from 'Code 3' Opus packet; demuxed block "
 216                "duration may be imprecise.";
 217         return kNoTimestamp();
 218       }
 219
 220       frame_count = data[1] & kFrameCountMask;
 221
 222       if (frame_count == 0) {
 223         LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 224                           kMaxDurationErrorLogs)
 225             << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
 226                "block duration may be imprecise.";
 227         return kNoTimestamp();
 228       }
 229
 230       break;
 231     default:
 232       LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 233                         kMaxDurationErrorLogs)
 234           << "Unexpected Opus frame count type: " << frame_count_type << "; "
 235           << "demuxed block duration may be imprecise.";
 236       return kNoTimestamp();
 237   }
 238
 239   int opusConfig = (data[0] & kTocConfigMask) >> 3;
 240   CHECK_GE(opusConfig, 0);
 241   CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
 242
 243   DCHECK_GT(frame_count, 0);
 244   base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
 245       kOpusFrameDurationsMu[opusConfig] * frame_count);
 246
 247   if (duration > kPacketDurationMax) {
 248     // Intentionally allowing packet to pass through for now. Decoder should
 249     // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
 250     // things go sideways.
 251     LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 252                       kMaxDurationErrorLogs)
 253         << "Warning, demuxed Opus packet with encoded duration: " << duration
 254         << ". Should be no greater than " << kPacketDurationMax;
 255   }
 256
 257   return duration;
 258 }
 259
 260 WebMParserClient* WebMClusterParser::OnListStart(int id) {
 261   if (id == kWebMIdCluster) {
 262     cluster_timecode_ = -1;
 263     cluster_start_time_ = kNoTimestamp();
 264   } else if (id == kWebMIdBlockGroup) {
 265     block_data_.reset();
 266     block_data_size_ = -1;
 267     block_duration_ = -1;
 268     discard_padding_ = -1;
 269     discard_padding_set_ = false;
 270   } else if (id == kWebMIdBlockAdditions) {
 271     block_add_id_ = -1;
 272     block_additional_data_.reset();
 273     block_additional_data_size_ = 0;
 274   }
 275
 276   return this;
 277 }
 278
 279 bool WebMClusterParser::OnListEnd(int id) {
 280   if (id != kWebMIdBlockGroup)
 281     return true;
 282
 283   // Make sure the BlockGroup actually had a Block.
 284   if (block_data_size_ == -1) {
 285     MEDIA_LOG(ERROR, media_log_) << "Block missing from BlockGroup.";
 286     return false;
 287   }
 288
 289   bool result = ParseBlock(false, block_data_.get(), block_data_size_,
 290                            block_additional_data_.get(),
 291                            block_additional_data_size_, block_duration_,
 292                            discard_padding_set_ ? discard_padding_ : 0);
 293   block_data_.reset();
 294   block_data_size_ = -1;
 295   block_duration_ = -1;
 296   block_add_id_ = -1;
 297   block_additional_data_.reset();
 298   block_additional_data_size_ = 0;
 299   discard_padding_ = -1;
 300   discard_padding_set_ = false;
 301   return result;
 302 }
 303
 304 bool WebMClusterParser::OnUInt(int id, int64 val) {
 305   int64* dst;
 306   switch (id) {
 307     case kWebMIdTimecode:
 308       dst = &cluster_timecode_;
 309       break;
 310     case kWebMIdBlockDuration:
 311       dst = &block_duration_;
 312       break;
 313     case kWebMIdBlockAddID:
 314       dst = &block_add_id_;
 315       break;
 316     default:
 317       return true;
 318   }
 319   if (*dst != -1)
 320     return false;
 321   *dst = val;
 322   return true;
 323 }
 324
 325 bool WebMClusterParser::ParseBlock(bool is_simple_block,
 326                                    const uint8_t* buf,
 327                                    int size,
 328                                    const uint8_t* additional,
 329                                    int additional_size,
 330                                    int duration,
 331                                    int64 discard_padding) {
 332   if (size < 4)
 333     return false;
 334
 335   // Return an error if the trackNum > 127. We just aren't
 336   // going to support large track numbers right now.
 337   if (!(buf[0] & 0x80)) {
 338     MEDIA_LOG(ERROR, media_log_) << "TrackNumber over 127 not supported";
 339     return false;
 340   }
 341
 342   int track_num = buf[0] & 0x7f;
 343   int timecode = buf[1] << 8 | buf[2];
 344   int flags = buf[3] & 0xff;
 345   int lacing = (flags >> 1) & 0x3;
 346
 347   if (lacing) {
 348     MEDIA_LOG(ERROR, media_log_) << "Lacing " << lacing
 349                                  << " is not supported yet.";
 350     return false;
 351   }
 352
 353   // Sign extend negative timecode offsets.
 354   if (timecode & 0x8000)
 355     timecode |= ~0xffff;
 356
 357   const uint8_t* frame_data = buf + 4;
 358   int frame_size = size - (frame_data - buf);
 359   return OnBlock(is_simple_block, track_num, timecode, duration, flags,
 360                  frame_data, frame_size, additional, additional_size,
 361                  discard_padding);
 362 }
 363
 364 bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
 365   switch (id) {
 366     case kWebMIdSimpleBlock:
 367       return ParseBlock(true, data, size, NULL, 0, -1, 0);
 368
 369     case kWebMIdBlock:
 370       if (block_data_) {
 371         MEDIA_LOG(ERROR, media_log_)
 372             << "More than 1 Block in a BlockGroup is not "
 373                "supported.";
 374         return false;
 375       }
 376       block_data_.reset(new uint8_t[size]);
 377       memcpy(block_data_.get(), data, size);
 378       block_data_size_ = size;
 379       return true;
 380
 381     case kWebMIdBlockAdditional: {
 382       uint64 block_add_id = base::HostToNet64(block_add_id_);
 383       if (block_additional_data_) {
 384         // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
 385         // as per matroska spec. But for now we don't have a use case to
 386         // support parsing of such files. Take a look at this again when such a
 387         // case arises.
 388         MEDIA_LOG(ERROR, media_log_) << "More than 1 BlockAdditional in a "
 389                                         "BlockGroup is not supported.";
 390         return false;
 391       }
 392       // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
 393       // element's value in Big Endian format. This is done to mimic ffmpeg
 394       // demuxer's behavior.
 395       block_additional_data_size_ = size + sizeof(block_add_id);
 396       block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
 397       memcpy(block_additional_data_.get(), &block_add_id,
 398              sizeof(block_add_id));
 399       memcpy(block_additional_data_.get() + 8, data, size);
 400       return true;
 401     }
 402     case kWebMIdDiscardPadding: {
 403       if (discard_padding_set_ || size <= 0 || size > 8)
 404         return false;
 405       discard_padding_set_ = true;
 406
 407       // Read in the big-endian integer.
 408       discard_padding_ = static_cast<int8>(data[0]);
 409       for (int i = 1; i < size; ++i)
 410         discard_padding_ = (discard_padding_ << 8) | data[i];
 411
 412       return true;
 413     }
 414     default:
 415       return true;
 416   }
 417 }
 418
 419 bool WebMClusterParser::OnBlock(bool is_simple_block,
 420                                 int track_num,
 421                                 int timecode,
 422                                 int block_duration,
 423                                 int flags,
 424                                 const uint8_t* data,
 425                                 int size,
 426                                 const uint8_t* additional,
 427                                 int additional_size,
 428                                 int64 discard_padding) {
 429   DCHECK_GE(size, 0);
 430   if (cluster_timecode_ == -1) {
 431     MEDIA_LOG(ERROR, media_log_) << "Got a block before cluster timecode.";
 432     return false;
 433   }
 434
 435   // TODO(acolwell): Should relative negative timecode offsets be rejected?  Or
 436   // only when the absolute timecode is negative?  See http://crbug.com/271794
 437   if (timecode < 0) {
 438     MEDIA_LOG(ERROR, media_log_) << "Got a block with negative timecode offset "
 439                                  << timecode;
 440     return false;
 441   }
 442
 443   if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
 444     MEDIA_LOG(ERROR, media_log_)
 445         << "Got a block with a timecode before the previous block.";
 446     return false;
 447   }
 448
 449   Track* track = NULL;
 450   StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
 451   std::string encryption_key_id;
 452   base::TimeDelta encoded_duration = kNoTimestamp();
 453   if (track_num == audio_.track_num()) {
 454     track = &audio_;
 455     encryption_key_id = audio_encryption_key_id_;
 456     if (encryption_key_id.empty()) {
 457       encoded_duration = TryGetEncodedAudioDuration(data, size);
 458     }
 459   } else if (track_num == video_.track_num()) {
 460     track = &video_;
 461     encryption_key_id = video_encryption_key_id_;
 462     buffer_type = DemuxerStream::VIDEO;
 463   } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
 464     return true;
 465   } else if (Track* const text_track = FindTextTrack(track_num)) {
 466     if (is_simple_block)  // BlockGroup is required for WebVTT cues
 467       return false;
 468     if (block_duration < 0)  // not specified
 469       return false;
 470     track = text_track;
 471     buffer_type = DemuxerStream::TEXT;
 472   } else {
 473     MEDIA_LOG(ERROR, media_log_) << "Unexpected track number " << track_num;
 474     return false;
 475   }
 476
 477   last_block_timecode_ = timecode;
 478
 479   base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
 480       (cluster_timecode_ + timecode) * timecode_multiplier_);
 481
 482   scoped_refptr<StreamParserBuffer> buffer;
 483   if (buffer_type != DemuxerStream::TEXT) {
 484     // The first bit of the flags is set when a SimpleBlock contains only
 485     // keyframes. If this is a Block, then inspection of the payload is
 486     // necessary to determine whether it contains a keyframe or not.
 487     // http://www.matroska.org/technical/specs/index.html
 488     bool is_keyframe =
 489         is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
 490
 491     // Every encrypted Block has a signal byte and IV prepended to it. Current
 492     // encrypted WebM request for comments specification is here
 493     // http://wiki.webmproject.org/encryption/webm-encryption-rfc
 494     scoped_ptr<DecryptConfig> decrypt_config;
 495     int data_offset = 0;
 496     if (!encryption_key_id.empty() &&
 497         !WebMCreateDecryptConfig(
 498              data, size,
 499              reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
 500              encryption_key_id.size(),
 501              &decrypt_config, &data_offset)) {
 502       return false;
 503     }
 504
 505     // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
 506     // type with remapped bytestream track numbers and allow multiple tracks as
 507     // applicable. See https://crbug.com/341581.
 508     buffer = StreamParserBuffer::CopyFrom(
 509         data + data_offset, size - data_offset,
 510         additional, additional_size,
 511         is_keyframe, buffer_type, track_num);
 512
 513     if (decrypt_config)
 514       buffer->set_decrypt_config(decrypt_config.Pass());
 515   } else {
 516     std::string id, settings, content;
 517     WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
 518
 519     std::vector<uint8_t> side_data;
 520     MakeSideData(id.begin(), id.end(),
 521                  settings.begin(), settings.end(),
 522                  &side_data);
 523
 524     // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
 525     // type with remapped bytestream track numbers and allow multiple tracks as
 526     // applicable. See https://crbug.com/341581.
 527     buffer = StreamParserBuffer::CopyFrom(
 528         reinterpret_cast<const uint8_t*>(content.data()),
 529         content.length(),
 530         &side_data[0],
 531         side_data.size(),
 532         true, buffer_type, track_num);
 533   }
 534
 535   buffer->set_timestamp(timestamp);
 536   if (cluster_start_time_ == kNoTimestamp())
 537     cluster_start_time_ = timestamp;
 538
 539   base::TimeDelta block_duration_time_delta = kNoTimestamp();
 540   if (block_duration >= 0) {
 541     block_duration_time_delta = base::TimeDelta::FromMicroseconds(
 542         block_duration * timecode_multiplier_);
 543   }
 544
 545   // Prefer encoded duration over BlockGroup->BlockDuration or
 546   // TrackEntry->DefaultDuration when available. This layering violation is a
 547   // workaround for http://crbug.com/396634, decreasing the likelihood of
 548   // fall-back to rough estimation techniques for Blocks that lack a
 549   // BlockDuration at the end of a cluster. Cross cluster durations are not
 550   // feasible given flexibility of cluster ordering and MSE APIs. Duration
 551   // estimation may still apply in cases of encryption and codecs for which
 552   // we do not extract encoded duration. Within a cluster, estimates are applied
 553   // as Block Timecode deltas, or once the whole cluster is parsed in the case
 554   // of the last Block in the cluster. See Track::AddBuffer and
 555   // ApplyDurationEstimateIfNeeded().
 556   if (encoded_duration != kNoTimestamp()) {
 557     DCHECK(encoded_duration != kInfiniteDuration());
 558     DCHECK(encoded_duration > base::TimeDelta());
 559     buffer->set_duration(encoded_duration);
 560
 561     DVLOG(3) << __FUNCTION__ << " : "
 562              << "Using encoded duration " << encoded_duration.InSecondsF();
 563
 564     if (block_duration_time_delta != kNoTimestamp()) {
 565       base::TimeDelta duration_difference =
 566           block_duration_time_delta - encoded_duration;
 567
 568       const auto kWarnDurationDiff =
 569           base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
 570       if (duration_difference.magnitude() > kWarnDurationDiff) {
 571         LIMITED_MEDIA_LOG(DEBUG, media_log_, num_duration_errors_,
 572                           kMaxDurationErrorLogs)
 573             << "BlockDuration "
 574             << "(" << block_duration_time_delta << ") "
 575             << "differs significantly from encoded duration "
 576             << "(" << encoded_duration << ").";
 577       }
 578     }
 579   } else if (block_duration_time_delta != kNoTimestamp()) {
 580     buffer->set_duration(block_duration_time_delta);
 581   } else {
 582     DCHECK_NE(buffer_type, DemuxerStream::TEXT);
 583     buffer->set_duration(track->default_duration());
 584   }
 585
 586   if (discard_padding != 0) {
 587     buffer->set_discard_padding(std::make_pair(
 588         base::TimeDelta(),
 589         base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
 590   }
 591
 592   return track->AddBuffer(buffer);
 593 }
 594
 595 WebMClusterParser::Track::Track(int track_num,
 596                                 bool is_video,
 597                                 base::TimeDelta default_duration,
 598                                 const scoped_refptr<MediaLog>& media_log)
 599     : num_duration_estimates_(0),
 600       track_num_(track_num),
 601       is_video_(is_video),
 602       default_duration_(default_duration),
 603       estimated_next_frame_duration_(kNoTimestamp()),
 604       media_log_(media_log) {
 605   DCHECK(default_duration_ == kNoTimestamp() ||
 606          default_duration_ > base::TimeDelta());
 607 }
 608
 609 WebMClusterParser::Track::~Track() {}
 610
 611 DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() {
 612   DCHECK(ready_buffers_.empty());
 613   if (last_added_buffer_missing_duration_.get())
 614     return last_added_buffer_missing_duration_->GetDecodeTimestamp();
 615
 616   return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
 617 }
 618
 619 void WebMClusterParser::Track::ExtractReadyBuffers(
 620     const DecodeTimestamp before_timestamp) {
 621   DCHECK(ready_buffers_.empty());
 622   DCHECK(DecodeTimestamp() <= before_timestamp);
 623   DCHECK(kNoDecodeTimestamp() != before_timestamp);
 624
 625   if (buffers_.empty())
 626     return;
 627
 628   if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) {
 629     // All of |buffers_| are ready.
 630     ready_buffers_.swap(buffers_);
 631     DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All "
 632              << ready_buffers_.size() << " are ready: before upper bound ts "
 633              << before_timestamp.InSecondsF();
 634     return;
 635   }
 636
 637   // Not all of |buffers_| are ready yet. Move any that are ready to
 638   // |ready_buffers_|.
 639   while (true) {
 640     const scoped_refptr<StreamParserBuffer>& buffer = buffers_.front();
 641     if (buffer->GetDecodeTimestamp() >= before_timestamp)
 642       break;
 643     ready_buffers_.push_back(buffer);
 644     buffers_.pop_front();
 645     DCHECK(!buffers_.empty());
 646   }
 647
 648   DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only "
 649            << ready_buffers_.size() << " ready, " << buffers_.size()
 650            << " at or after upper bound ts " << before_timestamp.InSecondsF();
 651 }
 652
 653 bool WebMClusterParser::Track::AddBuffer(
 654     const scoped_refptr<StreamParserBuffer>& buffer) {
 655   DVLOG(2) << "AddBuffer() : " << track_num_
 656            << " ts " << buffer->timestamp().InSecondsF()
 657            << " dur " << buffer->duration().InSecondsF()
 658            << " kf " << buffer->is_key_frame()
 659            << " size " << buffer->data_size();
 660
 661   if (last_added_buffer_missing_duration_.get()) {
 662     base::TimeDelta derived_duration =
 663         buffer->timestamp() - last_added_buffer_missing_duration_->timestamp();
 664     last_added_buffer_missing_duration_->set_duration(derived_duration);
 665
 666     DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
 667              << " ts "
 668              << last_added_buffer_missing_duration_->timestamp().InSecondsF()
 669              << " dur "
 670              << last_added_buffer_missing_duration_->duration().InSecondsF()
 671              << " kf " << last_added_buffer_missing_duration_->is_key_frame()
 672              << " size " << last_added_buffer_missing_duration_->data_size();
 673     scoped_refptr<StreamParserBuffer> updated_buffer =
 674         last_added_buffer_missing_duration_;
 675     last_added_buffer_missing_duration_ = NULL;
 676     if (!QueueBuffer(updated_buffer))
 677       return false;
 678   }
 679
 680   if (buffer->duration() == kNoTimestamp()) {
 681     last_added_buffer_missing_duration_ = buffer;
 682     DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
 683     return true;
 684   }
 685
 686   return QueueBuffer(buffer);
 687 }
 688
 689 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
 690   if (!last_added_buffer_missing_duration_.get())
 691     return;
 692
 693   base::TimeDelta estimated_duration = GetDurationEstimate();
 694   last_added_buffer_missing_duration_->set_duration(estimated_duration);
 695
 696   if (is_video_) {
 697     // Exposing estimation so splicing/overlap frame processing can make
 698     // informed decisions downstream.
 699     // TODO(chcunningham): Set this for audio as well in later change where
 700     // audio is switched to max estimation and splicing is disabled.
 701     last_added_buffer_missing_duration_->set_is_duration_estimated(true);
 702   }
 703
 704   LIMITED_MEDIA_LOG(INFO, media_log_, num_duration_estimates_,
 705                     kMaxDurationEstimateLogs)
 706       << "Estimating WebM block duration to be " << estimated_duration << " "
 707       << "for the last (Simple)Block in the Cluster for this Track. Use "
 708       << "BlockGroups with BlockDurations at the end of each Track in a "
 709       << "Cluster to avoid estimation.";
 710
 711   DVLOG(2) << __FUNCTION__ << " new dur : ts "
 712            << last_added_buffer_missing_duration_->timestamp().InSecondsF()
 713            << " dur "
 714            << last_added_buffer_missing_duration_->duration().InSecondsF()
 715            << " kf " << last_added_buffer_missing_duration_->is_key_frame()
 716            << " size " << last_added_buffer_missing_duration_->data_size();
 717
 718   // Don't use the applied duration as a future estimation (don't use
 719   // QueueBuffer() here.)
 720   buffers_.push_back(last_added_buffer_missing_duration_);
 721   last_added_buffer_missing_duration_ = NULL;
 722 }
 723
 724 void WebMClusterParser::Track::ClearReadyBuffers() {
 725   // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
 726   // reset here.
 727   ready_buffers_.clear();
 728 }
 729
 730 void WebMClusterParser::Track::Reset() {
 731   ClearReadyBuffers();
 732   buffers_.clear();
 733   last_added_buffer_missing_duration_ = NULL;
 734 }
 735
 736 bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
 737   // For now, assume that all blocks are keyframes for datatypes other than
 738   // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
 739   if (!is_video_)
 740     return true;
 741
 742   // Make sure the block is big enough for the minimal keyframe header size.
 743   if (size < 7)
 744     return false;
 745
 746   // The LSb of the first byte must be a 0 for a keyframe.
 747   // http://tools.ietf.org/html/rfc6386 Section 19.1
 748   if ((data[0] & 0x01) != 0)
 749     return false;
 750
 751   // Verify VP8 keyframe startcode.
 752   // http://tools.ietf.org/html/rfc6386 Section 19.1
 753   if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
 754     return false;
 755
 756   return true;
 757 }
 758
 759 bool WebMClusterParser::Track::QueueBuffer(
 760     const scoped_refptr<StreamParserBuffer>& buffer) {
 761   DCHECK(!last_added_buffer_missing_duration_.get());
 762
 763   // WebMClusterParser::OnBlock() gives MEDIA_LOG and parse error on decreasing
 764   // block timecode detection within a cluster. Therefore, we should not see
 765   // those here.
 766   DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
 767       DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
 768   CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
 769
 770   base::TimeDelta duration = buffer->duration();
 771   if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
 772     MEDIA_LOG(ERROR, media_log_)
 773         << "Invalid buffer duration: " << duration.InSecondsF();
 774     return false;
 775   }
 776
 777   // The estimated frame duration is the minimum (for audio) or the maximum
 778   // (for video) non-zero duration since the last initialization segment. The
 779   // minimum is used for audio to ensure frame durations aren't overestimated,
 780   // triggering unnecessary frame splicing. For video, splicing does not apply,
 781   // so maximum is used and overlap is simply resolved by showing the
 782   // later of the overlapping frames at its given PTS, effectively trimming down
 783   // the over-estimated duration of the previous frame.
 784   // TODO(chcunningham): Use max for audio and disable splicing whenever
 785   // estimated buffers are encountered.
 786   if (duration > base::TimeDelta()) {
 787     base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
 788     if (estimated_next_frame_duration_ == kNoTimestamp()) {
 789       estimated_next_frame_duration_ = duration;
 790     } else if (is_video_) {
 791       estimated_next_frame_duration_ =
 792           std::max(duration, estimated_next_frame_duration_);
 793     } else {
 794       estimated_next_frame_duration_ =
 795           std::min(duration, estimated_next_frame_duration_);
 796     }
 797
 798     if (orig_duration_estimate != estimated_next_frame_duration_) {
 799       DVLOG(3) << "Updated duration estimate:"
 800                << orig_duration_estimate
 801                << " -> "
 802                << estimated_next_frame_duration_
 803                << " at timestamp: "
 804                << buffer->GetDecodeTimestamp().InSecondsF();
 805     }
 806   }
 807
 808   buffers_.push_back(buffer);
 809   return true;
 810 }
 811
 812 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
 813   base::TimeDelta duration = estimated_next_frame_duration_;
 814   if (duration != kNoTimestamp()) {
 815     DVLOG(3) << __FUNCTION__ << " : using estimated duration";
 816   } else {
 817     DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
 818     if (is_video_) {
 819       duration = base::TimeDelta::FromMilliseconds(
 820           kDefaultVideoBufferDurationInMs);
 821     } else {
 822       duration = base::TimeDelta::FromMilliseconds(
 823           kDefaultAudioBufferDurationInMs);
 824     }
 825   }
 826
 827   DCHECK(duration > base::TimeDelta());
 828   DCHECK(duration != kNoTimestamp());
 829   return duration;
 830 }
 831
 832 void WebMClusterParser::ClearTextTrackReadyBuffers() {
 833   text_buffers_map_.clear();
 834   for (TextTrackMap::iterator it = text_track_map_.begin();
 835        it != text_track_map_.end();
 836        ++it) {
 837     it->second.ClearReadyBuffers();
 838   }
 839 }
 840
 841 void WebMClusterParser::ResetTextTracks() {
 842   ClearTextTrackReadyBuffers();
 843   for (TextTrackMap::iterator it = text_track_map_.begin();
 844        it != text_track_map_.end();
 845        ++it) {
 846     it->second.Reset();
 847   }
 848 }
 849
 850 void WebMClusterParser::UpdateReadyBuffers() {
 851   DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp());
 852   DCHECK(text_buffers_map_.empty());
 853
 854   if (cluster_ended_) {
 855     audio_.ApplyDurationEstimateIfNeeded();
 856     video_.ApplyDurationEstimateIfNeeded();
 857     // Per OnBlock(), all text buffers should already have valid durations, so
 858     // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
 859     // here.
 860     ready_buffer_upper_bound_ =
 861         DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
 862     DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound());
 863     DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound());
 864   } else {
 865     ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(),
 866                                          video_.GetReadyUpperBound());
 867     DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_);
 868     DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_);
 869   }
 870
 871   // Prepare each track's ready buffers for retrieval.
 872   audio_.ExtractReadyBuffers(ready_buffer_upper_bound_);
 873   video_.ExtractReadyBuffers(ready_buffer_upper_bound_);
 874   for (TextTrackMap::iterator itr = text_track_map_.begin();
 875        itr != text_track_map_.end();
 876        ++itr) {
 877     itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_);
 878   }
 879 }
 880
 881 WebMClusterParser::Track*
 882 WebMClusterParser::FindTextTrack(int track_num) {
 883   const TextTrackMap::iterator it = text_track_map_.find(track_num);
 884
 885   if (it == text_track_map_.end())
 886     return NULL;
 887
 888   return &it->second;
 889 }
 890
 891 }  // namespace media