media/webm/webm_cluster_parser.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/webm/webm_cluster_parser.h"
   6
   7 #include <vector>
   8
   9 #include "base/logging.h"
  10 #include "base/sys_byteorder.h"
  11 #include "media/base/buffers.h"
  12 #include "media/base/decrypt_config.h"
  13 #include "media/filters/webvtt_util.h"
  14 #include "media/webm/webm_constants.h"
  15 #include "media/webm/webm_crypto_helpers.h"
  16 #include "media/webm/webm_webvtt_parser.h"
  17
  18 namespace media {
  19
  20 WebMClusterParser::TextTrackIterator::TextTrackIterator(
  21     const TextTrackMap& text_track_map) :
  22     iterator_(text_track_map.begin()),
  23     iterator_end_(text_track_map.end()) {
  24 }
  25
  26 WebMClusterParser::TextTrackIterator::TextTrackIterator(
  27     const TextTrackIterator& rhs) :
  28     iterator_(rhs.iterator_),
  29     iterator_end_(rhs.iterator_end_) {
  30 }
  31
  32 WebMClusterParser::TextTrackIterator::~TextTrackIterator() {
  33 }
  34
  35 bool WebMClusterParser::TextTrackIterator::operator()(
  36   int* track_num,
  37   const BufferQueue** buffers) {
  38   if (iterator_ == iterator_end_) {
  39     *track_num = 0;
  40     *buffers = NULL;
  41
  42     return false;
  43   }
  44
  45   *track_num = iterator_->first;
  46   *buffers = &iterator_->second.buffers();
  47
  48   ++iterator_;
  49   return true;
  50 }
  51
  52 WebMClusterParser::WebMClusterParser(
  53     int64 timecode_scale, int audio_track_num, int video_track_num,
  54     const WebMTracksParser::TextTracks& text_tracks,
  55     const std::set<int64>& ignored_tracks,
  56     const std::string& audio_encryption_key_id,
  57     const std::string& video_encryption_key_id,
  58     const LogCB& log_cb)
  59     : timecode_multiplier_(timecode_scale / 1000.0),
  60       ignored_tracks_(ignored_tracks),
  61       audio_encryption_key_id_(audio_encryption_key_id),
  62       video_encryption_key_id_(video_encryption_key_id),
  63       parser_(kWebMIdCluster, this),
  64       last_block_timecode_(-1),
  65       block_data_size_(-1),
  66       block_duration_(-1),
  67       block_add_id_(-1),
  68       block_additional_data_size_(-1),
  69       discard_padding_(-1),
  70       cluster_timecode_(-1),
  71       cluster_start_time_(kNoTimestamp()),
  72       cluster_ended_(false),
  73       audio_(audio_track_num, false),
  74       video_(video_track_num, true),
  75       log_cb_(log_cb) {
  76   for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
  77        it != text_tracks.end();
  78        ++it) {
  79     text_track_map_.insert(std::make_pair(it->first, Track(it->first, false)));
  80   }
  81 }
  82
  83 WebMClusterParser::~WebMClusterParser() {}
  84
  85 void WebMClusterParser::Reset() {
  86   last_block_timecode_ = -1;
  87   cluster_timecode_ = -1;
  88   cluster_start_time_ = kNoTimestamp();
  89   cluster_ended_ = false;
  90   parser_.Reset();
  91   audio_.Reset();
  92   video_.Reset();
  93   ResetTextTracks();
  94 }
  95
  96 int WebMClusterParser::Parse(const uint8* buf, int size) {
  97   audio_.Reset();
  98   video_.Reset();
  99   ResetTextTracks();
 100
 101   int result = parser_.Parse(buf, size);
 102
 103   if (result < 0) {
 104     cluster_ended_ = false;
 105     return result;
 106   }
 107
 108   cluster_ended_ = parser_.IsParsingComplete();
 109   if (cluster_ended_) {
 110     // If there were no buffers in this cluster, set the cluster start time to
 111     // be the |cluster_timecode_|.
 112     if (cluster_start_time_ == kNoTimestamp()) {
 113       // If the cluster did not even have a |cluster_timecode_|, signal parse
 114       // error.
 115       if (cluster_timecode_ < 0)
 116         return -1;
 117
 118       cluster_start_time_ = base::TimeDelta::FromMicroseconds(
 119           cluster_timecode_ * timecode_multiplier_);
 120     }
 121
 122     // Reset the parser if we're done parsing so that
 123     // it is ready to accept another cluster on the next
 124     // call.
 125     parser_.Reset();
 126
 127     last_block_timecode_ = -1;
 128     cluster_timecode_ = -1;
 129   }
 130
 131   return result;
 132 }
 133
 134 WebMClusterParser::TextTrackIterator
 135 WebMClusterParser::CreateTextTrackIterator() const {
 136   return TextTrackIterator(text_track_map_);
 137 }
 138
 139 WebMParserClient* WebMClusterParser::OnListStart(int id) {
 140   if (id == kWebMIdCluster) {
 141     cluster_timecode_ = -1;
 142     cluster_start_time_ = kNoTimestamp();
 143   } else if (id == kWebMIdBlockGroup) {
 144     block_data_.reset();
 145     block_data_size_ = -1;
 146     block_duration_ = -1;
 147     discard_padding_ = -1;
 148     discard_padding_set_ = false;
 149   } else if (id == kWebMIdBlockAdditions) {
 150     block_add_id_ = -1;
 151     block_additional_data_.reset();
 152     block_additional_data_size_ = -1;
 153   }
 154
 155   return this;
 156 }
 157
 158 bool WebMClusterParser::OnListEnd(int id) {
 159   if (id != kWebMIdBlockGroup)
 160     return true;
 161
 162   // Make sure the BlockGroup actually had a Block.
 163   if (block_data_size_ == -1) {
 164     MEDIA_LOG(log_cb_) << "Block missing from BlockGroup.";
 165     return false;
 166   }
 167
 168   bool result = ParseBlock(false, block_data_.get(), block_data_size_,
 169                            block_additional_data_.get(),
 170                            block_additional_data_size_, block_duration_,
 171                            discard_padding_set_ ? discard_padding_ : 0);
 172   block_data_.reset();
 173   block_data_size_ = -1;
 174   block_duration_ = -1;
 175   block_add_id_ = -1;
 176   block_additional_data_.reset();
 177   block_additional_data_size_ = -1;
 178   discard_padding_ = -1;
 179   discard_padding_set_ = false;
 180   return result;
 181 }
 182
 183 bool WebMClusterParser::OnUInt(int id, int64 val) {
 184   int64* dst;
 185   switch (id) {
 186     case kWebMIdTimecode:
 187       dst = &cluster_timecode_;
 188       break;
 189     case kWebMIdBlockDuration:
 190       dst = &block_duration_;
 191       break;
 192     case kWebMIdBlockAddID:
 193       dst = &block_add_id_;
 194       break;
 195     case kWebMIdDiscardPadding:
 196       if (discard_padding_set_)
 197         return false;
 198       discard_padding_set_ = true;
 199       discard_padding_ = val;
 200       return true;
 201     default:
 202       return true;
 203   }
 204   if (*dst != -1)
 205     return false;
 206   *dst = val;
 207   return true;
 208 }
 209
 210 bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
 211                                    int size, const uint8* additional,
 212                                    int additional_size, int duration,
 213                                    int64 discard_padding) {
 214   if (size < 4)
 215     return false;
 216
 217   // Return an error if the trackNum > 127. We just aren't
 218   // going to support large track numbers right now.
 219   if (!(buf[0] & 0x80)) {
 220     MEDIA_LOG(log_cb_) << "TrackNumber over 127 not supported";
 221     return false;
 222   }
 223
 224   int track_num = buf[0] & 0x7f;
 225   int timecode = buf[1] << 8 | buf[2];
 226   int flags = buf[3] & 0xff;
 227   int lacing = (flags >> 1) & 0x3;
 228
 229   if (lacing) {
 230     MEDIA_LOG(log_cb_) << "Lacing " << lacing << " is not supported yet.";
 231     return false;
 232   }
 233
 234   // Sign extend negative timecode offsets.
 235   if (timecode & 0x8000)
 236     timecode |= ~0xffff;
 237
 238   const uint8* frame_data = buf + 4;
 239   int frame_size = size - (frame_data - buf);
 240   return OnBlock(is_simple_block, track_num, timecode, duration, flags,
 241                  frame_data, frame_size, additional, additional_size,
 242                  discard_padding);
 243 }
 244
 245 bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
 246   switch (id) {
 247     case kWebMIdSimpleBlock:
 248       return ParseBlock(true, data, size, NULL, -1, -1, 0);
 249
 250     case kWebMIdBlock:
 251       if (block_data_) {
 252         MEDIA_LOG(log_cb_) << "More than 1 Block in a BlockGroup is not "
 253                               "supported.";
 254         return false;
 255       }
 256       block_data_.reset(new uint8[size]);
 257       memcpy(block_data_.get(), data, size);
 258       block_data_size_ = size;
 259       return true;
 260
 261     case kWebMIdBlockAdditional: {
 262       uint64 block_add_id = base::HostToNet64(block_add_id_);
 263       if (block_additional_data_) {
 264         // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
 265         // as per matroska spec. But for now we don't have a use case to
 266         // support parsing of such files. Take a look at this again when such a
 267         // case arises.
 268         MEDIA_LOG(log_cb_) << "More than 1 BlockAdditional in a BlockGroup is "
 269                               "not supported.";
 270         return false;
 271       }
 272       // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
 273       // element's value in Big Endian format. This is done to mimic ffmpeg
 274       // demuxer's behavior.
 275       block_additional_data_size_ = size + sizeof(block_add_id);
 276       block_additional_data_.reset(new uint8[block_additional_data_size_]);
 277       memcpy(block_additional_data_.get(), &block_add_id,
 278              sizeof(block_add_id));
 279       memcpy(block_additional_data_.get() + 8, data, size);
 280       return true;
 281     }
 282
 283     default:
 284       return true;
 285   }
 286 }
 287
 288 bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
 289                                 int timecode,
 290                                 int  block_duration,
 291                                 int flags,
 292                                 const uint8* data, int size,
 293                                 const uint8* additional, int additional_size,
 294                                 int64 discard_padding) {
 295   DCHECK_GE(size, 0);
 296   if (cluster_timecode_ == -1) {
 297     MEDIA_LOG(log_cb_) << "Got a block before cluster timecode.";
 298     return false;
 299   }
 300
 301   // TODO(acolwell): Should relative negative timecode offsets be rejected?  Or
 302   // only when the absolute timecode is negative?  See http://crbug.com/271794
 303   if (timecode < 0) {
 304     MEDIA_LOG(log_cb_) << "Got a block with negative timecode offset "
 305                        << timecode;
 306     return false;
 307   }
 308
 309   if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
 310     MEDIA_LOG(log_cb_)
 311         << "Got a block with a timecode before the previous block.";
 312     return false;
 313   }
 314
 315   Track* track = NULL;
 316   bool is_text = false;
 317   std::string encryption_key_id;
 318   if (track_num == audio_.track_num()) {
 319     track = &audio_;
 320     encryption_key_id = audio_encryption_key_id_;
 321   } else if (track_num == video_.track_num()) {
 322     track = &video_;
 323     encryption_key_id = video_encryption_key_id_;
 324   } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
 325     return true;
 326   } else if (Track* const text_track = FindTextTrack(track_num)) {
 327     if (is_simple_block)  // BlockGroup is required for WebVTT cues
 328       return false;
 329     if (block_duration < 0)  // not specified
 330       return false;
 331     track = text_track;
 332     is_text = true;
 333   } else {
 334     MEDIA_LOG(log_cb_) << "Unexpected track number " << track_num;
 335     return false;
 336   }
 337
 338   last_block_timecode_ = timecode;
 339
 340   base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
 341       (cluster_timecode_ + timecode) * timecode_multiplier_);
 342
 343   scoped_refptr<StreamParserBuffer> buffer;
 344   if (!is_text) {
 345     // The first bit of the flags is set when a SimpleBlock contains only
 346     // keyframes. If this is a Block, then inspection of the payload is
 347     // necessary to determine whether it contains a keyframe or not.
 348     // http://www.matroska.org/technical/specs/index.html
 349     bool is_keyframe =
 350         is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
 351
 352     // Every encrypted Block has a signal byte and IV prepended to it. Current
 353     // encrypted WebM request for comments specification is here
 354     // http://wiki.webmproject.org/encryption/webm-encryption-rfc
 355     scoped_ptr<DecryptConfig> decrypt_config;
 356     int data_offset = 0;
 357     if (!encryption_key_id.empty() &&
 358         !WebMCreateDecryptConfig(
 359              data, size,
 360              reinterpret_cast<const uint8*>(encryption_key_id.data()),
 361              encryption_key_id.size(),
 362              &decrypt_config, &data_offset)) {
 363       return false;
 364     }
 365
 366     buffer = StreamParserBuffer::CopyFrom(
 367         data + data_offset, size - data_offset,
 368         additional, additional_size,
 369         is_keyframe);
 370
 371     if (decrypt_config)
 372       buffer->set_decrypt_config(decrypt_config.Pass());
 373   } else {
 374     std::string id, settings, content;
 375     WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
 376
 377     std::vector<uint8> side_data;
 378     MakeSideData(id.begin(), id.end(),
 379                  settings.begin(), settings.end(),
 380                  &side_data);
 381
 382     buffer = StreamParserBuffer::CopyFrom(
 383         reinterpret_cast<const uint8*>(content.data()),
 384         content.length(),
 385         &side_data[0],
 386         side_data.size(),
 387         true);
 388   }
 389
 390   buffer->set_timestamp(timestamp);
 391   if (cluster_start_time_ == kNoTimestamp())
 392     cluster_start_time_ = timestamp;
 393
 394   if (block_duration >= 0) {
 395     buffer->set_duration(base::TimeDelta::FromMicroseconds(
 396         block_duration * timecode_multiplier_));
 397   }
 398
 399   if (discard_padding != 0) {
 400     buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(
 401                                     discard_padding / 1000));
 402   }
 403
 404   return track->AddBuffer(buffer);
 405 }
 406
 407 WebMClusterParser::Track::Track(int track_num, bool is_video)
 408     : track_num_(track_num),
 409       is_video_(is_video) {
 410 }
 411
 412 WebMClusterParser::Track::~Track() {}
 413
 414 bool WebMClusterParser::Track::AddBuffer(
 415     const scoped_refptr<StreamParserBuffer>& buffer) {
 416   DVLOG(2) << "AddBuffer() : " << track_num_
 417            << " ts " << buffer->timestamp().InSecondsF()
 418            << " dur " << buffer->duration().InSecondsF()
 419            << " kf " << buffer->IsKeyframe()
 420            << " size " << buffer->data_size();
 421
 422   buffers_.push_back(buffer);
 423   return true;
 424 }
 425
 426 void WebMClusterParser::Track::Reset() {
 427   buffers_.clear();
 428 }
 429
 430 bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const {
 431   // For now, assume that all blocks are keyframes for datatypes other than
 432   // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
 433   if (!is_video_)
 434     return true;
 435
 436   // Make sure the block is big enough for the minimal keyframe header size.
 437   if (size < 7)
 438     return false;
 439
 440   // The LSb of the first byte must be a 0 for a keyframe.
 441   // http://tools.ietf.org/html/rfc6386 Section 19.1
 442   if ((data[0] & 0x01) != 0)
 443     return false;
 444
 445   // Verify VP8 keyframe startcode.
 446   // http://tools.ietf.org/html/rfc6386 Section 19.1
 447   if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
 448     return false;
 449
 450   return true;
 451 }
 452
 453 void WebMClusterParser::ResetTextTracks() {
 454   for (TextTrackMap::iterator it = text_track_map_.begin();
 455        it != text_track_map_.end();
 456        ++it) {
 457     it->second.Reset();
 458   }
 459 }
 460
 461 WebMClusterParser::Track*
 462 WebMClusterParser::FindTextTrack(int track_num) {
 463   const TextTrackMap::iterator it = text_track_map_.find(track_num);
 464
 465   if (it == text_track_map_.end())
 466     return NULL;
 467
 468   return &it->second;
 469 }
 470
 471 }  // namespace media