media/webm/webm_stream_parser.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/webm/webm_stream_parser.h"
   6
   7 #include <string>
   8
   9 #include "base/callback.h"
  10 #include "base/logging.h"
  11 #include "media/ffmpeg/ffmpeg_common.h"
  12 #include "media/filters/ffmpeg_glue.h"
  13 #include "media/filters/in_memory_url_protocol.h"
  14 #include "media/webm/webm_cluster_parser.h"
  15 #include "media/webm/webm_constants.h"
  16 #include "media/webm/webm_content_encodings.h"
  17 #include "media/webm/webm_info_parser.h"
  18 #include "media/webm/webm_tracks_parser.h"
  19
  20 namespace media {
  21
  22 // TODO(xhwang): Figure out the init data type appropriately once it's spec'ed.
  23 static const char kWebMInitDataType[] = "video/webm";
  24
  25 // Helper class that uses FFmpeg to create AudioDecoderConfig &
  26 // VideoDecoderConfig objects.
  27 //
  28 // This dependency on FFmpeg can be removed once we update WebMTracksParser
  29 // to parse the necessary data to construct AudioDecoderConfig &
  30 // VideoDecoderConfig objects. http://crbug.com/108756
  31 class FFmpegConfigHelper {
  32  public:
  33   FFmpegConfigHelper();
  34   ~FFmpegConfigHelper();
  35
  36   bool Parse(const uint8* data, int size);
  37
  38   const AudioDecoderConfig& audio_config() const;
  39   const VideoDecoderConfig& video_config() const;
  40
  41  private:
  42   static const uint8 kWebMHeader[];
  43   static const int kSegmentSizeOffset;
  44   static const uint8 kEmptyCluster[];
  45
  46   AVFormatContext* CreateFormatContext(const uint8* data, int size);
  47   bool SetupStreamConfigs();
  48
  49   AudioDecoderConfig audio_config_;
  50   VideoDecoderConfig video_config_;
  51
  52   // Backing buffer for |url_protocol_|.
  53   scoped_array<uint8> url_protocol_buffer_;
  54
  55   // Protocol used by |format_context_|. It must outlive the context object.
  56   scoped_ptr<InMemoryUrlProtocol> url_protocol_;
  57
  58   // FFmpeg format context for this demuxer. It is created by
  59   // avformat_open_input() during demuxer initialization and cleaned up with
  60   // DestroyAVFormatContext() in the destructor.
  61   AVFormatContext* format_context_;
  62
  63   DISALLOW_COPY_AND_ASSIGN(FFmpegConfigHelper);
  64 };
  65
  66 // WebM File Header. This is prepended to the INFO & TRACKS
  67 // data passed to Init() before handing it to FFmpeg. Essentially
  68 // we are making the INFO & TRACKS data look like a small WebM
  69 // file so we can use FFmpeg to initialize the AVFormatContext.
  70 const uint8 FFmpegConfigHelper::kWebMHeader[] = {
  71   0x1A, 0x45, 0xDF, 0xA3, 0x9F,  // EBML (size = 0x1f)
  72   0x42, 0x86, 0x81, 0x01,  // EBMLVersion = 1
  73   0x42, 0xF7, 0x81, 0x01,  // EBMLReadVersion = 1
  74   0x42, 0xF2, 0x81, 0x04,  // EBMLMaxIDLength = 4
  75   0x42, 0xF3, 0x81, 0x08,  // EBMLMaxSizeLength = 8
  76   0x42, 0x82, 0x84, 0x77, 0x65, 0x62, 0x6D,  // DocType = "webm"
  77   0x42, 0x87, 0x81, 0x02,  // DocTypeVersion = 2
  78   0x42, 0x85, 0x81, 0x02,  // DocTypeReadVersion = 2
  79   // EBML end
  80   0x18, 0x53, 0x80, 0x67,  // Segment
  81   0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // segment(size = 0)
  82   // INFO goes here.
  83 };
  84
  85 // Offset of the segment size field in kWebMHeader. Used to update
  86 // the segment size field before handing the buffer to FFmpeg.
  87 const int FFmpegConfigHelper::kSegmentSizeOffset = sizeof(kWebMHeader) - 8;
  88
  89 const uint8 FFmpegConfigHelper::kEmptyCluster[] = {
  90   0x1F, 0x43, 0xB6, 0x75, 0x80  // CLUSTER (size = 0)
  91 };
  92
  93 FFmpegConfigHelper::FFmpegConfigHelper() : format_context_(NULL) {}
  94
  95 FFmpegConfigHelper::~FFmpegConfigHelper() {
  96   if (!format_context_)
  97     return;
  98
  99   DestroyAVFormatContext(format_context_);
 100   format_context_ = NULL;
 101
 102   if (url_protocol_.get()) {
 103     FFmpegGlue::GetInstance()->RemoveProtocol(url_protocol_.get());
 104     url_protocol_.reset();
 105     url_protocol_buffer_.reset();
 106   }
 107 }
 108
 109 bool FFmpegConfigHelper::Parse(const uint8* data, int size) {
 110   format_context_ = CreateFormatContext(data, size);
 111   return format_context_ && SetupStreamConfigs();
 112 }
 113
 114 const AudioDecoderConfig& FFmpegConfigHelper::audio_config() const {
 115   return audio_config_;
 116 }
 117
 118 const VideoDecoderConfig& FFmpegConfigHelper::video_config() const {
 119   return video_config_;
 120 }
 121
 122 AVFormatContext* FFmpegConfigHelper::CreateFormatContext(const uint8* data,
 123                                                          int size) {
 124   DCHECK(!url_protocol_.get());
 125   DCHECK(!url_protocol_buffer_.get());
 126
 127   int segment_size = size + sizeof(kEmptyCluster);
 128   int buf_size = sizeof(kWebMHeader) + segment_size;
 129   url_protocol_buffer_.reset(new uint8[buf_size]);
 130   uint8* buf = url_protocol_buffer_.get();
 131   memcpy(buf, kWebMHeader, sizeof(kWebMHeader));
 132   memcpy(buf + sizeof(kWebMHeader), data, size);
 133   memcpy(buf + sizeof(kWebMHeader) + size, kEmptyCluster,
 134          sizeof(kEmptyCluster));
 135
 136   // Update the segment size in the buffer.
 137   int64 tmp = (segment_size & GG_LONGLONG(0x00FFFFFFFFFFFFFF)) |
 138       GG_LONGLONG(0x0100000000000000);
 139   for (int i = 0; i < 8; i++) {
 140     buf[kSegmentSizeOffset + i] = (tmp >> (8 * (7 - i))) & 0xff;
 141   }
 142
 143   url_protocol_.reset(new InMemoryUrlProtocol(buf, buf_size, true));
 144   std::string key = FFmpegGlue::GetInstance()->AddProtocol(url_protocol_.get());
 145
 146   // Open FFmpeg AVFormatContext.
 147   AVFormatContext* context = NULL;
 148   int result = avformat_open_input(&context, key.c_str(), NULL, NULL);
 149
 150   if (result < 0)
 151     return NULL;
 152
 153   return context;
 154 }
 155
 156 bool FFmpegConfigHelper::SetupStreamConfigs() {
 157   int result = avformat_find_stream_info(format_context_, NULL);
 158
 159   if (result < 0)
 160     return false;
 161
 162   bool no_supported_streams = true;
 163   for (size_t i = 0; i < format_context_->nb_streams; ++i) {
 164     AVStream* stream = format_context_->streams[i];
 165     AVCodecContext* codec_context = stream->codec;
 166     AVMediaType codec_type = codec_context->codec_type;
 167
 168     if (codec_type == AVMEDIA_TYPE_AUDIO &&
 169         stream->codec->codec_id == CODEC_ID_VORBIS &&
 170         !audio_config_.IsValidConfig()) {
 171       AVCodecContextToAudioDecoderConfig(stream->codec, &audio_config_);
 172       no_supported_streams = false;
 173       continue;
 174     }
 175
 176     if (codec_type == AVMEDIA_TYPE_VIDEO &&
 177         stream->codec->codec_id == CODEC_ID_VP8 &&
 178         !video_config_.IsValidConfig()) {
 179       AVStreamToVideoDecoderConfig(stream, &video_config_);
 180       no_supported_streams = false;
 181       continue;
 182     }
 183   }
 184
 185   return !no_supported_streams;
 186 }
 187
 188 WebMStreamParser::WebMStreamParser()
 189     : state_(kWaitingForInit),
 190       waiting_for_buffers_(false) {
 191 }
 192
 193 WebMStreamParser::~WebMStreamParser() {}
 194
 195 void WebMStreamParser::Init(const InitCB& init_cb,
 196                             const NewConfigCB& config_cb,
 197                             const NewBuffersCB& audio_cb,
 198                             const NewBuffersCB& video_cb,
 199                             const NeedKeyCB& need_key_cb,
 200                             const NewMediaSegmentCB& new_segment_cb,
 201                             const base::Closure& end_of_segment_cb) {
 202   DCHECK_EQ(state_, kWaitingForInit);
 203   DCHECK(init_cb_.is_null());
 204   DCHECK(!init_cb.is_null());
 205   DCHECK(!config_cb.is_null());
 206   DCHECK(!audio_cb.is_null() || !video_cb.is_null());
 207   DCHECK(!need_key_cb.is_null());
 208   DCHECK(!new_segment_cb.is_null());
 209   DCHECK(!end_of_segment_cb.is_null());
 210
 211   ChangeState(kParsingHeaders);
 212   init_cb_ = init_cb;
 213   config_cb_ = config_cb;
 214   audio_cb_ = audio_cb;
 215   video_cb_ = video_cb;
 216   need_key_cb_ = need_key_cb;
 217   new_segment_cb_ = new_segment_cb;
 218   end_of_segment_cb_ = end_of_segment_cb;
 219 }
 220
 221 void WebMStreamParser::Flush() {
 222   DCHECK_NE(state_, kWaitingForInit);
 223
 224   byte_queue_.Reset();
 225
 226   if (state_ != kParsingClusters)
 227     return;
 228
 229   cluster_parser_->Reset();
 230 }
 231
 232 bool WebMStreamParser::Parse(const uint8* buf, int size) {
 233   DCHECK_NE(state_, kWaitingForInit);
 234
 235   if (state_ == kError)
 236     return false;
 237
 238   byte_queue_.Push(buf, size);
 239
 240   int result = 0;
 241   int bytes_parsed = 0;
 242   const uint8* cur = NULL;
 243   int cur_size = 0;
 244
 245   byte_queue_.Peek(&cur, &cur_size);
 246   while (cur_size > 0) {
 247     State oldState = state_;
 248     switch (state_) {
 249       case kParsingHeaders:
 250         result = ParseInfoAndTracks(cur, cur_size);
 251         break;
 252
 253       case kParsingClusters:
 254         result = ParseCluster(cur, cur_size);
 255         break;
 256
 257       case kWaitingForInit:
 258       case kError:
 259         return false;
 260     }
 261
 262     if (result < 0) {
 263       ChangeState(kError);
 264       return false;
 265     }
 266
 267     if (state_ == oldState && result == 0)
 268       break;
 269
 270     DCHECK_GE(result, 0);
 271     cur += result;
 272     cur_size -= result;
 273     bytes_parsed += result;
 274   }
 275
 276   byte_queue_.Pop(bytes_parsed);
 277   return true;
 278 }
 279
 280 void WebMStreamParser::ChangeState(State new_state) {
 281   DVLOG(1) << "ChangeState() : " << state_ << " -> " << new_state;
 282   state_ = new_state;
 283 }
 284
 285 int WebMStreamParser::ParseInfoAndTracks(const uint8* data, int size) {
 286   DCHECK(data);
 287   DCHECK_GT(size, 0);
 288
 289   const uint8* cur = data;
 290   int cur_size = size;
 291   int bytes_parsed = 0;
 292
 293   int id;
 294   int64 element_size;
 295   int result = WebMParseElementHeader(cur, cur_size, &id, &element_size);
 296
 297   if (result <= 0)
 298     return result;
 299
 300   switch (id) {
 301     case kWebMIdEBMLHeader:
 302     case kWebMIdSeekHead:
 303     case kWebMIdVoid:
 304     case kWebMIdCRC32:
 305     case kWebMIdCues:
 306       if (cur_size < (result + element_size)) {
 307         // We don't have the whole element yet. Signal we need more data.
 308         return 0;
 309       }
 310       // Skip the element.
 311       return result + element_size;
 312       break;
 313     case kWebMIdSegment:
 314       // Just consume the segment header.
 315       return result;
 316       break;
 317     case kWebMIdInfo:
 318       // We've found the element we are looking for.
 319       break;
 320     default:
 321       DVLOG(1) << "Unexpected ID 0x" << std::hex << id;
 322       return -1;
 323   }
 324
 325   WebMInfoParser info_parser;
 326   result = info_parser.Parse(cur, cur_size);
 327
 328   if (result <= 0)
 329     return result;
 330
 331   cur += result;
 332   cur_size -= result;
 333   bytes_parsed += result;
 334
 335   WebMTracksParser tracks_parser;
 336   result = tracks_parser.Parse(cur, cur_size);
 337
 338   if (result <= 0)
 339     return result;
 340
 341   bytes_parsed += result;
 342
 343   base::TimeDelta duration = kInfiniteDuration();
 344
 345   if (info_parser.duration() > 0) {
 346     double mult = info_parser.timecode_scale() / 1000.0;
 347     int64 duration_in_us = info_parser.duration() * mult;
 348     duration = base::TimeDelta::FromMicroseconds(duration_in_us);
 349   }
 350
 351   FFmpegConfigHelper config_helper;
 352   if (!config_helper.Parse(data, bytes_parsed)) {
 353     DVLOG(1) << "Failed to parse config data.";
 354     return -1;
 355   }
 356
 357   bool is_audio_encrypted = !tracks_parser.audio_encryption_key_id().empty();
 358   AudioDecoderConfig audio_config;
 359   if (is_audio_encrypted) {
 360     const AudioDecoderConfig& original_audio_config =
 361         config_helper.audio_config();
 362
 363     audio_config.Initialize(original_audio_config.codec(),
 364                             original_audio_config.bits_per_channel(),
 365                             original_audio_config.channel_layout(),
 366                             original_audio_config.samples_per_second(),
 367                             original_audio_config.extra_data(),
 368                             original_audio_config.extra_data_size(),
 369                             is_audio_encrypted, false);
 370
 371     FireNeedKey(tracks_parser.audio_encryption_key_id());
 372   } else {
 373     audio_config.CopyFrom(config_helper.audio_config());
 374   }
 375
 376   // TODO(xhwang): Support decryption of audio (see http://crbug.com/123421).
 377   bool is_video_encrypted = !tracks_parser.video_encryption_key_id().empty();
 378
 379   VideoDecoderConfig video_config;
 380   if (is_video_encrypted) {
 381     const VideoDecoderConfig& original_video_config =
 382         config_helper.video_config();
 383     video_config.Initialize(original_video_config.codec(),
 384                             original_video_config.profile(),
 385                             original_video_config.format(),
 386                             original_video_config.coded_size(),
 387                             original_video_config.visible_rect(),
 388                             original_video_config.natural_size(),
 389                             original_video_config.extra_data(),
 390                             original_video_config.extra_data_size(),
 391                             is_video_encrypted, false);
 392
 393     FireNeedKey(tracks_parser.video_encryption_key_id());
 394   } else {
 395     video_config.CopyFrom(config_helper.video_config());
 396   }
 397
 398   if (!config_cb_.Run(audio_config, video_config)) {
 399     DVLOG(1) << "New config data isn't allowed.";
 400     return -1;
 401   }
 402
 403   cluster_parser_.reset(new WebMClusterParser(
 404       info_parser.timecode_scale(),
 405       tracks_parser.audio_track_num(),
 406       tracks_parser.video_track_num(),
 407       tracks_parser.audio_encryption_key_id(),
 408       tracks_parser.video_encryption_key_id()));
 409
 410   ChangeState(kParsingClusters);
 411
 412   if (!init_cb_.is_null()) {
 413     init_cb_.Run(true, duration);
 414     init_cb_.Reset();
 415   }
 416
 417   return bytes_parsed;
 418 }
 419
 420 int WebMStreamParser::ParseCluster(const uint8* data, int size) {
 421   if (!cluster_parser_.get())
 422     return -1;
 423
 424   int id;
 425   int64 element_size;
 426   int result = WebMParseElementHeader(data, size, &id, &element_size);
 427
 428   if (result <= 0)
 429     return result;
 430
 431   if (id == kWebMIdCluster)
 432     waiting_for_buffers_ = true;
 433
 434   if (id == kWebMIdCues) {
 435     if (size < (result + element_size)) {
 436       // We don't have the whole element yet. Signal we need more data.
 437       return 0;
 438     }
 439     // Skip the element.
 440     return result + element_size;
 441   }
 442
 443   if (id == kWebMIdEBMLHeader) {
 444     ChangeState(kParsingHeaders);
 445     return 0;
 446   }
 447
 448   int bytes_parsed = cluster_parser_->Parse(data, size);
 449
 450   if (bytes_parsed <= 0)
 451     return bytes_parsed;
 452
 453   const BufferQueue& audio_buffers = cluster_parser_->audio_buffers();
 454   const BufferQueue& video_buffers = cluster_parser_->video_buffers();
 455   base::TimeDelta cluster_start_time = cluster_parser_->cluster_start_time();
 456   bool cluster_ended = cluster_parser_->cluster_ended();
 457
 458   if (waiting_for_buffers_ && cluster_start_time != kNoTimestamp()) {
 459     new_segment_cb_.Run(cluster_start_time);
 460     waiting_for_buffers_ = false;
 461   }
 462
 463   if (!audio_buffers.empty() && !audio_cb_.Run(audio_buffers))
 464     return -1;
 465
 466   if (!video_buffers.empty() && !video_cb_.Run(video_buffers))
 467     return -1;
 468
 469   if (cluster_ended)
 470     end_of_segment_cb_.Run();
 471
 472   return bytes_parsed;
 473 }
 474
 475 void WebMStreamParser::FireNeedKey(const std::string& key_id) {
 476   int key_id_size = key_id.size();
 477   DCHECK_GT(key_id_size, 0);
 478   scoped_array<uint8> key_id_array(new uint8[key_id_size]);
 479   memcpy(key_id_array.get(), key_id.data(), key_id_size);
 480   need_key_cb_.Run(kWebMInitDataType, key_id_array.Pass(), key_id_size);
 481 }
 482
 483 }  // namespace media