media/filters/ffmpeg_audio_decoder.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/filters/ffmpeg_audio_decoder.h"
   6
   7 #include "base/bind.h"
   8 #include "base/callback_helpers.h"
   9 #include "base/location.h"
  10 #include "base/message_loop/message_loop_proxy.h"
  11 #include "media/base/audio_buffer.h"
  12 #include "media/base/audio_bus.h"
  13 #include "media/base/audio_decoder_config.h"
  14 #include "media/base/audio_timestamp_helper.h"
  15 #include "media/base/bind_to_loop.h"
  16 #include "media/base/decoder_buffer.h"
  17 #include "media/base/demuxer.h"
  18 #include "media/base/limits.h"
  19 #include "media/base/pipeline.h"
  20 #include "media/base/sample_format.h"
  21 #include "media/ffmpeg/ffmpeg_common.h"
  22 #include "media/filters/ffmpeg_glue.h"
  23
  24 namespace media {
  25
  26 // Helper structure for managing multiple decoded audio frames per packet.
  27 struct QueuedAudioBuffer {
  28   AudioDecoder::Status status;
  29   scoped_refptr<AudioBuffer> buffer;
  30 };
  31
  32 // Returns true if the decode result was end of stream.
  33 static inline bool IsEndOfStream(int result,
  34                                  int decoded_size,
  35                                  const scoped_refptr<DecoderBuffer>& input) {
  36   // Three conditions to meet to declare end of stream for this decoder:
  37   // 1. FFmpeg didn't read anything.
  38   // 2. FFmpeg didn't output anything.
  39   // 3. An end of stream buffer is received.
  40   return result == 0 && decoded_size == 0 && input->end_of_stream();
  41 }
  42
  43 // Return the number of channels from the data in |frame|.
  44 static inline int DetermineChannels(AVFrame* frame) {
  45 #if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
  46   // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
  47   return av_get_channel_layout_nb_channels(frame->channel_layout);
  48 #else
  49   return frame->channels;
  50 #endif
  51 }
  52
  53 // Called by FFmpeg's allocation routine to allocate a buffer. Uses
  54 // AVCodecContext.opaque to get the object reference in order to call
  55 // GetAudioBuffer() to do the actual allocation.
  56 static int GetAudioBufferImpl(struct AVCodecContext* s,
  57                               AVFrame* frame,
  58                               int flags) {
  59   DCHECK(s->codec->capabilities & CODEC_CAP_DR1);
  60   DCHECK_EQ(s->codec_type, AVMEDIA_TYPE_AUDIO);
  61   FFmpegAudioDecoder* decoder = static_cast<FFmpegAudioDecoder*>(s->opaque);
  62   return decoder->GetAudioBuffer(s, frame, flags);
  63 }
  64
  65 // Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
  66 // AudioBuffer allocated, so unref it.
  67 static void ReleaseAudioBufferImpl(void* opaque, uint8* data) {
  68   scoped_refptr<AudioBuffer> buffer;
  69   buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
  70 }
  71
  72 FFmpegAudioDecoder::FFmpegAudioDecoder(
  73     const scoped_refptr<base::MessageLoopProxy>& message_loop)
  74     : message_loop_(message_loop),
  75       weak_factory_(this),
  76       demuxer_stream_(NULL),
  77       bytes_per_channel_(0),
  78       channel_layout_(CHANNEL_LAYOUT_NONE),
  79       channels_(0),
  80       samples_per_second_(0),
  81       av_sample_format_(0),
  82       last_input_timestamp_(kNoTimestamp()),
  83       output_frames_to_drop_(0) {
  84 }
  85
  86 void FFmpegAudioDecoder::Initialize(
  87     DemuxerStream* stream,
  88     const PipelineStatusCB& status_cb,
  89     const StatisticsCB& statistics_cb) {
  90   DCHECK(message_loop_->BelongsToCurrentThread());
  91   PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);
  92
  93   FFmpegGlue::InitializeFFmpeg();
  94
  95   if (demuxer_stream_) {
  96     // TODO(scherkus): initialization currently happens more than once in
  97     // PipelineIntegrationTest.BasicPlayback.
  98     LOG(ERROR) << "Initialize has already been called.";
  99     CHECK(false);
 100   }
 101
 102   weak_this_ = weak_factory_.GetWeakPtr();
 103   demuxer_stream_ = stream;
 104
 105   if (!ConfigureDecoder()) {
 106     status_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
 107     return;
 108   }
 109
 110   statistics_cb_ = statistics_cb;
 111   initialize_cb.Run(PIPELINE_OK);
 112 }
 113
 114 void FFmpegAudioDecoder::Read(const ReadCB& read_cb) {
 115   DCHECK(message_loop_->BelongsToCurrentThread());
 116   DCHECK(!read_cb.is_null());
 117   CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported.";
 118
 119   read_cb_ = BindToCurrentLoop(read_cb);
 120
 121   // If we don't have any queued audio from the last packet we decoded, ask for
 122   // more data from the demuxer to satisfy this read.
 123   if (queued_audio_.empty()) {
 124     ReadFromDemuxerStream();
 125     return;
 126   }
 127
 128   base::ResetAndReturn(&read_cb_).Run(
 129       queued_audio_.front().status, queued_audio_.front().buffer);
 130   queued_audio_.pop_front();
 131 }
 132
 133 int FFmpegAudioDecoder::bits_per_channel() {
 134   DCHECK(message_loop_->BelongsToCurrentThread());
 135   return bytes_per_channel_ * 8;
 136 }
 137
 138 ChannelLayout FFmpegAudioDecoder::channel_layout() {
 139   DCHECK(message_loop_->BelongsToCurrentThread());
 140   return channel_layout_;
 141 }
 142
 143 int FFmpegAudioDecoder::samples_per_second() {
 144   DCHECK(message_loop_->BelongsToCurrentThread());
 145   return samples_per_second_;
 146 }
 147
 148 void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
 149   DCHECK(message_loop_->BelongsToCurrentThread());
 150   base::Closure reset_cb = BindToCurrentLoop(closure);
 151
 152   avcodec_flush_buffers(codec_context_.get());
 153   ResetTimestampState();
 154   queued_audio_.clear();
 155   reset_cb.Run();
 156 }
 157
 158 FFmpegAudioDecoder::~FFmpegAudioDecoder() {
 159   // TODO(scherkus): should we require Stop() to be called? this might end up
 160   // getting called on a random thread due to refcounting.
 161   ReleaseFFmpegResources();
 162 }
 163
 164 int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext* codec,
 165                                        AVFrame* frame,
 166                                        int flags) {
 167   // Since this routine is called by FFmpeg when a buffer is required for audio
 168   // data, use the values supplied by FFmpeg (ignoring the current settings).
 169   // RunDecodeLoop() gets to determine if the buffer is useable or not.
 170   AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
 171   SampleFormat sample_format = AVSampleFormatToSampleFormat(format);
 172   int channels = DetermineChannels(frame);
 173   if ((channels <= 0) || (channels >= limits::kMaxChannels)) {
 174     DLOG(ERROR) << "Requested number of channels (" << channels
 175                 << ") exceeds limit.";
 176     return AVERROR(EINVAL);
 177   }
 178
 179   int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
 180   if (frame->nb_samples <= 0)
 181     return AVERROR(EINVAL);
 182
 183   // Determine how big the buffer should be and allocate it. FFmpeg may adjust
 184   // how big each channel data is in order to meet the alignment policy, so
 185   // we need to take this into consideration.
 186   int buffer_size_in_bytes =
 187       av_samples_get_buffer_size(&frame->linesize[0],
 188                                  channels,
 189                                  frame->nb_samples,
 190                                  format,
 191                                  AudioBuffer::kChannelAlignment);
 192   int frames_required = buffer_size_in_bytes / bytes_per_channel / channels;
 193   DCHECK_GE(frames_required, frame->nb_samples);
 194   scoped_refptr<AudioBuffer> buffer =
 195       AudioBuffer::CreateBuffer(sample_format, channels, frames_required);
 196
 197   // Initialize the data[] and extended_data[] fields to point into the memory
 198   // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
 199   // audio and equal to |channels| for planar audio.
 200   int number_of_planes = buffer->channel_data().size();
 201   if (number_of_planes <= AV_NUM_DATA_POINTERS) {
 202     DCHECK_EQ(frame->extended_data, frame->data);
 203     for (int i = 0; i < number_of_planes; ++i)
 204       frame->data[i] = buffer->channel_data()[i];
 205   } else {
 206     // There are more channels than can fit into data[], so allocate
 207     // extended_data[] and fill appropriately.
 208     frame->extended_data = static_cast<uint8**>(
 209         av_malloc(number_of_planes * sizeof(*frame->extended_data)));
 210     int i = 0;
 211     for (; i < AV_NUM_DATA_POINTERS; ++i)
 212       frame->extended_data[i] = frame->data[i] = buffer->channel_data()[i];
 213     for (; i < number_of_planes; ++i)
 214       frame->extended_data[i] = buffer->channel_data()[i];
 215   }
 216
 217   // Now create an AVBufferRef for the data just allocated. It will own the
 218   // reference to the AudioBuffer object.
 219   void* opaque = NULL;
 220   buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
 221   frame->buf[0] = av_buffer_create(
 222       frame->data[0], buffer_size_in_bytes, ReleaseAudioBufferImpl, opaque, 0);
 223   return 0;
 224 }
 225
 226 void FFmpegAudioDecoder::ReadFromDemuxerStream() {
 227   DCHECK(!read_cb_.is_null());
 228   demuxer_stream_->Read(base::Bind(
 229       &FFmpegAudioDecoder::BufferReady, weak_this_));
 230 }
 231
 232 void FFmpegAudioDecoder::BufferReady(
 233     DemuxerStream::Status status,
 234     const scoped_refptr<DecoderBuffer>& input) {
 235   DCHECK(message_loop_->BelongsToCurrentThread());
 236   DCHECK(!read_cb_.is_null());
 237   DCHECK(queued_audio_.empty());
 238   DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;
 239
 240   if (status == DemuxerStream::kAborted) {
 241     DCHECK(!input.get());
 242     base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
 243     return;
 244   }
 245
 246   if (status == DemuxerStream::kConfigChanged) {
 247     DCHECK(!input.get());
 248
 249     // Send a "end of stream" buffer to the decode loop
 250     // to output any remaining data still in the decoder.
 251     RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);
 252
 253     DVLOG(1) << "Config changed.";
 254
 255     if (!ConfigureDecoder()) {
 256       base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
 257       return;
 258     }
 259
 260     ResetTimestampState();
 261
 262     if (queued_audio_.empty()) {
 263       ReadFromDemuxerStream();
 264       return;
 265     }
 266
 267     base::ResetAndReturn(&read_cb_).Run(
 268         queued_audio_.front().status, queued_audio_.front().buffer);
 269     queued_audio_.pop_front();
 270     return;
 271   }
 272
 273   DCHECK_EQ(status, DemuxerStream::kOk);
 274   DCHECK(input.get());
 275
 276   // Make sure we are notified if http://crbug.com/49709 returns.  Issue also
 277   // occurs with some damaged files.
 278   if (!input->end_of_stream() && input->timestamp() == kNoTimestamp() &&
 279       output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
 280     DVLOG(1) << "Received a buffer without timestamps!";
 281     base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
 282     return;
 283   }
 284
 285   bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
 286   if (!input->end_of_stream()) {
 287     if (last_input_timestamp_ == kNoTimestamp()) {
 288       if (is_vorbis && (input->timestamp() < base::TimeDelta())) {
 289         // Dropping frames for negative timestamps as outlined in section A.2
 290         // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
 291         output_frames_to_drop_ = floor(
 292             0.5 + -input->timestamp().InSecondsF() * samples_per_second_);
 293       } else {
 294         last_input_timestamp_ = input->timestamp();
 295       }
 296     } else if (input->timestamp() != kNoTimestamp()) {
 297       if (input->timestamp() < last_input_timestamp_) {
 298         base::TimeDelta diff = input->timestamp() - last_input_timestamp_;
 299         DVLOG(1) << "Input timestamps are not monotonically increasing! "
 300                  << " ts " << input->timestamp().InMicroseconds() << " us"
 301                  << " diff " << diff.InMicroseconds() << " us";
 302         base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
 303         return;
 304       }
 305
 306       last_input_timestamp_ = input->timestamp();
 307     }
 308   }
 309
 310   RunDecodeLoop(input, false);
 311
 312   // We exhausted the provided packet, but it wasn't enough for a frame.  Ask
 313   // for more data in order to fulfill this read.
 314   if (queued_audio_.empty()) {
 315     ReadFromDemuxerStream();
 316     return;
 317   }
 318
 319   // Execute callback to return the first frame we decoded.
 320   base::ResetAndReturn(&read_cb_).Run(
 321       queued_audio_.front().status, queued_audio_.front().buffer);
 322   queued_audio_.pop_front();
 323 }
 324
 325 bool FFmpegAudioDecoder::ConfigureDecoder() {
 326   const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();
 327
 328   if (!config.IsValidConfig()) {
 329     DLOG(ERROR) << "Invalid audio stream -"
 330                 << " codec: " << config.codec()
 331                 << " channel layout: " << config.channel_layout()
 332                 << " bits per channel: " << config.bits_per_channel()
 333                 << " samples per second: " << config.samples_per_second();
 334     return false;
 335   }
 336
 337   if (config.is_encrypted()) {
 338     DLOG(ERROR) << "Encrypted audio stream not supported";
 339     return false;
 340   }
 341
 342   if (codec_context_.get() &&
 343       (bytes_per_channel_ != config.bytes_per_channel() ||
 344        channel_layout_ != config.channel_layout() ||
 345        samples_per_second_ != config.samples_per_second())) {
 346     DVLOG(1) << "Unsupported config change :";
 347     DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
 348              << " -> " << config.bytes_per_channel();
 349     DVLOG(1) << "\tchannel_layout : " << channel_layout_
 350              << " -> " << config.channel_layout();
 351     DVLOG(1) << "\tsample_rate : " << samples_per_second_
 352              << " -> " << config.samples_per_second();
 353     return false;
 354   }
 355
 356   // Release existing decoder resources if necessary.
 357   ReleaseFFmpegResources();
 358
 359   // Initialize AVCodecContext structure.
 360   codec_context_.reset(avcodec_alloc_context3(NULL));
 361   AudioDecoderConfigToAVCodecContext(config, codec_context_.get());
 362
 363   codec_context_->opaque = this;
 364   codec_context_->get_buffer2 = GetAudioBufferImpl;
 365   codec_context_->refcounted_frames = 1;
 366
 367   AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
 368   if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
 369     DLOG(ERROR) << "Could not initialize audio decoder: "
 370                 << codec_context_->codec_id;
 371     return false;
 372   }
 373
 374   // Success!
 375   av_frame_.reset(avcodec_alloc_frame());
 376   channel_layout_ = config.channel_layout();
 377   samples_per_second_ = config.samples_per_second();
 378   output_timestamp_helper_.reset(
 379       new AudioTimestampHelper(config.samples_per_second()));
 380
 381   // Store initial values to guard against midstream configuration changes.
 382   channels_ = codec_context_->channels;
 383   if (channels_ != ChannelLayoutToChannelCount(channel_layout_)) {
 384     DLOG(ERROR) << "Audio configuration specified "
 385                 << ChannelLayoutToChannelCount(channel_layout_)
 386                 << " channels, but FFmpeg thinks the file contains "
 387                 << channels_ << " channels";
 388     return false;
 389   }
 390   av_sample_format_ = codec_context_->sample_fmt;
 391   sample_format_ = AVSampleFormatToSampleFormat(
 392       static_cast<AVSampleFormat>(av_sample_format_));
 393   bytes_per_channel_ = SampleFormatToBytesPerChannel(sample_format_);
 394
 395   return true;
 396 }
 397
 398 void FFmpegAudioDecoder::ReleaseFFmpegResources() {
 399   codec_context_.reset();
 400   av_frame_.reset();
 401 }
 402
 403 void FFmpegAudioDecoder::ResetTimestampState() {
 404   output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
 405   last_input_timestamp_ = kNoTimestamp();
 406   output_frames_to_drop_ = 0;
 407 }
 408
 409 void FFmpegAudioDecoder::RunDecodeLoop(
 410     const scoped_refptr<DecoderBuffer>& input,
 411     bool skip_eos_append) {
 412   AVPacket packet;
 413   av_init_packet(&packet);
 414   if (input->end_of_stream()) {
 415     packet.data = NULL;
 416     packet.size = 0;
 417   } else {
 418     packet.data = const_cast<uint8*>(input->data());
 419     packet.size = input->data_size();
 420   }
 421
 422   // Each audio packet may contain several frames, so we must call the decoder
 423   // until we've exhausted the packet.  Regardless of the packet size we always
 424   // want to hand it to the decoder at least once, otherwise we would end up
 425   // skipping end of stream packets since they have a size of zero.
 426   do {
 427     int frame_decoded = 0;
 428     int result = avcodec_decode_audio4(
 429         codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);
 430
 431     if (result < 0) {
 432       DCHECK(!input->end_of_stream())
 433           << "End of stream buffer produced an error! "
 434           << "This is quite possibly a bug in the audio decoder not handling "
 435           << "end of stream AVPackets correctly.";
 436
 437       DLOG(ERROR)
 438           << "Error decoding an audio frame with timestamp: "
 439           << input->timestamp().InMicroseconds() << " us, duration: "
 440           << input->duration().InMicroseconds() << " us, packet size: "
 441           << input->data_size() << " bytes";
 442
 443       // TODO(dalecurtis): We should return a kDecodeError here instead:
 444       // http://crbug.com/145276
 445       break;
 446     }
 447
 448     // Update packet size and data pointer in case we need to call the decoder
 449     // with the remaining bytes from this packet.
 450     packet.size -= result;
 451     packet.data += result;
 452
 453     if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
 454         !input->end_of_stream()) {
 455       DCHECK(input->timestamp() != kNoTimestamp());
 456       if (output_frames_to_drop_ > 0) {
 457         // Currently Vorbis is the only codec that causes us to drop samples.
 458         // If we have to drop samples it always means the timeline starts at 0.
 459         DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
 460         output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
 461       } else {
 462         output_timestamp_helper_->SetBaseTimestamp(input->timestamp());
 463       }
 464     }
 465
 466     scoped_refptr<AudioBuffer> output;
 467     int decoded_frames = 0;
 468     int original_frames = 0;
 469     int channels = DetermineChannels(av_frame_.get());
 470     if (frame_decoded) {
 471       if (av_frame_->sample_rate != samples_per_second_ ||
 472           channels != channels_ ||
 473           av_frame_->format != av_sample_format_) {
 474         DLOG(ERROR) << "Unsupported midstream configuration change!"
 475                     << " Sample Rate: " << av_frame_->sample_rate << " vs "
 476                     << samples_per_second_
 477                     << ", Channels: " << channels << " vs "
 478                     << channels_
 479                     << ", Sample Format: " << av_frame_->format << " vs "
 480                     << av_sample_format_;
 481
 482         // This is an unrecoverable error, so bail out.
 483         QueuedAudioBuffer queue_entry = { kDecodeError, NULL };
 484         queued_audio_.push_back(queue_entry);
 485         av_frame_unref(av_frame_.get());
 486         break;
 487       }
 488
 489       // Get the AudioBuffer that the data was decoded into. Adjust the number
 490       // of frames, in case fewer than requested were actually decoded.
 491       output = reinterpret_cast<AudioBuffer*>(
 492           av_buffer_get_opaque(av_frame_->buf[0]));
 493       DCHECK_EQ(channels_, output->channel_count());
 494       original_frames = av_frame_->nb_samples;
 495       int unread_frames = output->frame_count() - original_frames;
 496       DCHECK_GE(unread_frames, 0);
 497       if (unread_frames > 0)
 498         output->TrimEnd(unread_frames);
 499
 500       // If there are frames to drop, get rid of as many as we can.
 501       if (output_frames_to_drop_ > 0) {
 502         int drop = std::min(output->frame_count(), output_frames_to_drop_);
 503         output->TrimStart(drop);
 504         output_frames_to_drop_ -= drop;
 505       }
 506
 507       decoded_frames = output->frame_count();
 508       av_frame_unref(av_frame_.get());
 509     }
 510
 511     // WARNING: |av_frame_| no longer has valid data at this point.
 512
 513     if (decoded_frames > 0) {
 514       // Set the timestamp/duration once all the extra frames have been
 515       // discarded.
 516       output->set_timestamp(output_timestamp_helper_->GetTimestamp());
 517       output->set_duration(
 518           output_timestamp_helper_->GetFrameDuration(decoded_frames));
 519       output_timestamp_helper_->AddFrames(decoded_frames);
 520     } else if (IsEndOfStream(result, original_frames, input) &&
 521                !skip_eos_append) {
 522       DCHECK_EQ(packet.size, 0);
 523       output = AudioBuffer::CreateEOSBuffer();
 524     } else {
 525       // In case all the frames in the buffer were dropped.
 526       output = NULL;
 527     }
 528
 529     if (output.get()) {
 530       QueuedAudioBuffer queue_entry = { kOk, output };
 531       queued_audio_.push_back(queue_entry);
 532     }
 533
 534     // Decoding finished successfully, update statistics.
 535     if (result > 0) {
 536       PipelineStatistics statistics;
 537       statistics.audio_bytes_decoded = result;
 538       statistics_cb_.Run(statistics);
 539     }
 540   } while (packet.size > 0);
 541 }
 542
 543 }  // namespace media