1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/ffmpeg_audio_decoder.h"
8 #include "base/callback_helpers.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "media/base/audio_buffer.h"
12 #include "media/base/audio_bus.h"
13 #include "media/base/audio_decoder_config.h"
14 #include "media/base/audio_timestamp_helper.h"
15 #include "media/base/bind_to_loop.h"
16 #include "media/base/decoder_buffer.h"
17 #include "media/base/demuxer.h"
18 #include "media/base/pipeline.h"
19 #include "media/base/sample_format.h"
20 #include "media/ffmpeg/ffmpeg_common.h"
21 #include "media/filters/ffmpeg_glue.h"
25 // Helper structure for managing multiple decoded audio frames per packet.
// Entries of this type are appended to |queued_audio_| by RunDecodeLoop()
// and popped one at a time by Read()/BufferReady().
26 struct QueuedAudioBuffer
{
// Result of the decode that produced |buffer| (e.g. kOk, kDecodeError;
// RunDecodeLoop() queues { kDecodeError, NULL } on fatal errors).
27 AudioDecoder::Status status
;
// The decoded audio; may be NULL when |status| signals an error.
// NOTE(review): the closing "};" of this struct (original lines 29-30)
// appears elided in this extraction — confirm against the full file.
28 scoped_refptr
<AudioBuffer
> buffer
;
31 // Returns true if the decode result was end of stream.
// NOTE(review): the |decoded_size| parameter declaration (original line 33)
// appears elided in this extraction; the return expression below reads it.
32 static inline bool IsEndOfStream(int result
,
34 const scoped_refptr
<DecoderBuffer
>& input
) {
35 // Three conditions to meet to declare end of stream for this decoder:
36 // 1. FFmpeg didn't read anything.
37 // 2. FFmpeg didn't output anything.
38 // 3. An end of stream buffer is received.
39 return result
== 0 && decoded_size
== 0 && input
->end_of_stream();
42 // Return the number of channels from the data in |frame|.
43 static inline int DetermineChannels(AVFrame
* frame
) {
44 #if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
45 // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
// Derive the channel count from the channel-layout bitmask instead.
46 return av_get_channel_layout_nb_channels(frame
->channel_layout
);
// NOTE(review): the #else / #endif of this preprocessor conditional
// (original lines 47 and 49-50) appear elided in this extraction.
48 return frame
->channels
;
52 // Called by FFmpeg's allocation routine to allocate a buffer. Uses
53 // AVCodecContext.opaque to get the object reference in order to call
54 // GetAudioBuffer() to do the actual allocation.
// NOTE(review): the |frame| and |flags| parameter declarations (original
// lines 56-57) appear elided in this extraction; both are used below.
55 static int GetAudioBufferImpl(struct AVCodecContext
* s
,
// Custom allocation requires the codec to support direct rendering.
58 DCHECK(s
->codec
->capabilities
& CODEC_CAP_DR1
);
59 DCHECK_EQ(s
->codec_type
, AVMEDIA_TYPE_AUDIO
);
// |opaque| is set to the FFmpegAudioDecoder instance in ConfigureDecoder().
60 FFmpegAudioDecoder
* decoder
= static_cast<FFmpegAudioDecoder
*>(s
->opaque
);
61 return decoder
->GetAudioBuffer(s
, frame
, flags
);
64 // Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
65 // AudioBuffer allocated, so unref it.
66 static void ReleaseAudioBufferImpl(void* opaque
, uint8
* data
) {
// swap() adopts the raw reference stored in |opaque| without adding a new
// one; the reference is released when |buffer| goes out of scope.
67 scoped_refptr
<AudioBuffer
> buffer
;
68 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
71 FFmpegAudioDecoder::FFmpegAudioDecoder(
72 const scoped_refptr
<base::MessageLoopProxy
>& message_loop
)
// All member state is only touched on |message_loop_|'s thread; the member
// functions DCHECK BelongsToCurrentThread() before using it.
// NOTE(review): several initializers and the constructor body (original
// lines 74, 76, 79, 81, 84-86) appear elided in this extraction.
73 : message_loop_(message_loop
),
75 demuxer_stream_(NULL
),
77 bytes_per_channel_(0),
78 channel_layout_(CHANNEL_LAYOUT_NONE
),
80 samples_per_second_(0),
82 last_input_timestamp_(kNoTimestamp()),
83 output_frames_to_drop_(0),
// Initializes the decoder against |stream|'s audio config. The result is
// reported through |status_cb| (re-posted to the owning loop via
// |initialize_cb| on success); |statistics_cb| is stored for later
// per-decode statistics reporting in RunDecodeLoop().
// NOTE(review): the early-return statements and closing braces of the
// error paths (original lines 93, 95, 100-102, 105, 108-110, 113-114)
// appear elided in this extraction.
87 void FFmpegAudioDecoder::Initialize(
88 DemuxerStream
* stream
,
89 const PipelineStatusCB
& status_cb
,
90 const StatisticsCB
& statistics_cb
) {
91 DCHECK(message_loop_
->BelongsToCurrentThread());
92 PipelineStatusCB initialize_cb
= BindToCurrentLoop(status_cb
);
94 FFmpegGlue::InitializeFFmpeg();
// A non-NULL |demuxer_stream_| means Initialize() already ran once.
96 if (demuxer_stream_
) {
97 // TODO(scherkus): initialization currently happens more than once in
98 // PipelineIntegrationTest.BasicPlayback.
99 LOG(ERROR
) << "Initialize has already been called.";
103 weak_this_
= weak_factory_
.GetWeakPtr();
104 demuxer_stream_
= stream
;
106 if (!ConfigureDecoder()) {
107 status_cb
.Run(DECODER_ERROR_NOT_SUPPORTED
);
111 statistics_cb_
= statistics_cb
;
112 initialize_cb
.Run(PIPELINE_OK
);
// Satisfies one decode request. The result is delivered through |read_cb_|
// (bound to the calling loop); only a single outstanding Read() is allowed
// at a time, enforced by the CHECK below.
115 void FFmpegAudioDecoder::Read(const ReadCB
& read_cb
) {
116 DCHECK(message_loop_
->BelongsToCurrentThread());
117 DCHECK(!read_cb
.is_null());
118 CHECK(read_cb_
.is_null()) << "Overlapping decodes are not supported.";
120 read_cb_
= BindToCurrentLoop(read_cb
);
122 // If we don't have any queued audio from the last packet we decoded, ask for
123 // more data from the demuxer to satisfy this read.
124 if (queued_audio_
.empty()) {
125 ReadFromDemuxerStream();
// NOTE(review): the "return;" and closing brace of this branch (original
// lines 126-128) appear elided in this extraction.
129 base::ResetAndReturn(&read_cb_
).Run(
130 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
131 queued_audio_
.pop_front();
// Returns the sample size in bits, derived from |bytes_per_channel_| which
// is set by ConfigureDecoder().
134 int FFmpegAudioDecoder::bits_per_channel() {
135 DCHECK(message_loop_
->BelongsToCurrentThread());
136 return bytes_per_channel_
* 8;
// Returns the channel layout captured from the config in ConfigureDecoder().
139 ChannelLayout
FFmpegAudioDecoder::channel_layout() {
140 DCHECK(message_loop_
->BelongsToCurrentThread());
141 return channel_layout_
;
// Returns the sample rate captured from the config in ConfigureDecoder().
144 int FFmpegAudioDecoder::samples_per_second() {
145 DCHECK(message_loop_
->BelongsToCurrentThread());
146 return samples_per_second_
;
// Flushes the FFmpeg codec, resets timestamp tracking, and drops any queued
// output. NOTE(review): the invocation of |reset_cb| and the closing brace
// (original lines 156-158) appear elided in this extraction.
149 void FFmpegAudioDecoder::Reset(const base::Closure
& closure
) {
150 DCHECK(message_loop_
->BelongsToCurrentThread());
151 base::Closure reset_cb
= BindToCurrentLoop(closure
);
153 avcodec_flush_buffers(codec_context_
);
154 ResetTimestampState();
155 queued_audio_
.clear();
// Frees the FFmpeg state allocated by ConfigureDecoder().
159 FFmpegAudioDecoder::~FFmpegAudioDecoder() {
160 // TODO(scherkus): should we require Stop() to be called? this might end up
161 // getting called on a random thread due to refcounting.
162 ReleaseFFmpegResources();
// FFmpeg get_buffer2() implementation (installed via GetAudioBufferImpl):
// allocates a media::AudioBuffer sized to FFmpeg's alignment requirements
// and points |frame|'s data planes directly into it, so decoded samples
// land in our buffer without an extra copy.
// NOTE(review): the |frame| and |flags| parameter declarations (original
// lines 166-167) appear elided in this extraction.
165 int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext
* codec
,
168 // Since this routine is called by FFmpeg when a buffer is required for audio
169 // data, use the values supplied by FFmpeg (ignoring the current settings).
170 // RunDecodeLoop() gets to determine if the buffer is useable or not.
171 AVSampleFormat format
= static_cast<AVSampleFormat
>(frame
->format
);
172 SampleFormat sample_format
= AVSampleFormatToSampleFormat(format
);
173 int channels
= DetermineChannels(frame
);
174 int bytes_per_channel
= SampleFormatToBytesPerChannel(sample_format
);
175 if (frame
->nb_samples
<= 0)
176 return AVERROR(EINVAL
);
178 // Determine how big the buffer should be and allocate it. FFmpeg may adjust
179 // how big each channel data is in order to meet the alignment policy, so
180 // we need to take this into consideration.
// NOTE(review): the remaining arguments to av_samples_get_buffer_size()
// (original lines 183-185) appear elided in this extraction.
181 int buffer_size_in_bytes
=
182 av_samples_get_buffer_size(&frame
->linesize
[0],
186 AudioBuffer::kChannelAlignment
);
// The aligned buffer may hold more frames than FFmpeg asked for; any
// excess is trimmed later in RunDecodeLoop().
187 int frames_required
= buffer_size_in_bytes
/ bytes_per_channel
/ channels
;
188 DCHECK_GE(frames_required
, frame
->nb_samples
);
189 scoped_refptr
<AudioBuffer
> buffer
=
190 AudioBuffer::CreateBuffer(sample_format
, channels
, frames_required
);
192 // Initialize the data[] and extended_data[] fields to point into the memory
193 // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
194 // audio and equal to |channels| for planar audio.
195 int number_of_planes
= buffer
->channel_data().size();
196 if (number_of_planes
<= AV_NUM_DATA_POINTERS
) {
197 DCHECK_EQ(frame
->extended_data
, frame
->data
);
198 for (int i
= 0; i
< number_of_planes
; ++i
)
199 frame
->data
[i
] = buffer
->channel_data()[i
];
// NOTE(review): the "} else {" and the declaration of loop index |i|
// (original lines 200 and 205) appear elided in this extraction.
201 // There are more channels than can fit into data[], so allocate
202 // extended_data[] and fill appropriately.
203 frame
->extended_data
= static_cast<uint8
**>(
204 av_malloc(number_of_planes
* sizeof(*frame
->extended_data
)));
206 for (; i
< AV_NUM_DATA_POINTERS
; ++i
)
207 frame
->extended_data
[i
] = frame
->data
[i
] = buffer
->channel_data()[i
];
208 for (; i
< number_of_planes
; ++i
)
209 frame
->extended_data
[i
] = buffer
->channel_data()[i
];
212 // Now create an AVBufferRef for the data just allocated. It will own the
213 // reference to the AudioBuffer object.
// The swap() transfers our reference into |opaque|; ReleaseAudioBufferImpl
// releases it when FFmpeg frees the AVBufferRef.
// NOTE(review): the declaration of |opaque| and the trailing "return 0;"
// (original lines 214 and 218-219) appear elided in this extraction.
215 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
216 frame
->buf
[0] = av_buffer_create(
217 frame
->data
[0], buffer_size_in_bytes
, ReleaseAudioBufferImpl
, opaque
, 0);
// Requests the next encoded buffer from the demuxer. BufferReady() is
// invoked with the result; the weak pointer guards against the decoder
// being destroyed before the demuxer replies.
221 void FFmpegAudioDecoder::ReadFromDemuxerStream() {
222 DCHECK(!read_cb_
.is_null());
223 demuxer_stream_
->Read(base::Bind(
224 &FFmpegAudioDecoder::BufferReady
, weak_this_
));
// Demuxer read completion. Handles the abort and config-change statuses,
// validates input timestamps (Vorbis pre-roll dropping, monotonicity),
// then runs the decode loop and returns the first queued output frame
// through |read_cb_|.
// NOTE(review): the "return;" statements and closing braces terminating the
// early-exit branches below appear elided throughout this extraction
// (e.g. original lines 238-240, 265-267, 277-279, 298-300).
227 void FFmpegAudioDecoder::BufferReady(
228 DemuxerStream::Status status
,
229 const scoped_refptr
<DecoderBuffer
>& input
) {
230 DCHECK(message_loop_
->BelongsToCurrentThread());
231 DCHECK(!read_cb_
.is_null());
232 DCHECK(queued_audio_
.empty());
// |input| must be non-NULL exactly when the demuxer reports kOk.
233 DCHECK_EQ(status
!= DemuxerStream::kOk
, !input
.get()) << status
;
235 if (status
== DemuxerStream::kAborted
) {
236 DCHECK(!input
.get());
237 base::ResetAndReturn(&read_cb_
).Run(kAborted
, NULL
);
241 if (status
== DemuxerStream::kConfigChanged
) {
242 DCHECK(!input
.get());
244 // Send a "end of stream" buffer to the decode loop
245 // to output any remaining data still in the decoder.
246 RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);
248 DVLOG(1) << "Config changed.";
250 if (!ConfigureDecoder()) {
251 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
255 ResetTimestampState();
// Flushing the old codec may have produced output; return it if so,
// otherwise ask the demuxer for more data.
257 if (queued_audio_
.empty()) {
258 ReadFromDemuxerStream();
262 base::ResetAndReturn(&read_cb_
).Run(
263 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
264 queued_audio_
.pop_front();
268 DCHECK_EQ(status
, DemuxerStream::kOk
);
271 // Make sure we are notified if http://crbug.com/49709 returns. Issue also
272 // occurs with some damaged files.
273 if (!input
->end_of_stream() && input
->timestamp() == kNoTimestamp() &&
274 output_timestamp_helper_
->base_timestamp() == kNoTimestamp()) {
275 DVLOG(1) << "Received a buffer without timestamps!";
276 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
280 bool is_vorbis
= codec_context_
->codec_id
== AV_CODEC_ID_VORBIS
;
281 if (!input
->end_of_stream()) {
282 if (last_input_timestamp_
== kNoTimestamp()) {
283 if (is_vorbis
&& (input
->timestamp() < base::TimeDelta())) {
284 // Dropping frames for negative timestamps as outlined in section A.2
285 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
// floor(0.5 + x) rounds the frame count to the nearest integer.
286 output_frames_to_drop_
= floor(
287 0.5 + -input
->timestamp().InSecondsF() * samples_per_second_
);
289 last_input_timestamp_
= input
->timestamp();
291 } else if (input
->timestamp() != kNoTimestamp()) {
292 if (input
->timestamp() < last_input_timestamp_
) {
293 base::TimeDelta diff
= input
->timestamp() - last_input_timestamp_
;
294 DVLOG(1) << "Input timestamps are not monotonically increasing! "
295 << " ts " << input
->timestamp().InMicroseconds() << " us"
296 << " diff " << diff
.InMicroseconds() << " us";
297 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
301 last_input_timestamp_
= input
->timestamp();
305 RunDecodeLoop(input
, false);
307 // We exhausted the provided packet, but it wasn't enough for a frame. Ask
308 // for more data in order to fulfill this read.
309 if (queued_audio_
.empty()) {
310 ReadFromDemuxerStream();
314 // Execute callback to return the first frame we decoded.
315 base::ResetAndReturn(&read_cb_
).Run(
316 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
317 queued_audio_
.pop_front();
// (Re)creates the AVCodecContext from the demuxer stream's current audio
// config and caches the derived format fields (|channels_|,
// |av_sample_format_|, |bytes_per_channel_|, ...) used to detect midstream
// configuration changes. Returns false for invalid, encrypted, or
// unsupported configs and on codec-open failure.
// NOTE(review): the "return false;" / "return true;" statements and closing
// braces of the branches below (e.g. original lines 329-331, 334-336,
// 348-350, 365-368, 382-383, 388-391) appear elided in this extraction.
320 bool FFmpegAudioDecoder::ConfigureDecoder() {
321 const AudioDecoderConfig
& config
= demuxer_stream_
->audio_decoder_config();
323 if (!config
.IsValidConfig()) {
324 DLOG(ERROR
) << "Invalid audio stream -"
325 << " codec: " << config
.codec()
326 << " channel layout: " << config
.channel_layout()
327 << " bits per channel: " << config
.bits_per_channel()
328 << " samples per second: " << config
.samples_per_second();
332 if (config
.is_encrypted()) {
333 DLOG(ERROR
) << "Encrypted audio stream not supported";
// A live codec context whose core parameters differ from the new config is
// a config change this decoder cannot absorb.
337 if (codec_context_
&&
338 (bytes_per_channel_
!= config
.bytes_per_channel() ||
339 channel_layout_
!= config
.channel_layout() ||
340 samples_per_second_
!= config
.samples_per_second())) {
341 DVLOG(1) << "Unsupported config change :";
342 DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
343 << " -> " << config
.bytes_per_channel();
344 DVLOG(1) << "\tchannel_layout : " << channel_layout_
345 << " -> " << config
.channel_layout();
346 DVLOG(1) << "\tsample_rate : " << samples_per_second_
347 << " -> " << config
.samples_per_second();
351 // Release existing decoder resources if necessary.
352 ReleaseFFmpegResources();
354 // Initialize AVCodecContext structure.
355 codec_context_
= avcodec_alloc_context3(NULL
);
356 AudioDecoderConfigToAVCodecContext(config
, codec_context_
);
// Route FFmpeg's buffer allocation through GetAudioBufferImpl(), which
// finds this decoder again via |opaque|.
358 codec_context_
->opaque
= this;
359 codec_context_
->get_buffer2
= GetAudioBufferImpl
;
361 AVCodec
* codec
= avcodec_find_decoder(codec_context_
->codec_id
);
362 if (!codec
|| avcodec_open2(codec_context_
, codec
, NULL
) < 0) {
363 DLOG(ERROR
) << "Could not initialize audio decoder: "
364 << codec_context_
->codec_id
;
369 av_frame_
= avcodec_alloc_frame();
370 channel_layout_
= config
.channel_layout();
371 samples_per_second_
= config
.samples_per_second();
372 output_timestamp_helper_
.reset(
373 new AudioTimestampHelper(config
.samples_per_second()));
375 // Store initial values to guard against midstream configuration changes.
376 channels_
= codec_context_
->channels
;
377 if (channels_
!= ChannelLayoutToChannelCount(channel_layout_
)) {
378 DLOG(ERROR
) << "Audio configuration specified "
379 << ChannelLayoutToChannelCount(channel_layout_
)
380 << " channels, but FFmpeg thinks the file contains "
381 << channels_
<< " channels";
384 av_sample_format_
= codec_context_
->sample_fmt
;
385 sample_format_
= AVSampleFormatToSampleFormat(
386 static_cast<AVSampleFormat
>(av_sample_format_
));
387 bytes_per_channel_
= SampleFormatToBytesPerChannel(sample_format_
);
// Frees the codec context (including its extradata, allocated by
// AudioDecoderConfigToAVCodecContext) created in ConfigureDecoder().
// NOTE(review): the remainder of this function (original lines 397-404),
// presumably nulling the freed pointers and releasing |av_frame_|, appears
// elided in this extraction — confirm against the full file.
392 void FFmpegAudioDecoder::ReleaseFFmpegResources() {
393 if (codec_context_
) {
394 av_free(codec_context_
->extradata
);
395 avcodec_close(codec_context_
);
396 av_free(codec_context_
);
// Clears all timestamp bookkeeping so the next decoded buffer re-establishes
// the output timeline (used on Reset() and after a config change).
405 void FFmpegAudioDecoder::ResetTimestampState() {
406 output_timestamp_helper_
->SetBaseTimestamp(kNoTimestamp());
407 last_input_timestamp_
= kNoTimestamp();
408 output_frames_to_drop_
= 0;
// Feeds |input| to avcodec_decode_audio4() repeatedly until the packet is
// exhausted, queueing each decoded AudioBuffer (or an error / end-of-stream
// entry) onto |queued_audio_| and reporting decode statistics.
// NOTE(review): |skip_eos_append|'s use is not visible here — its test
// (original line 522) appears elided; presumably it suppresses queueing the
// EOS buffer during a config-change flush. Confirm against the full file.
// NOTE(review): the AVPacket declaration, the "do {" opening the loop, the
// result < 0 error branch, and several closing braces (original lines 414,
// 417-419, 422-423, 428, 435-436, 441-442, 447, 450-452, etc.) appear
// elided in this extraction.
411 void FFmpegAudioDecoder::RunDecodeLoop(
412 const scoped_refptr
<DecoderBuffer
>& input
,
413 bool skip_eos_append
) {
415 av_init_packet(&packet
);
416 if (input
->end_of_stream()) {
420 packet
.data
= const_cast<uint8
*>(input
->data());
421 packet
.size
= input
->data_size();
424 // Each audio packet may contain several frames, so we must call the decoder
425 // until we've exhausted the packet. Regardless of the packet size we always
426 // want to hand it to the decoder at least once, otherwise we would end up
427 // skipping end of stream packets since they have a size of zero.
429 // Reset frame to default values.
430 avcodec_get_frame_defaults(av_frame_
);
// |frame_decoded| is set non-zero by FFmpeg when a full frame was output;
// |result| is the number of input bytes consumed (or an error code).
432 int frame_decoded
= 0;
433 int result
= avcodec_decode_audio4(
434 codec_context_
, av_frame_
, &frame_decoded
, &packet
);
437 DCHECK(!input
->end_of_stream())
438 << "End of stream buffer produced an error! "
439 << "This is quite possibly a bug in the audio decoder not handling "
440 << "end of stream AVPackets correctly.";
443 << "Error decoding an audio frame with timestamp: "
444 << input
->timestamp().InMicroseconds() << " us, duration: "
445 << input
->duration().InMicroseconds() << " us, packet size: "
446 << input
->data_size() << " bytes";
448 // TODO(dalecurtis): We should return a kDecodeError here instead:
449 // http://crbug.com/145276
453 // Update packet size and data pointer in case we need to call the decoder
454 // with the remaining bytes from this packet.
455 packet
.size
-= result
;
456 packet
.data
+= result
;
// Establish the output timeline from the first timestamped input buffer.
458 if (output_timestamp_helper_
->base_timestamp() == kNoTimestamp() &&
459 !input
->end_of_stream()) {
460 DCHECK(input
->timestamp() != kNoTimestamp());
461 if (output_frames_to_drop_
> 0) {
462 // Currently Vorbis is the only codec that causes us to drop samples.
463 // If we have to drop samples it always means the timeline starts at 0.
464 DCHECK_EQ(codec_context_
->codec_id
, AV_CODEC_ID_VORBIS
);
465 output_timestamp_helper_
->SetBaseTimestamp(base::TimeDelta());
467 output_timestamp_helper_
->SetBaseTimestamp(input
->timestamp());
471 scoped_refptr
<AudioBuffer
> output
;
472 int decoded_frames
= 0;
473 int original_frames
= 0;
474 int channels
= DetermineChannels(av_frame_
);
// Compare the frame's actual parameters against those captured at
// ConfigureDecoder() time; a mismatch is a midstream config change.
476 if (av_frame_
->sample_rate
!= samples_per_second_
||
477 channels
!= channels_
||
478 av_frame_
->format
!= av_sample_format_
) {
479 DLOG(ERROR
) << "Unsupported midstream configuration change!"
480 << " Sample Rate: " << av_frame_
->sample_rate
<< " vs "
481 << samples_per_second_
482 << ", Channels: " << channels
<< " vs "
484 << ", Sample Format: " << av_frame_
->format
<< " vs "
485 << av_sample_format_
;
487 // This is an unrecoverable error, so bail out.
488 QueuedAudioBuffer queue_entry
= { kDecodeError
, NULL
};
489 queued_audio_
.push_back(queue_entry
);
493 // Get the AudioBuffer that the data was decoded into. Adjust the number
494 // of frames, in case fewer than requested were actually decoded.
// |opaque| on the frame's AVBufferRef is the AudioBuffer installed by
// GetAudioBuffer().
495 output
= reinterpret_cast<AudioBuffer
*>(
496 av_buffer_get_opaque(av_frame_
->buf
[0]));
497 DCHECK_EQ(channels_
, output
->channel_count());
498 original_frames
= av_frame_
->nb_samples
;
499 int unread_frames
= output
->frame_count() - original_frames
;
500 DCHECK_GE(unread_frames
, 0);
501 if (unread_frames
> 0)
502 output
->TrimEnd(unread_frames
);
504 // If there are frames to drop, get rid of as many as we can.
505 if (output_frames_to_drop_
> 0) {
506 int drop
= std::min(output
->frame_count(), output_frames_to_drop_
);
507 output
->TrimStart(drop
);
508 output_frames_to_drop_
-= drop
;
511 decoded_frames
= output
->frame_count();
514 if (decoded_frames
> 0) {
515 // Set the timestamp/duration once all the extra frames have been
517 output
->set_timestamp(output_timestamp_helper_
->GetTimestamp());
518 output
->set_duration(
519 output_timestamp_helper_
->GetFrameDuration(decoded_frames
));
520 output_timestamp_helper_
->AddFrames(decoded_frames
);
521 } else if (IsEndOfStream(result
, original_frames
, input
) &&
523 DCHECK_EQ(packet
.size
, 0);
524 output
= AudioBuffer::CreateEOSBuffer();
526 // In case all the frames in the buffer were dropped.
531 QueuedAudioBuffer queue_entry
= { kOk
, output
};
532 queued_audio_
.push_back(queue_entry
);
535 // Decoding finished successfully, update statistics.
537 PipelineStatistics statistics
;
538 statistics
.audio_bytes_decoded
= result
;
539 statistics_cb_
.Run(statistics
);
// Loop while unconsumed bytes remain in the packet.
541 } while (packet
.size
> 0);