// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/filters/ffmpeg_audio_decoder.h"

#include "base/callback_helpers.h"
#include "base/single_thread_task_runner.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_discard_helper.h"
#include "media/base/bind_to_current_loop.h"
#include "media/base/decoder_buffer.h"
#include "media/base/limits.h"
#include "media/base/sample_format.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"

namespace media {

// Returns true if the decode result was end of stream.
static inline bool IsEndOfStream(int result,
                                 int decoded_size,
                                 const scoped_refptr<DecoderBuffer>& input) {
  // Three conditions to meet to declare end of stream for this decoder:
  // 1. FFmpeg didn't read anything.
  // 2. FFmpeg didn't output anything.
  // 3. An end of stream buffer is received.
  return result == 0 && decoded_size == 0 && input->end_of_stream();
}

// Returns the number of channels from the data in |frame|.
static inline int DetermineChannels(AVFrame* frame) {
#if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
  // When use_system_ffmpeg==1, libav's AVFrame doesn't have a channels field.
  return av_get_channel_layout_nb_channels(frame->channel_layout);
#else
  return frame->channels;
#endif
}

// Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
// AudioBuffer allocated, so unref it.
static void ReleaseAudioBufferImpl(void* opaque, uint8* data) {
  scoped_refptr<AudioBuffer> buffer;
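  // Swapping the raw pointer stored in |opaque| into a local scoped_refptr
  // adopts the reference taken in GetAudioBuffer(); it is released when
  // |buffer| goes out of scope at the end of this function.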
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
}

// Called by FFmpeg's allocation routine to allocate a buffer. Uses
// AVCodecContext.opaque to get the object reference in order to call
// GetAudioBuffer() to do the actual allocation.
static int GetAudioBuffer(struct AVCodecContext* s, AVFrame* frame, int flags) {
  DCHECK(s->codec->capabilities & CODEC_CAP_DR1);
  DCHECK_EQ(s->codec_type, AVMEDIA_TYPE_AUDIO);

  // Since this routine is called by FFmpeg when a buffer is required for audio
  // data, use the values supplied by FFmpeg (ignoring the current settings).
  // FFmpegDecode() gets to determine if the buffer is usable or not.
  AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
  SampleFormat sample_format = AVSampleFormatToSampleFormat(format);
  int channels = DetermineChannels(frame);
  if (channels <= 0 || channels >= limits::kMaxChannels) {
    DLOG(ERROR) << "Requested number of channels (" << channels
                << ") exceeds limit.";
    return AVERROR(EINVAL);
  }

  int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
  if (frame->nb_samples <= 0)
    return AVERROR(EINVAL);

  if (s->channels != channels) {
    DLOG(ERROR) << "AVCodecContext and AVFrame disagree on channel count.";
    return AVERROR(EINVAL);
  }

  // Determine how big the buffer should be and allocate it. FFmpeg may adjust
  // how big each channel data is in order to meet the alignment policy, so
  // we need to take this into consideration.
  int buffer_size_in_bytes =
      av_samples_get_buffer_size(&frame->linesize[0],
                                 channels,
                                 frame->nb_samples,
                                 format,
                                 AudioBuffer::kChannelAlignment);
  // Check for errors from av_samples_get_buffer_size().
  if (buffer_size_in_bytes < 0)
    return buffer_size_in_bytes;
  int frames_required = buffer_size_in_bytes / bytes_per_channel / channels;
  DCHECK_GE(frames_required, frame->nb_samples);
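  // FFmpeg decodes directly into the AudioBuffer's memory, so size it for the
  // full (possibly alignment-padded) buffer FFmpeg asked for.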
  scoped_refptr<AudioBuffer> buffer = AudioBuffer::CreateBuffer(
      sample_format,
      ChannelLayoutToChromeChannelLayout(s->channel_layout, s->channels),
      channels,
      s->sample_rate,
      frames_required);

  // Initialize the data[] and extended_data[] fields to point into the memory
  // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
  // audio and equal to |channels| for planar audio.
  int number_of_planes = buffer->channel_data().size();
  if (number_of_planes <= AV_NUM_DATA_POINTERS) {
    DCHECK_EQ(frame->extended_data, frame->data);
    for (int i = 0; i < number_of_planes; ++i)
      frame->data[i] = buffer->channel_data()[i];
  } else {
    // There are more channels than can fit into data[], so allocate
    // extended_data[] and fill appropriately.
    frame->extended_data = static_cast<uint8**>(
        av_malloc(number_of_planes * sizeof(*frame->extended_data)));
    int i = 0;
    for (; i < AV_NUM_DATA_POINTERS; ++i)
      frame->extended_data[i] = frame->data[i] = buffer->channel_data()[i];
    for (; i < number_of_planes; ++i)
      frame->extended_data[i] = buffer->channel_data()[i];
  }

  // Now create an AVBufferRef for the data just allocated. It will own the
  // reference to the AudioBuffer object.
  void* opaque = NULL;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
  frame->buf[0] = av_buffer_create(
      frame->data[0], buffer_size_in_bytes, ReleaseAudioBufferImpl, opaque, 0);
  return 0;
}

FFmpegAudioDecoder::FFmpegAudioDecoder(
    const scoped_refptr<base::SingleThreadTaskRunner>& task_runner)
    : task_runner_(task_runner), state_(kUninitialized), av_sample_format_(0) {
}

FFmpegAudioDecoder::~FFmpegAudioDecoder() {
  DCHECK_EQ(state_, kUninitialized);
  DCHECK(!codec_context_);
  DCHECK(!av_frame_);
}

void FFmpegAudioDecoder::Initialize(const AudioDecoderConfig& config,
                                    const PipelineStatusCB& status_cb) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK(!config.is_encrypted());

  FFmpegGlue::InitializeFFmpeg();

  config_ = config;
  PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);

  if (!config.IsValidConfig() || !ConfigureDecoder()) {
    initialize_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
    return;
  }

  // Success!
  state_ = kNormal;
  initialize_cb.Run(PIPELINE_OK);
}

void FFmpegAudioDecoder::Decode(const scoped_refptr<DecoderBuffer>& buffer,
                                const DecodeCB& decode_cb) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK(!decode_cb.is_null());
  CHECK_NE(state_, kUninitialized);
  DecodeCB decode_cb_bound = BindToCurrentLoop(decode_cb);

  if (state_ == kError) {
    decode_cb_bound.Run(kDecodeError, NULL);
    return;
  }

  // Return empty frames if decoding has finished.
  if (state_ == kDecodeFinished) {
    decode_cb_bound.Run(kOk, AudioBuffer::CreateEOSBuffer());
    return;
  }
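
  // A NULL |buffer| means no input was provided for this decode; report the
  // read as aborted.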
  if (!buffer) {
    decode_cb_bound.Run(kAborted, NULL);
    return;
  }

  DecodeBuffer(buffer, decode_cb_bound);
}
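
// Returns the oldest decoded buffer queued by FFmpegDecode(), or NULL when no
// decoded audio is currently pending.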
scoped_refptr<AudioBuffer> FFmpegAudioDecoder::GetDecodeOutput() {
  DCHECK(task_runner_->BelongsToCurrentThread());
  if (queued_audio_.empty())
    return NULL;
  scoped_refptr<AudioBuffer> out = queued_audio_.front();
  queued_audio_.pop_front();
  return out;
}

void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
  DCHECK(task_runner_->BelongsToCurrentThread());

  avcodec_flush_buffers(codec_context_.get());
  state_ = kNormal;
  ResetTimestampState();
  task_runner_->PostTask(FROM_HERE, closure);
}

void FFmpegAudioDecoder::Stop() {
  DCHECK(task_runner_->BelongsToCurrentThread());

  if (state_ == kUninitialized)
    return;

  ReleaseFFmpegResources();
  ResetTimestampState();
  state_ = kUninitialized;
}

void FFmpegAudioDecoder::DecodeBuffer(
    const scoped_refptr<DecoderBuffer>& buffer,
    const DecodeCB& decode_cb) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK_NE(state_, kUninitialized);
  DCHECK_NE(state_, kDecodeFinished);
  DCHECK_NE(state_, kError);

  DCHECK(buffer);

  // During decode, because reads are issued asynchronously, it is possible to
  // receive multiple end of stream buffers since each decode is acked. When
  // the first end of stream buffer is read, FFmpeg may still have frames
  // queued up in the decoder so we need to go through the decode loop until it
  // stops giving sensible data. After that, the decoder should output empty
  // frames. There are three states the decoder can be in:
  //
  //   kNormal: This is the starting state. Buffers are decoded. Decode errors
  //            are discarded.
  //   kFlushCodec: There isn't any more input data. Call avcodec_decode_audio4
  //                until no more data is returned to flush out remaining
  //                frames. The input buffer is ignored at this point.
  //   kDecodeFinished: All calls return empty frames.
  //   kError: Unexpected error happened.
  //
  // These are the possible state transitions.
  //
  // kNormal -> kFlushCodec:
  //     When buffer->end_of_stream() is first true.
  // kNormal -> kError:
  //     A decoding error occurs and decoding needs to stop.
  // kFlushCodec -> kDecodeFinished:
  //     When avcodec_decode_audio4() returns 0 data.
  // kFlushCodec -> kError:
  //     When avcodec_decode_audio4() errors out.
  // (any state) -> kNormal:
  //     Any time Reset() is called.

  // Make sure we are notified if http://crbug.com/49709 returns. Issue also
  // occurs with some damaged files.
  if (!buffer->end_of_stream() && buffer->timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    decode_cb.Run(kDecodeError, NULL);
    return;
  }

  if (!buffer->end_of_stream() && !discard_helper_->initialized() &&
      codec_context_->codec_id == AV_CODEC_ID_VORBIS &&
      buffer->timestamp() < base::TimeDelta()) {
    // Dropping frames for negative timestamps as outlined in section A.2
    // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
    const int discard_frames =
        discard_helper_->TimeDeltaToFrames(-buffer->timestamp());
    discard_helper_->Reset(discard_frames);
  }

  // Transition to kFlushCodec on the first end of stream buffer.
  if (state_ == kNormal && buffer->end_of_stream()) {
    state_ = kFlushCodec;
  }

  if (!FFmpegDecode(buffer)) {
    state_ = kError;
    decode_cb.Run(kDecodeError, NULL);
    return;
  }

  if (queued_audio_.empty()) {
    if (state_ == kFlushCodec) {
      DCHECK(buffer->end_of_stream());
      state_ = kDecodeFinished;
      decode_cb.Run(kOk, AudioBuffer::CreateEOSBuffer());
      return;
    }

    decode_cb.Run(kNotEnoughData, NULL);
    return;
  }

  decode_cb.Run(kOk, queued_audio_.front());
  queued_audio_.pop_front();
}

bool FFmpegAudioDecoder::FFmpegDecode(
    const scoped_refptr<DecoderBuffer>& buffer) {
  DCHECK(queued_audio_.empty());
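
  // Wrap |buffer| in an AVPacket. An empty packet (NULL data, zero size) tells
  // FFmpeg to flush any frames it has buffered internally, which is how end of
  // stream is drained below.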
  AVPacket packet;
  av_init_packet(&packet);
  if (buffer->end_of_stream()) {
    packet.data = NULL;
    packet.size = 0;
  } else {
    packet.data = const_cast<uint8*>(buffer->data());
    packet.size = buffer->data_size();
  }

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet. Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    int frame_decoded = 0;
    const int result = avcodec_decode_audio4(
        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);

    if (result < 0) {
      DCHECK(!buffer->end_of_stream())
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";

      DLOG(WARNING)
          << "Failed to decode an audio frame with timestamp: "
          << buffer->timestamp().InMicroseconds() << " us, duration: "
          << buffer->duration().InMicroseconds() << " us, packet size: "
          << buffer->data_size() << " bytes";

      break;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    scoped_refptr<AudioBuffer> output;
    const int channels = DetermineChannels(av_frame_.get());
    if (frame_decoded) {
      if (av_frame_->sample_rate != config_.samples_per_second() ||
          channels != ChannelLayoutToChannelCount(config_.channel_layout()) ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << config_.samples_per_second()
                    << ", Channels: " << channels << " vs "
                    << ChannelLayoutToChannelCount(config_.channel_layout())
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;

        // This is an unrecoverable error, so bail out.
        queued_audio_.clear();
        av_frame_unref(av_frame_.get());
        return false;
      }

      // Get the AudioBuffer that the data was decoded into. Adjust the number
      // of frames, in case fewer than requested were actually decoded.
      output = reinterpret_cast<AudioBuffer*>(
          av_buffer_get_opaque(av_frame_->buf[0]));

      DCHECK_EQ(ChannelLayoutToChannelCount(config_.channel_layout()),
                output->channel_count());
      const int unread_frames = output->frame_count() - av_frame_->nb_samples;
      DCHECK_GE(unread_frames, 0);
      if (unread_frames > 0)
        output->TrimEnd(unread_frames);

      av_frame_unref(av_frame_.get());
    }

    // WARNING: |av_frame_| no longer has valid data at this point.
    const int decoded_frames = frame_decoded ? output->frame_count() : 0;
    if (IsEndOfStream(result, decoded_frames, buffer)) {
      DCHECK_EQ(packet.size, 0);
      queued_audio_.push_back(AudioBuffer::CreateEOSBuffer());
    } else if (discard_helper_->ProcessBuffers(buffer, output)) {
      queued_audio_.push_back(output);
    }
  } while (packet.size > 0);

  return true;
}

void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  codec_context_.reset();
  av_frame_.reset();
}

bool FFmpegAudioDecoder::ConfigureDecoder() {
  if (!config_.IsValidConfig()) {
    DLOG(ERROR) << "Invalid audio stream -"
                << " codec: " << config_.codec()
                << " channel layout: " << config_.channel_layout()
                << " bits per channel: " << config_.bits_per_channel()
                << " samples per second: " << config_.samples_per_second();
    return false;
  }

  if (config_.is_encrypted()) {
    DLOG(ERROR) << "Encrypted audio stream not supported";
    return false;
  }

  // Release existing decoder resources if necessary.
  ReleaseFFmpegResources();

  // Initialize AVCodecContext structure.
  codec_context_.reset(avcodec_alloc_context3(NULL));
  AudioDecoderConfigToAVCodecContext(config_, codec_context_.get());
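
  // Have FFmpeg allocate audio buffers through GetAudioBuffer() and use
  // reference-counted frames so the AVBufferRef created there keeps the
  // backing AudioBuffer alive until av_frame_unref() is called.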
  codec_context_->opaque = this;
  codec_context_->get_buffer2 = GetAudioBuffer;
  codec_context_->refcounted_frames = 1;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    ReleaseFFmpegResources();
    state_ = kUninitialized;
    return false;
  }

  // Success!
  av_frame_.reset(av_frame_alloc());
  discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second(),
                                               config_.codec_delay()));
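  // Remember the sample format FFmpeg negotiated so midstream format changes
  // can be detected in FFmpegDecode().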
  av_sample_format_ = codec_context_->sample_fmt;

  if (codec_context_->channels !=
      ChannelLayoutToChannelCount(config_.channel_layout())) {
    DLOG(ERROR) << "Audio configuration specified "
                << ChannelLayoutToChannelCount(config_.channel_layout())
                << " channels, but FFmpeg thinks the file contains "
                << codec_context_->channels << " channels";
    ReleaseFFmpegResources();
    state_ = kUninitialized;
    return false;
  }

  ResetTimestampState();
  return true;
}

void FFmpegAudioDecoder::ResetTimestampState() {
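  // Re-arm the discard helper so the codec delay frames at the start of the
  // stream are discarded again after a seek or decoder reconfiguration.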
  discard_helper_->Reset(config_.codec_delay());
}

}  // namespace media