// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/filters/ffmpeg_audio_decoder.h"

#include "base/callback_helpers.h"
#include "base/single_thread_task_runner.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_discard_helper.h"
#include "media/base/bind_to_current_loop.h"
#include "media/base/decoder_buffer.h"
#include "media/base/limits.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"

namespace media {

// Returns true if the decode result was end of stream.
static inline bool IsEndOfStream(int result,
                                 int decoded_size,
                                 const scoped_refptr<DecoderBuffer>& input) {
  // Three conditions to meet to declare end of stream for this decoder:
  // 1. FFmpeg didn't read anything.
  // 2. FFmpeg didn't output anything.
  // 3. An end of stream buffer is received.
  return result == 0 && decoded_size == 0 && input->end_of_stream();
}

// Return the number of channels from the data in |frame|.
static inline int DetermineChannels(AVFrame* frame) {
#if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
  // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
  return av_get_channel_layout_nb_channels(frame->channel_layout);
#else
  return frame->channels;
#endif
}

// Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
// AudioBuffer allocated, so unref it.
static void ReleaseAudioBufferImpl(void* opaque, uint8* data) {
  scoped_refptr<AudioBuffer> buffer;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
}
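// Note: swapping |opaque| into a stack-local scoped_refptr adopts the reference
// that GetAudioBuffer() stashed behind the AVBufferRef's opaque pointer, so the
// AudioBuffer is released as soon as |buffer| goes out of scope.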

// Called by FFmpeg's allocation routine to allocate a buffer. Uses
// AVCodecContext.opaque to get the object reference in order to call
// GetAudioBuffer() to do the actual allocation.
static int GetAudioBuffer(struct AVCodecContext* s, AVFrame* frame, int flags) {
  DCHECK(s->codec->capabilities & CODEC_CAP_DR1);
  DCHECK_EQ(s->codec_type, AVMEDIA_TYPE_AUDIO);

  // Since this routine is called by FFmpeg when a buffer is required for audio
  // data, use the values supplied by FFmpeg (ignoring the current settings).
  // FFmpegDecode() gets to determine if the buffer is useable or not.
  AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
  SampleFormat sample_format = AVSampleFormatToSampleFormat(format);
  int channels = DetermineChannels(frame);
  if (channels <= 0 || channels >= limits::kMaxChannels) {
    DLOG(ERROR) << "Requested number of channels (" << channels
                << ") exceeds limit.";
    return AVERROR(EINVAL);
  }

  int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
  if (frame->nb_samples <= 0)
    return AVERROR(EINVAL);

  if (s->channels != channels) {
    DLOG(ERROR) << "AVCodecContext and AVFrame disagree on channel count.";
    return AVERROR(EINVAL);
  }

  // Determine how big the buffer should be and allocate it. FFmpeg may adjust
  // how big each channel data is in order to meet the alignment policy, so
  // we need to take this into consideration.
  int buffer_size_in_bytes =
      av_samples_get_buffer_size(&frame->linesize[0],
                                 channels,
                                 frame->nb_samples,
                                 format,
                                 AudioBuffer::kChannelAlignment);
  // Check for errors from av_samples_get_buffer_size().
  if (buffer_size_in_bytes < 0)
    return buffer_size_in_bytes;
  int frames_required = buffer_size_in_bytes / bytes_per_channel / channels;
  DCHECK_GE(frames_required, frame->nb_samples);
  scoped_refptr<AudioBuffer> buffer = AudioBuffer::CreateBuffer(
      sample_format,
      ChannelLayoutToChromeChannelLayout(s->channel_layout, s->channels),
      channels,
      s->sample_rate,
      frames_required);

  // Initialize the data[] and extended_data[] fields to point into the memory
  // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
  // audio and equal to |channels| for planar audio.
  int number_of_planes = buffer->channel_data().size();
  if (number_of_planes <= AV_NUM_DATA_POINTERS) {
    DCHECK_EQ(frame->extended_data, frame->data);
    for (int i = 0; i < number_of_planes; ++i)
      frame->data[i] = buffer->channel_data()[i];
  } else {
    // There are more channels than can fit into data[], so allocate
    // extended_data[] and fill appropriately.
    frame->extended_data = static_cast<uint8**>(
        av_malloc(number_of_planes * sizeof(*frame->extended_data)));
    int i = 0;
    for (; i < AV_NUM_DATA_POINTERS; ++i)
      frame->extended_data[i] = frame->data[i] = buffer->channel_data()[i];
    for (; i < number_of_planes; ++i)
      frame->extended_data[i] = buffer->channel_data()[i];
  }

  // Now create an AVBufferRef for the data just allocated. It will own the
  // reference to the AudioBuffer object.
  void* opaque = NULL;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
  frame->buf[0] = av_buffer_create(
      frame->data[0], buffer_size_in_bytes, ReleaseAudioBufferImpl, opaque, 0);
  return 0;
}
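// With the setup above, decoded samples are written directly into media-owned
// AudioBuffer memory (no extra copy), and the AVBufferRef keeps that
// AudioBuffer alive until FFmpegDecode() retrieves it with
// av_buffer_get_opaque() and FFmpeg releases the frame.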

FFmpegAudioDecoder::FFmpegAudioDecoder(
    const scoped_refptr<base::SingleThreadTaskRunner>& task_runner,
    const LogCB& log_cb)
    : task_runner_(task_runner),
      state_(kUninitialized),
      av_sample_format_(0),
      log_cb_(log_cb) {
}

FFmpegAudioDecoder::~FFmpegAudioDecoder() {
  DCHECK(task_runner_->BelongsToCurrentThread());

  if (state_ != kUninitialized) {
    ReleaseFFmpegResources();
    ResetTimestampState();
  }
}

std::string FFmpegAudioDecoder::GetDisplayName() const {
  return "FFmpegAudioDecoder";
}

void FFmpegAudioDecoder::Initialize(const AudioDecoderConfig& config,
                                    const InitCB& init_cb,
                                    const OutputCB& output_cb) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK(!config.is_encrypted());

  FFmpegGlue::InitializeFFmpeg();

  config_ = config;
  InitCB bound_init_cb = BindToCurrentLoop(init_cb);

  if (!config.IsValidConfig() || !ConfigureDecoder()) {
    bound_init_cb.Run(false);
    return;
  }

  output_cb_ = BindToCurrentLoop(output_cb);
  state_ = kNormal;
  bound_init_cb.Run(true);
}
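// Note: Initialize(), Decode() and Reset() must all be called on
// |task_runner_|; client callbacks are wrapped with BindToCurrentLoop() (or
// posted) so they are never run synchronously from inside these methods.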

void FFmpegAudioDecoder::Decode(const scoped_refptr<DecoderBuffer>& buffer,
                                const DecodeCB& decode_cb) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK(!decode_cb.is_null());
  CHECK_NE(state_, kUninitialized);
  DecodeCB decode_cb_bound = BindToCurrentLoop(decode_cb);

  if (state_ == kError) {
    decode_cb_bound.Run(kDecodeError);
    return;
  }

  // Do nothing if decoding has finished.
  if (state_ == kDecodeFinished) {
    decode_cb_bound.Run(kOk);
    return;
  }

  DecodeBuffer(buffer, decode_cb_bound);
}

void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
  DCHECK(task_runner_->BelongsToCurrentThread());

  avcodec_flush_buffers(codec_context_.get());
  state_ = kNormal;
  ResetTimestampState();
  task_runner_->PostTask(FROM_HERE, closure);
}

void FFmpegAudioDecoder::DecodeBuffer(
    const scoped_refptr<DecoderBuffer>& buffer,
    const DecodeCB& decode_cb) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK_NE(state_, kUninitialized);
  DCHECK_NE(state_, kDecodeFinished);
  DCHECK_NE(state_, kError);
  DCHECK(buffer.get());

  // Make sure we are notified if http://crbug.com/49709 returns. Issue also
  // occurs with some damaged files.
  if (!buffer->end_of_stream() && buffer->timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    decode_cb.Run(kDecodeError);
    return;
  }

  bool has_produced_frame;
  do {
    has_produced_frame = false;
    if (!FFmpegDecode(buffer, &has_produced_frame)) {
      state_ = kError;
      decode_cb.Run(kDecodeError);
      return;
    }
    // Repeat to flush the decoder after receiving EOS buffer.
  } while (buffer->end_of_stream() && has_produced_frame);

  if (buffer->end_of_stream())
    state_ = kDecodeFinished;

  decode_cb.Run(kOk);
}
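
// Feeds |buffer| to avcodec_decode_audio4(), looping until the packet has been
// fully consumed. Returns false only on an unrecoverable midstream
// configuration change; an ordinary decode failure is logged, the rest of the
// packet is dropped, and decoding continues with the next buffer.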
bool FFmpegAudioDecoder::FFmpegDecode(
    const scoped_refptr<DecoderBuffer>& buffer,
    bool* has_produced_frame) {
  DCHECK(!*has_produced_frame);

  AVPacket packet;
  av_init_packet(&packet);
  if (buffer->end_of_stream()) {
    packet.data = NULL;
    packet.size = 0;
  } else {
    packet.data = const_cast<uint8*>(buffer->data());
    packet.size = buffer->data_size();
  }

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet. Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    int frame_decoded = 0;
    const int result = avcodec_decode_audio4(
        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);

    if (result < 0) {
      DCHECK(!buffer->end_of_stream())
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";

      MEDIA_LOG(DEBUG, log_cb_)
          << "Dropping audio frame which failed decode with timestamp: "
          << buffer->timestamp().InMicroseconds()
          << " us, duration: " << buffer->duration().InMicroseconds()
          << " us, packet size: " << buffer->data_size() << " bytes";

      break;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    scoped_refptr<AudioBuffer> output;
    const int channels = DetermineChannels(av_frame_.get());
    if (frame_decoded) {
      if (av_frame_->sample_rate != config_.samples_per_second() ||
          channels != ChannelLayoutToChannelCount(config_.channel_layout()) ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << config_.samples_per_second()
                    << ", Channels: " << channels << " vs "
                    << ChannelLayoutToChannelCount(config_.channel_layout())
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;

        if (config_.codec() == kCodecAAC &&
            av_frame_->sample_rate == 2 * config_.samples_per_second()) {
          MEDIA_LOG(DEBUG, log_cb_) << "Implicit HE-AAC signalling is being"
                                    << " used. Please use mp4a.40.5 instead of"
                                    << " mp4a.40.2 in the mimetype.";
        }

        // This is an unrecoverable error, so bail out.
        av_frame_unref(av_frame_.get());
        return false;
      }

      // Get the AudioBuffer that the data was decoded into. Adjust the number
      // of frames, in case fewer than requested were actually decoded.
      output = reinterpret_cast<AudioBuffer*>(
          av_buffer_get_opaque(av_frame_->buf[0]));

      DCHECK_EQ(ChannelLayoutToChannelCount(config_.channel_layout()),
                output->channel_count());
      const int unread_frames = output->frame_count() - av_frame_->nb_samples;
      DCHECK_GE(unread_frames, 0);
      if (unread_frames > 0)
        output->TrimEnd(unread_frames);
      av_frame_unref(av_frame_.get());
    }

    // WARNING: |av_frame_| no longer has valid data at this point.
    const int decoded_frames = frame_decoded ? output->frame_count() : 0;
    if (IsEndOfStream(result, decoded_frames, buffer)) {
      DCHECK_EQ(packet.size, 0);
    } else if (discard_helper_->ProcessBuffers(buffer, output)) {
      *has_produced_frame = true;
      output_cb_.Run(output);
    }
  } while (packet.size > 0);

  return true;
}

void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  codec_context_.reset();
  av_frame_.reset();
}
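
// Builds |codec_context_| from |config_|, installs GetAudioBuffer() as the
// custom allocator, and opens the matching FFmpeg decoder. If the codec cannot
// be opened or the channel counts disagree, FFmpeg resources are released and
// the decoder returns to the uninitialized state.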
bool FFmpegAudioDecoder::ConfigureDecoder() {
  if (!config_.IsValidConfig()) {
    DLOG(ERROR) << "Invalid audio stream -"
                << " codec: " << config_.codec()
                << " channel layout: " << config_.channel_layout()
                << " bits per channel: " << config_.bits_per_channel()
                << " samples per second: " << config_.samples_per_second();
    return false;
  }

  if (config_.is_encrypted()) {
    DLOG(ERROR) << "Encrypted audio stream not supported";
    return false;
  }

  // Release existing decoder resources if necessary.
  ReleaseFFmpegResources();

  // Initialize AVCodecContext structure.
  codec_context_.reset(avcodec_alloc_context3(NULL));
  AudioDecoderConfigToAVCodecContext(config_, codec_context_.get());

  codec_context_->opaque = this;
  codec_context_->get_buffer2 = GetAudioBuffer;
  codec_context_->refcounted_frames = 1;
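  // With |get_buffer2| and |refcounted_frames| set, FFmpeg requests an
  // AudioBuffer through GetAudioBuffer() for every decoded frame, and
  // av_frame_unref() simply drops the AVBufferRef created there.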

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    ReleaseFFmpegResources();
    state_ = kUninitialized;
    return false;
  }

  av_frame_.reset(av_frame_alloc());
  discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second(),
                                               config_.codec_delay()));
  av_sample_format_ = codec_context_->sample_fmt;

  if (codec_context_->channels !=
      ChannelLayoutToChannelCount(config_.channel_layout())) {
    DLOG(ERROR) << "Audio configuration specified "
                << ChannelLayoutToChannelCount(config_.channel_layout())
                << " channels, but FFmpeg thinks the file contains "
                << codec_context_->channels << " channels";
    ReleaseFFmpegResources();
    state_ = kUninitialized;
    return false;
  }

  ResetTimestampState();
  return true;
}

void FFmpegAudioDecoder::ResetTimestampState() {
  discard_helper_->Reset(config_.codec_delay());
}

}  // namespace media
.codec_delay());