1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/ffmpeg_audio_decoder.h"
8 #include "base/callback_helpers.h"
9 #include "base/location.h"
10 #include "base/single_thread_task_runner.h"
11 #include "media/base/audio_buffer.h"
12 #include "media/base/audio_bus.h"
13 #include "media/base/audio_decoder_config.h"
14 #include "media/base/audio_timestamp_helper.h"
15 #include "media/base/bind_to_current_loop.h"
16 #include "media/base/decoder_buffer.h"
17 #include "media/base/demuxer.h"
18 #include "media/base/limits.h"
19 #include "media/base/pipeline.h"
20 #include "media/base/sample_format.h"
21 #include "media/ffmpeg/ffmpeg_common.h"
22 #include "media/filters/ffmpeg_glue.h"
26 // Helper structure for managing multiple decoded audio frames per packet.
27 struct QueuedAudioBuffer
{
28 AudioDecoder::Status status
;
29 scoped_refptr
<AudioBuffer
> buffer
;
32 // Returns true if the decode result was end of stream.
33 static inline bool IsEndOfStream(int result
,
35 const scoped_refptr
<DecoderBuffer
>& input
) {
36 // Three conditions to meet to declare end of stream for this decoder:
37 // 1. FFmpeg didn't read anything.
38 // 2. FFmpeg didn't output anything.
39 // 3. An end of stream buffer is received.
40 return result
== 0 && decoded_size
== 0 && input
->end_of_stream();
43 // Return the number of channels from the data in |frame|.
44 static inline int DetermineChannels(AVFrame
* frame
) {
45 #if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
46 // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
47 return av_get_channel_layout_nb_channels(frame
->channel_layout
);
49 return frame
->channels
;
53 // Called by FFmpeg's allocation routine to allocate a buffer. Uses
54 // AVCodecContext.opaque to get the object reference in order to call
55 // GetAudioBuffer() to do the actual allocation.
56 static int GetAudioBufferImpl(struct AVCodecContext
* s
,
59 DCHECK(s
->codec
->capabilities
& CODEC_CAP_DR1
);
60 DCHECK_EQ(s
->codec_type
, AVMEDIA_TYPE_AUDIO
);
61 FFmpegAudioDecoder
* decoder
= static_cast<FFmpegAudioDecoder
*>(s
->opaque
);
62 return decoder
->GetAudioBuffer(s
, frame
, flags
);
65 // Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
66 // AudioBuffer allocated, so unref it.
67 static void ReleaseAudioBufferImpl(void* opaque
, uint8
* data
) {
68 scoped_refptr
<AudioBuffer
> buffer
;
69 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
72 FFmpegAudioDecoder::FFmpegAudioDecoder(
73 const scoped_refptr
<base::SingleThreadTaskRunner
>& task_runner
)
74 : task_runner_(task_runner
),
76 demuxer_stream_(NULL
),
77 bytes_per_channel_(0),
78 channel_layout_(CHANNEL_LAYOUT_NONE
),
80 samples_per_second_(0),
82 last_input_timestamp_(kNoTimestamp()),
83 output_frames_to_drop_(0) {
86 void FFmpegAudioDecoder::Initialize(
87 DemuxerStream
* stream
,
88 const PipelineStatusCB
& status_cb
,
89 const StatisticsCB
& statistics_cb
) {
90 DCHECK(task_runner_
->BelongsToCurrentThread());
91 PipelineStatusCB initialize_cb
= BindToCurrentLoop(status_cb
);
93 FFmpegGlue::InitializeFFmpeg();
95 if (demuxer_stream_
) {
96 // TODO(scherkus): initialization currently happens more than once in
97 // PipelineIntegrationTest.BasicPlayback.
98 LOG(ERROR
) << "Initialize has already been called.";
102 weak_this_
= weak_factory_
.GetWeakPtr();
103 demuxer_stream_
= stream
;
105 if (!ConfigureDecoder()) {
106 status_cb
.Run(DECODER_ERROR_NOT_SUPPORTED
);
110 statistics_cb_
= statistics_cb
;
111 initialize_cb
.Run(PIPELINE_OK
);
114 void FFmpegAudioDecoder::Read(const ReadCB
& read_cb
) {
115 DCHECK(task_runner_
->BelongsToCurrentThread());
116 DCHECK(!read_cb
.is_null());
117 CHECK(read_cb_
.is_null()) << "Overlapping decodes are not supported.";
119 read_cb_
= BindToCurrentLoop(read_cb
);
121 // If we don't have any queued audio from the last packet we decoded, ask for
122 // more data from the demuxer to satisfy this read.
123 if (queued_audio_
.empty()) {
124 ReadFromDemuxerStream();
128 base::ResetAndReturn(&read_cb_
).Run(
129 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
130 queued_audio_
.pop_front();
133 int FFmpegAudioDecoder::bits_per_channel() {
134 DCHECK(task_runner_
->BelongsToCurrentThread());
135 return bytes_per_channel_
* 8;
138 ChannelLayout
FFmpegAudioDecoder::channel_layout() {
139 DCHECK(task_runner_
->BelongsToCurrentThread());
140 return channel_layout_
;
143 int FFmpegAudioDecoder::samples_per_second() {
144 DCHECK(task_runner_
->BelongsToCurrentThread());
145 return samples_per_second_
;
148 void FFmpegAudioDecoder::Reset(const base::Closure
& closure
) {
149 DCHECK(task_runner_
->BelongsToCurrentThread());
150 base::Closure reset_cb
= BindToCurrentLoop(closure
);
152 avcodec_flush_buffers(codec_context_
.get());
153 ResetTimestampState();
154 queued_audio_
.clear();
158 FFmpegAudioDecoder::~FFmpegAudioDecoder() {
159 // TODO(scherkus): should we require Stop() to be called? this might end up
160 // getting called on a random thread due to refcounting.
161 ReleaseFFmpegResources();
164 int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext
* codec
,
167 // Since this routine is called by FFmpeg when a buffer is required for audio
168 // data, use the values supplied by FFmpeg (ignoring the current settings).
169 // RunDecodeLoop() gets to determine if the buffer is useable or not.
170 AVSampleFormat format
= static_cast<AVSampleFormat
>(frame
->format
);
171 SampleFormat sample_format
= AVSampleFormatToSampleFormat(format
);
172 int channels
= DetermineChannels(frame
);
173 if ((channels
<= 0) || (channels
>= limits::kMaxChannels
)) {
174 DLOG(ERROR
) << "Requested number of channels (" << channels
175 << ") exceeds limit.";
176 return AVERROR(EINVAL
);
179 int bytes_per_channel
= SampleFormatToBytesPerChannel(sample_format
);
180 if (frame
->nb_samples
<= 0)
181 return AVERROR(EINVAL
);
183 // Determine how big the buffer should be and allocate it. FFmpeg may adjust
184 // how big each channel data is in order to meet the alignment policy, so
185 // we need to take this into consideration.
186 int buffer_size_in_bytes
=
187 av_samples_get_buffer_size(&frame
->linesize
[0],
191 AudioBuffer::kChannelAlignment
);
192 // Check for errors from av_samples_get_buffer_size().
193 if (buffer_size_in_bytes
< 0)
194 return buffer_size_in_bytes
;
195 int frames_required
= buffer_size_in_bytes
/ bytes_per_channel
/ channels
;
196 DCHECK_GE(frames_required
, frame
->nb_samples
);
197 scoped_refptr
<AudioBuffer
> buffer
=
198 AudioBuffer::CreateBuffer(sample_format
, channels
, frames_required
);
200 // Initialize the data[] and extended_data[] fields to point into the memory
201 // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
202 // audio and equal to |channels| for planar audio.
203 int number_of_planes
= buffer
->channel_data().size();
204 if (number_of_planes
<= AV_NUM_DATA_POINTERS
) {
205 DCHECK_EQ(frame
->extended_data
, frame
->data
);
206 for (int i
= 0; i
< number_of_planes
; ++i
)
207 frame
->data
[i
] = buffer
->channel_data()[i
];
209 // There are more channels than can fit into data[], so allocate
210 // extended_data[] and fill appropriately.
211 frame
->extended_data
= static_cast<uint8
**>(
212 av_malloc(number_of_planes
* sizeof(*frame
->extended_data
)));
214 for (; i
< AV_NUM_DATA_POINTERS
; ++i
)
215 frame
->extended_data
[i
] = frame
->data
[i
] = buffer
->channel_data()[i
];
216 for (; i
< number_of_planes
; ++i
)
217 frame
->extended_data
[i
] = buffer
->channel_data()[i
];
220 // Now create an AVBufferRef for the data just allocated. It will own the
221 // reference to the AudioBuffer object.
223 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
224 frame
->buf
[0] = av_buffer_create(
225 frame
->data
[0], buffer_size_in_bytes
, ReleaseAudioBufferImpl
, opaque
, 0);
229 void FFmpegAudioDecoder::ReadFromDemuxerStream() {
230 DCHECK(!read_cb_
.is_null());
231 demuxer_stream_
->Read(base::Bind(
232 &FFmpegAudioDecoder::BufferReady
, weak_this_
));
235 void FFmpegAudioDecoder::BufferReady(
236 DemuxerStream::Status status
,
237 const scoped_refptr
<DecoderBuffer
>& input
) {
238 DCHECK(task_runner_
->BelongsToCurrentThread());
239 DCHECK(!read_cb_
.is_null());
240 DCHECK(queued_audio_
.empty());
241 DCHECK_EQ(status
!= DemuxerStream::kOk
, !input
.get()) << status
;
243 if (status
== DemuxerStream::kAborted
) {
244 DCHECK(!input
.get());
245 base::ResetAndReturn(&read_cb_
).Run(kAborted
, NULL
);
249 if (status
== DemuxerStream::kConfigChanged
) {
250 DCHECK(!input
.get());
252 // Send a "end of stream" buffer to the decode loop
253 // to output any remaining data still in the decoder.
254 RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);
256 DVLOG(1) << "Config changed.";
258 if (!ConfigureDecoder()) {
259 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
263 ResetTimestampState();
265 if (queued_audio_
.empty()) {
266 ReadFromDemuxerStream();
270 base::ResetAndReturn(&read_cb_
).Run(
271 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
272 queued_audio_
.pop_front();
276 DCHECK_EQ(status
, DemuxerStream::kOk
);
279 // Make sure we are notified if http://crbug.com/49709 returns. Issue also
280 // occurs with some damaged files.
281 if (!input
->end_of_stream() && input
->timestamp() == kNoTimestamp() &&
282 output_timestamp_helper_
->base_timestamp() == kNoTimestamp()) {
283 DVLOG(1) << "Received a buffer without timestamps!";
284 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
288 if (!input
->end_of_stream()) {
289 if (last_input_timestamp_
== kNoTimestamp() &&
290 codec_context_
->codec_id
== AV_CODEC_ID_VORBIS
&&
291 input
->timestamp() < base::TimeDelta()) {
292 // Dropping frames for negative timestamps as outlined in section A.2
293 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
294 output_frames_to_drop_
= floor(
295 0.5 + -input
->timestamp().InSecondsF() * samples_per_second_
);
297 if (last_input_timestamp_
!= kNoTimestamp() &&
298 input
->timestamp() < last_input_timestamp_
) {
299 const base::TimeDelta diff
= input
->timestamp() - last_input_timestamp_
;
301 << "Input timestamps are not monotonically increasing! "
302 << " ts " << input
->timestamp().InMicroseconds() << " us"
303 << " diff " << diff
.InMicroseconds() << " us";
306 last_input_timestamp_
= input
->timestamp();
310 RunDecodeLoop(input
, false);
312 // We exhausted the provided packet, but it wasn't enough for a frame. Ask
313 // for more data in order to fulfill this read.
314 if (queued_audio_
.empty()) {
315 ReadFromDemuxerStream();
319 // Execute callback to return the first frame we decoded.
320 base::ResetAndReturn(&read_cb_
).Run(
321 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
322 queued_audio_
.pop_front();
325 bool FFmpegAudioDecoder::ConfigureDecoder() {
326 const AudioDecoderConfig
& config
= demuxer_stream_
->audio_decoder_config();
328 if (!config
.IsValidConfig()) {
329 DLOG(ERROR
) << "Invalid audio stream -"
330 << " codec: " << config
.codec()
331 << " channel layout: " << config
.channel_layout()
332 << " bits per channel: " << config
.bits_per_channel()
333 << " samples per second: " << config
.samples_per_second();
337 if (config
.is_encrypted()) {
338 DLOG(ERROR
) << "Encrypted audio stream not supported";
342 if (codec_context_
.get() &&
343 (bytes_per_channel_
!= config
.bytes_per_channel() ||
344 channel_layout_
!= config
.channel_layout() ||
345 samples_per_second_
!= config
.samples_per_second())) {
346 DVLOG(1) << "Unsupported config change :";
347 DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
348 << " -> " << config
.bytes_per_channel();
349 DVLOG(1) << "\tchannel_layout : " << channel_layout_
350 << " -> " << config
.channel_layout();
351 DVLOG(1) << "\tsample_rate : " << samples_per_second_
352 << " -> " << config
.samples_per_second();
356 // Release existing decoder resources if necessary.
357 ReleaseFFmpegResources();
359 // Initialize AVCodecContext structure.
360 codec_context_
.reset(avcodec_alloc_context3(NULL
));
361 AudioDecoderConfigToAVCodecContext(config
, codec_context_
.get());
363 codec_context_
->opaque
= this;
364 codec_context_
->get_buffer2
= GetAudioBufferImpl
;
365 codec_context_
->refcounted_frames
= 1;
367 AVCodec
* codec
= avcodec_find_decoder(codec_context_
->codec_id
);
368 if (!codec
|| avcodec_open2(codec_context_
.get(), codec
, NULL
) < 0) {
369 DLOG(ERROR
) << "Could not initialize audio decoder: "
370 << codec_context_
->codec_id
;
375 av_frame_
.reset(av_frame_alloc());
376 channel_layout_
= config
.channel_layout();
377 samples_per_second_
= config
.samples_per_second();
378 output_timestamp_helper_
.reset(
379 new AudioTimestampHelper(config
.samples_per_second()));
381 // Store initial values to guard against midstream configuration changes.
382 channels_
= codec_context_
->channels
;
383 if (channels_
!= ChannelLayoutToChannelCount(channel_layout_
)) {
384 DLOG(ERROR
) << "Audio configuration specified "
385 << ChannelLayoutToChannelCount(channel_layout_
)
386 << " channels, but FFmpeg thinks the file contains "
387 << channels_
<< " channels";
390 av_sample_format_
= codec_context_
->sample_fmt
;
391 sample_format_
= AVSampleFormatToSampleFormat(
392 static_cast<AVSampleFormat
>(av_sample_format_
));
393 bytes_per_channel_
= SampleFormatToBytesPerChannel(sample_format_
);
398 void FFmpegAudioDecoder::ReleaseFFmpegResources() {
399 codec_context_
.reset();
403 void FFmpegAudioDecoder::ResetTimestampState() {
404 output_timestamp_helper_
->SetBaseTimestamp(kNoTimestamp());
405 last_input_timestamp_
= kNoTimestamp();
406 output_frames_to_drop_
= 0;
409 void FFmpegAudioDecoder::RunDecodeLoop(
410 const scoped_refptr
<DecoderBuffer
>& input
,
411 bool skip_eos_append
) {
413 av_init_packet(&packet
);
414 if (input
->end_of_stream()) {
418 packet
.data
= const_cast<uint8
*>(input
->data());
419 packet
.size
= input
->data_size();
422 // Each audio packet may contain several frames, so we must call the decoder
423 // until we've exhausted the packet. Regardless of the packet size we always
424 // want to hand it to the decoder at least once, otherwise we would end up
425 // skipping end of stream packets since they have a size of zero.
427 int frame_decoded
= 0;
428 int result
= avcodec_decode_audio4(
429 codec_context_
.get(), av_frame_
.get(), &frame_decoded
, &packet
);
432 DCHECK(!input
->end_of_stream())
433 << "End of stream buffer produced an error! "
434 << "This is quite possibly a bug in the audio decoder not handling "
435 << "end of stream AVPackets correctly.";
438 << "Failed to decode an audio frame with timestamp: "
439 << input
->timestamp().InMicroseconds() << " us, duration: "
440 << input
->duration().InMicroseconds() << " us, packet size: "
441 << input
->data_size() << " bytes";
446 // Update packet size and data pointer in case we need to call the decoder
447 // with the remaining bytes from this packet.
448 packet
.size
-= result
;
449 packet
.data
+= result
;
451 if (output_timestamp_helper_
->base_timestamp() == kNoTimestamp() &&
452 !input
->end_of_stream()) {
453 DCHECK(input
->timestamp() != kNoTimestamp());
454 if (output_frames_to_drop_
> 0) {
455 // Currently Vorbis is the only codec that causes us to drop samples.
456 // If we have to drop samples it always means the timeline starts at 0.
457 DCHECK_EQ(codec_context_
->codec_id
, AV_CODEC_ID_VORBIS
);
458 output_timestamp_helper_
->SetBaseTimestamp(base::TimeDelta());
460 output_timestamp_helper_
->SetBaseTimestamp(input
->timestamp());
464 scoped_refptr
<AudioBuffer
> output
;
465 int decoded_frames
= 0;
466 int original_frames
= 0;
467 int channels
= DetermineChannels(av_frame_
.get());
469 if (av_frame_
->sample_rate
!= samples_per_second_
||
470 channels
!= channels_
||
471 av_frame_
->format
!= av_sample_format_
) {
472 DLOG(ERROR
) << "Unsupported midstream configuration change!"
473 << " Sample Rate: " << av_frame_
->sample_rate
<< " vs "
474 << samples_per_second_
475 << ", Channels: " << channels
<< " vs "
477 << ", Sample Format: " << av_frame_
->format
<< " vs "
478 << av_sample_format_
;
480 // This is an unrecoverable error, so bail out.
481 QueuedAudioBuffer queue_entry
= { kDecodeError
, NULL
};
482 queued_audio_
.push_back(queue_entry
);
483 av_frame_unref(av_frame_
.get());
487 // Get the AudioBuffer that the data was decoded into. Adjust the number
488 // of frames, in case fewer than requested were actually decoded.
489 output
= reinterpret_cast<AudioBuffer
*>(
490 av_buffer_get_opaque(av_frame_
->buf
[0]));
491 DCHECK_EQ(channels_
, output
->channel_count());
492 original_frames
= av_frame_
->nb_samples
;
493 int unread_frames
= output
->frame_count() - original_frames
;
494 DCHECK_GE(unread_frames
, 0);
495 if (unread_frames
> 0)
496 output
->TrimEnd(unread_frames
);
498 // If there are frames to drop, get rid of as many as we can.
499 if (output_frames_to_drop_
> 0) {
500 int drop
= std::min(output
->frame_count(), output_frames_to_drop_
);
501 output
->TrimStart(drop
);
502 output_frames_to_drop_
-= drop
;
505 decoded_frames
= output
->frame_count();
506 av_frame_unref(av_frame_
.get());
509 // WARNING: |av_frame_| no longer has valid data at this point.
511 if (decoded_frames
> 0) {
512 // Set the timestamp/duration once all the extra frames have been
514 output
->set_timestamp(output_timestamp_helper_
->GetTimestamp());
515 output
->set_duration(
516 output_timestamp_helper_
->GetFrameDuration(decoded_frames
));
517 output_timestamp_helper_
->AddFrames(decoded_frames
);
518 } else if (IsEndOfStream(result
, original_frames
, input
) &&
520 DCHECK_EQ(packet
.size
, 0);
521 output
= AudioBuffer::CreateEOSBuffer();
523 // In case all the frames in the buffer were dropped.
528 QueuedAudioBuffer queue_entry
= { kOk
, output
};
529 queued_audio_
.push_back(queue_entry
);
532 // Decoding finished successfully, update statistics.
534 PipelineStatistics statistics
;
535 statistics
.audio_bytes_decoded
= result
;
536 statistics_cb_
.Run(statistics
);
538 } while (packet
.size
> 0);