1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/ffmpeg_audio_decoder.h"
8 #include "base/callback_helpers.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "media/base/audio_buffer.h"
12 #include "media/base/audio_bus.h"
13 #include "media/base/audio_decoder_config.h"
14 #include "media/base/audio_timestamp_helper.h"
15 #include "media/base/bind_to_loop.h"
16 #include "media/base/decoder_buffer.h"
17 #include "media/base/demuxer.h"
18 #include "media/base/pipeline.h"
19 #include "media/base/sample_format.h"
20 #include "media/ffmpeg/ffmpeg_common.h"
21 #include "media/filters/ffmpeg_glue.h"
25 // Helper structure for managing multiple decoded audio frames per packet.
// Entries of this type are appended to |queued_audio_| by RunDecodeLoop()
// and popped one at a time by Read()/BufferReady().
26 struct QueuedAudioBuffer
{
// Result of the decode that produced |buffer| (e.g. kOk, kDecodeError;
// RunDecodeLoop() queues { kDecodeError, NULL } on fatal errors).
27 AudioDecoder::Status status
;
// The decoded audio; may be NULL when |status| signals an error.
// NOTE(review): the closing "};" of this struct (original lines 29-30)
// appears elided in this extraction — confirm against the full file.
28 scoped_refptr
<AudioBuffer
> buffer
;
31 // Returns true if the decode result was end of stream.
// NOTE(review): the |decoded_size| parameter declaration (original line 33)
// appears elided in this extraction; the return expression below reads it.
32 static inline bool IsEndOfStream(int result
,
34 const scoped_refptr
<DecoderBuffer
>& input
) {
35 // Three conditions to meet to declare end of stream for this decoder:
36 // 1. FFmpeg didn't read anything.
37 // 2. FFmpeg didn't output anything.
38 // 3. An end of stream buffer is received.
39 return result
== 0 && decoded_size
== 0 && input
->end_of_stream();
42 // Return the number of channels from the data in |frame|.
43 static inline int DetermineChannels(AVFrame
* frame
) {
44 #if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
45 // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
// Derive the channel count from the channel-layout bitmask instead.
46 return av_get_channel_layout_nb_channels(frame
->channel_layout
);
// NOTE(review): the #else / #endif of this preprocessor conditional
// (original lines 47 and 49-50) appear elided in this extraction.
48 return frame
->channels
;
52 // Called by FFmpeg's allocation routine to allocate a buffer. Uses
53 // AVCodecContext.opaque to get the object reference in order to call
54 // GetAudioBuffer() to do the actual allocation.
// NOTE(review): the |frame| and |flags| parameter declarations (original
// lines 56-57) appear elided in this extraction; both are used below.
55 static int GetAudioBufferImpl(struct AVCodecContext
* s
,
// Custom allocation requires the codec to support direct rendering.
58 DCHECK(s
->codec
->capabilities
& CODEC_CAP_DR1
);
59 DCHECK_EQ(s
->codec_type
, AVMEDIA_TYPE_AUDIO
);
// |opaque| is set to the FFmpegAudioDecoder instance in ConfigureDecoder().
60 FFmpegAudioDecoder
* decoder
= static_cast<FFmpegAudioDecoder
*>(s
->opaque
);
61 return decoder
->GetAudioBuffer(s
, frame
, flags
);
64 // Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
65 // AudioBuffer allocated, so unref it.
66 static void ReleaseAudioBufferImpl(void* opaque
, uint8
* data
) {
// swap() adopts the raw reference stored in |opaque| without adding a new
// one; the reference is released when |buffer| goes out of scope.
67 scoped_refptr
<AudioBuffer
> buffer
;
68 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
71 FFmpegAudioDecoder::FFmpegAudioDecoder(
72 const scoped_refptr
<base::MessageLoopProxy
>& message_loop
)
// All member state is only touched on |message_loop_|'s thread; the member
// functions DCHECK BelongsToCurrentThread() before using it.
// NOTE(review): several initializers and the constructor body (original
// lines 74, 76, 79, 81, 84-86) appear elided in this extraction.
73 : message_loop_(message_loop
),
75 demuxer_stream_(NULL
),
77 bytes_per_channel_(0),
78 channel_layout_(CHANNEL_LAYOUT_NONE
),
80 samples_per_second_(0),
82 last_input_timestamp_(kNoTimestamp()),
83 output_frames_to_drop_(0),
// Initializes the decoder against |stream|'s audio config. The result is
// reported through |status_cb| (re-posted to the owning loop via
// |initialize_cb| on success); |statistics_cb| is stored for later
// per-decode statistics reporting in RunDecodeLoop().
// NOTE(review): the early-return statements and closing braces of the
// error paths (original lines 93, 95, 100-102, 105, 108-110, 113-114)
// appear elided in this extraction.
87 void FFmpegAudioDecoder::Initialize(
88 DemuxerStream
* stream
,
89 const PipelineStatusCB
& status_cb
,
90 const StatisticsCB
& statistics_cb
) {
91 DCHECK(message_loop_
->BelongsToCurrentThread());
92 PipelineStatusCB initialize_cb
= BindToCurrentLoop(status_cb
);
94 FFmpegGlue::InitializeFFmpeg();
// A non-NULL |demuxer_stream_| means Initialize() already ran once.
96 if (demuxer_stream_
) {
97 // TODO(scherkus): initialization currently happens more than once in
98 // PipelineIntegrationTest.BasicPlayback.
99 LOG(ERROR
) << "Initialize has already been called.";
103 weak_this_
= weak_factory_
.GetWeakPtr();
104 demuxer_stream_
= stream
;
106 if (!ConfigureDecoder()) {
107 status_cb
.Run(DECODER_ERROR_NOT_SUPPORTED
);
111 statistics_cb_
= statistics_cb
;
112 initialize_cb
.Run(PIPELINE_OK
);
// Satisfies one decode request. The result is delivered through |read_cb_|
// (bound to the calling loop); only a single outstanding Read() is allowed
// at a time, enforced by the CHECK below.
115 void FFmpegAudioDecoder::Read(const ReadCB
& read_cb
) {
116 DCHECK(message_loop_
->BelongsToCurrentThread());
117 DCHECK(!read_cb
.is_null());
118 CHECK(read_cb_
.is_null()) << "Overlapping decodes are not supported.";
120 read_cb_
= BindToCurrentLoop(read_cb
);
122 // If we don't have any queued audio from the last packet we decoded, ask for
123 // more data from the demuxer to satisfy this read.
124 if (queued_audio_
.empty()) {
125 ReadFromDemuxerStream();
// NOTE(review): the "return;" and closing brace of this branch (original
// lines 126-128) appear elided in this extraction.
129 base::ResetAndReturn(&read_cb_
).Run(
130 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
131 queued_audio_
.pop_front();
// Returns the sample size in bits, derived from |bytes_per_channel_| which
// is set by ConfigureDecoder().
134 int FFmpegAudioDecoder::bits_per_channel() {
135 DCHECK(message_loop_
->BelongsToCurrentThread());
136 return bytes_per_channel_
* 8;
// Returns the channel layout captured from the config in ConfigureDecoder().
139 ChannelLayout
FFmpegAudioDecoder::channel_layout() {
140 DCHECK(message_loop_
->BelongsToCurrentThread());
141 return channel_layout_
;
// Returns the sample rate captured from the config in ConfigureDecoder().
144 int FFmpegAudioDecoder::samples_per_second() {
145 DCHECK(message_loop_
->BelongsToCurrentThread());
146 return samples_per_second_
;
// Flushes the FFmpeg codec, resets timestamp tracking, and drops any queued
// output. NOTE(review): the invocation of |reset_cb| and the closing brace
// (original lines 156-158) appear elided in this extraction.
149 void FFmpegAudioDecoder::Reset(const base::Closure
& closure
) {
150 DCHECK(message_loop_
->BelongsToCurrentThread());
151 base::Closure reset_cb
= BindToCurrentLoop(closure
);
153 avcodec_flush_buffers(codec_context_
);
154 ResetTimestampState();
155 queued_audio_
.clear();
// Frees the FFmpeg state allocated by ConfigureDecoder().
159 FFmpegAudioDecoder::~FFmpegAudioDecoder() {
160 // TODO(scherkus): should we require Stop() to be called? this might end up
161 // getting called on a random thread due to refcounting.
162 ReleaseFFmpegResources();
// FFmpeg get_buffer2() implementation (installed via GetAudioBufferImpl):
// allocates a media::AudioBuffer sized to FFmpeg's alignment requirements
// and points |frame|'s data planes directly into it, so decoded samples
// land in our buffer without an extra copy.
// NOTE(review): the |frame| and |flags| parameter declarations (original
// lines 166-167) appear elided in this extraction.
165 int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext
* codec
,
168 // Since this routine is called by FFmpeg when a buffer is required for audio
169 // data, use the values supplied by FFmpeg (ignoring the current settings).
170 // RunDecodeLoop() gets to determine if the buffer is useable or not.
171 AVSampleFormat format
= static_cast<AVSampleFormat
>(frame
->format
);
172 SampleFormat sample_format
= AVSampleFormatToSampleFormat(format
);
173 int channels
= DetermineChannels(frame
);
174 int bytes_per_channel
= SampleFormatToBytesPerChannel(sample_format
);
175 if (frame
->nb_samples
<= 0)
176 return AVERROR(EINVAL
);
178 // Determine how big the buffer should be and allocate it. FFmpeg may adjust
179 // how big each channel data is in order to meet the alignment policy, so
180 // we need to take this into consideration.
// NOTE(review): the remaining arguments to av_samples_get_buffer_size()
// (original lines 183-185) appear elided in this extraction.
181 int buffer_size_in_bytes
=
182 av_samples_get_buffer_size(&frame
->linesize
[0],
186 AudioBuffer::kChannelAlignment
);
// The aligned buffer may hold more frames than FFmpeg asked for; any
// excess is trimmed later in RunDecodeLoop().
187 int frames_required
= buffer_size_in_bytes
/ bytes_per_channel
/ channels
;
188 DCHECK_GE(frames_required
, frame
->nb_samples
);
189 scoped_refptr
<AudioBuffer
> buffer
=
190 AudioBuffer::CreateBuffer(sample_format
, channels
, frames_required
);
192 // Initialize the data[] and extended_data[] fields to point into the memory
193 // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
194 // audio and equal to |channels| for planar audio.
195 int number_of_planes
= buffer
->channel_data().size();
196 if (number_of_planes
<= AV_NUM_DATA_POINTERS
) {
197 DCHECK_EQ(frame
->extended_data
, frame
->data
);
198 for (int i
= 0; i
< number_of_planes
; ++i
)
199 frame
->data
[i
] = buffer
->channel_data()[i
];
// NOTE(review): the "} else {" and the declaration of loop index |i|
// (original lines 200 and 205) appear elided in this extraction.
201 // There are more channels than can fit into data[], so allocate
202 // extended_data[] and fill appropriately.
203 frame
->extended_data
= static_cast<uint8
**>(
204 av_malloc(number_of_planes
* sizeof(*frame
->extended_data
)));
206 for (; i
< AV_NUM_DATA_POINTERS
; ++i
)
207 frame
->extended_data
[i
] = frame
->data
[i
] = buffer
->channel_data()[i
];
208 for (; i
< number_of_planes
; ++i
)
209 frame
->extended_data
[i
] = buffer
->channel_data()[i
];
212 // Now create an AVBufferRef for the data just allocated. It will own the
213 // reference to the AudioBuffer object.
// The swap() transfers our reference into |opaque|; ReleaseAudioBufferImpl
// releases it when FFmpeg frees the AVBufferRef.
// NOTE(review): the declaration of |opaque| and the trailing "return 0;"
// (original lines 214 and 218-219) appear elided in this extraction.
215 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
216 frame
->buf
[0] = av_buffer_create(
217 frame
->data
[0], buffer_size_in_bytes
, ReleaseAudioBufferImpl
, opaque
, 0);
// Requests the next encoded buffer from the demuxer. BufferReady() is
// invoked with the result; the weak pointer guards against the decoder
// being destroyed before the demuxer replies.
221 void FFmpegAudioDecoder::ReadFromDemuxerStream() {
222 DCHECK(!read_cb_
.is_null());
223 demuxer_stream_
->Read(base::Bind(
224 &FFmpegAudioDecoder::BufferReady
, weak_this_
));
// Demuxer read completion. Handles the abort and config-change statuses,
// validates input timestamps (Vorbis pre-roll dropping, monotonicity),
// then runs the decode loop and returns the first queued output frame
// through |read_cb_|.
// NOTE(review): the "return;" statements and closing braces terminating the
// early-exit branches below appear elided throughout this extraction
// (e.g. original lines 238-240, 265-267, 277-279, 298-300).
227 void FFmpegAudioDecoder::BufferReady(
228 DemuxerStream::Status status
,
229 const scoped_refptr
<DecoderBuffer
>& input
) {
230 DCHECK(message_loop_
->BelongsToCurrentThread());
231 DCHECK(!read_cb_
.is_null());
232 DCHECK(queued_audio_
.empty());
// |input| must be non-NULL exactly when the demuxer reports kOk.
233 DCHECK_EQ(status
!= DemuxerStream::kOk
, !input
.get()) << status
;
235 if (status
== DemuxerStream::kAborted
) {
236 DCHECK(!input
.get());
237 base::ResetAndReturn(&read_cb_
).Run(kAborted
, NULL
);
241 if (status
== DemuxerStream::kConfigChanged
) {
242 DCHECK(!input
.get());
244 // Send a "end of stream" buffer to the decode loop
245 // to output any remaining data still in the decoder.
246 RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);
248 DVLOG(1) << "Config changed.";
250 if (!ConfigureDecoder()) {
251 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
255 ResetTimestampState();
// Flushing the old codec may have produced output; return it if so,
// otherwise ask the demuxer for more data.
257 if (queued_audio_
.empty()) {
258 ReadFromDemuxerStream();
262 base::ResetAndReturn(&read_cb_
).Run(
263 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
264 queued_audio_
.pop_front();
268 DCHECK_EQ(status
, DemuxerStream::kOk
);
271 // Make sure we are notified if http://crbug.com/49709 returns. Issue also
272 // occurs with some damaged files.
273 if (!input
->end_of_stream() && input
->timestamp() == kNoTimestamp() &&
274 output_timestamp_helper_
->base_timestamp() == kNoTimestamp()) {
275 DVLOG(1) << "Received a buffer without timestamps!";
276 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
280 bool is_vorbis
= codec_context_
->codec_id
== AV_CODEC_ID_VORBIS
;
281 if (!input
->end_of_stream()) {
282 if (last_input_timestamp_
== kNoTimestamp()) {
283 if (is_vorbis
&& (input
->timestamp() < base::TimeDelta())) {
284 // Dropping frames for negative timestamps as outlined in section A.2
285 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
// floor(0.5 + x) rounds the frame count to the nearest integer.
286 output_frames_to_drop_
= floor(
287 0.5 + -input
->timestamp().InSecondsF() * samples_per_second_
);
289 last_input_timestamp_
= input
->timestamp();
291 } else if (input
->timestamp() != kNoTimestamp()) {
292 if (input
->timestamp() < last_input_timestamp_
) {
293 base::TimeDelta diff
= input
->timestamp() - last_input_timestamp_
;
294 DVLOG(1) << "Input timestamps are not monotonically increasing! "
295 << " ts " << input
->timestamp().InMicroseconds() << " us"
296 << " diff " << diff
.InMicroseconds() << " us";
297 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
301 last_input_timestamp_
= input
->timestamp();
305 RunDecodeLoop(input
, false);
307 // We exhausted the provided packet, but it wasn't enough for a frame. Ask
308 // for more data in order to fulfill this read.
309 if (queued_audio_
.empty()) {
310 ReadFromDemuxerStream();
314 // Execute callback to return the first frame we decoded.
315 base::ResetAndReturn(&read_cb_
).Run(
316 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
317 queued_audio_
.pop_front();
// (Re)creates the AVCodecContext from the demuxer stream's current audio
// config and caches the derived format fields (|channels_|,
// |av_sample_format_|, |bytes_per_channel_|, ...) used to detect midstream
// configuration changes. Returns false for invalid, encrypted, or
// unsupported configs and on codec-open failure.
// NOTE(review): the "return false;" / "return true;" statements and closing
// braces of the branches below (e.g. original lines 329-331, 334-336,
// 348-350, 365-368, 382-383, 388-391) appear elided in this extraction.
320 bool FFmpegAudioDecoder::ConfigureDecoder() {
321 const AudioDecoderConfig
& config
= demuxer_stream_
->audio_decoder_config();
323 if (!config
.IsValidConfig()) {
324 DLOG(ERROR
) << "Invalid audio stream -"
325 << " codec: " << config
.codec()
326 << " channel layout: " << config
.channel_layout()
327 << " bits per channel: " << config
.bits_per_channel()
328 << " samples per second: " << config
.samples_per_second();
332 if (config
.is_encrypted()) {
333 DLOG(ERROR
) << "Encrypted audio stream not supported";
// A live codec context whose core parameters differ from the new config is
// a config change this decoder cannot absorb.
337 if (codec_context_
&&
338 (bytes_per_channel_
!= config
.bytes_per_channel() ||
339 channel_layout_
!= config
.channel_layout() ||
340 samples_per_second_
!= config
.samples_per_second())) {
341 DVLOG(1) << "Unsupported config change :";
342 DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
343 << " -> " << config
.bytes_per_channel();
344 DVLOG(1) << "\tchannel_layout : " << channel_layout_
345 << " -> " << config
.channel_layout();
346 DVLOG(1) << "\tsample_rate : " << samples_per_second_
347 << " -> " << config
.samples_per_second();
351 // Release existing decoder resources if necessary.
352 ReleaseFFmpegResources();
354 // Initialize AVCodecContext structure.
355 codec_context_
= avcodec_alloc_context3(NULL
);
356 AudioDecoderConfigToAVCodecContext(config
, codec_context_
);
// Route FFmpeg's buffer allocation through GetAudioBufferImpl(), which
// finds this decoder again via |opaque|.
358 codec_context_
->opaque
= this;
359 codec_context_
->get_buffer2
= GetAudioBufferImpl
;
361 AVCodec
* codec
= avcodec_find_decoder(codec_context_
->codec_id
);
362 if (!codec
|| avcodec_open2(codec_context_
, codec
, NULL
) < 0) {
363 DLOG(ERROR
) << "Could not initialize audio decoder: "
364 << codec_context_
->codec_id
;
369 av_frame_
= avcodec_alloc_frame();
370 channel_layout_
= config
.channel_layout();
371 samples_per_second_
= config
.samples_per_second();
372 output_timestamp_helper_
.reset(
373 new AudioTimestampHelper(config
.samples_per_second()));
375 // Store initial values to guard against midstream configuration changes.
376 channels_
= codec_context_
->channels
;
377 if (channels_
!= ChannelLayoutToChannelCount(channel_layout_
)) {
378 DLOG(ERROR
) << "Audio configuration specified "
379 << ChannelLayoutToChannelCount(channel_layout_
)
380 << " channels, but FFmpeg thinks the file contains "
381 << channels_
<< " channels";
384 av_sample_format_
= codec_context_
->sample_fmt
;
385 sample_format_
= AVSampleFormatToSampleFormat(
386 static_cast<AVSampleFormat
>(av_sample_format_
));
387 bytes_per_channel_
= SampleFormatToBytesPerChannel(sample_format_
);
// Frees the codec context (including its extradata, allocated by
// AudioDecoderConfigToAVCodecContext) created in ConfigureDecoder().
// NOTE(review): the remainder of this function (original lines 397-404),
// presumably nulling the freed pointers and releasing |av_frame_|, appears
// elided in this extraction — confirm against the full file.
392 void FFmpegAudioDecoder::ReleaseFFmpegResources() {
393 if (codec_context_
) {
394 av_free(codec_context_
->extradata
);
395 avcodec_close(codec_context_
);
396 av_free(codec_context_
);
// Clears all timestamp bookkeeping so the next decoded buffer re-establishes
// the output timeline (used on Reset() and after a config change).
405 void FFmpegAudioDecoder::ResetTimestampState() {
406 output_timestamp_helper_
->SetBaseTimestamp(kNoTimestamp());
407 last_input_timestamp_
= kNoTimestamp();
408 output_frames_to_drop_
= 0;
// Feeds |input| to avcodec_decode_audio4() repeatedly until the packet is
// exhausted, queueing each decoded AudioBuffer (or an error / end-of-stream
// entry) onto |queued_audio_| and reporting decode statistics.
// NOTE(review): |skip_eos_append|'s use is not visible here — its test
// (original line 522) appears elided; presumably it suppresses queueing the
// EOS buffer during a config-change flush. Confirm against the full file.
// NOTE(review): the AVPacket declaration, the "do {" opening the loop, the
// result < 0 error branch, and several closing braces (original lines 414,
// 417-419, 422-423, 428, 435-436, 441-442, 447, 450-452, etc.) appear
// elided in this extraction.
411 void FFmpegAudioDecoder::RunDecodeLoop(
412 const scoped_refptr
<DecoderBuffer
>& input
,
413 bool skip_eos_append
) {
415 av_init_packet(&packet
);
416 if (input
->end_of_stream()) {
420 packet
.data
= const_cast<uint8
*>(input
->data());
421 packet
.size
= input
->data_size();
424 // Each audio packet may contain several frames, so we must call the decoder
425 // until we've exhausted the packet. Regardless of the packet size we always
426 // want to hand it to the decoder at least once, otherwise we would end up
427 // skipping end of stream packets since they have a size of zero.
429 // Reset frame to default values.
430 avcodec_get_frame_defaults(av_frame_
);
// |frame_decoded| is set non-zero by FFmpeg when a full frame was output;
// |result| is the number of input bytes consumed (or an error code).
432 int frame_decoded
= 0;
433 int result
= avcodec_decode_audio4(
434 codec_context_
, av_frame_
, &frame_decoded
, &packet
);
437 DCHECK(!input
->end_of_stream())
438 << "End of stream buffer produced an error! "
439 << "This is quite possibly a bug in the audio decoder not handling "
440 << "end of stream AVPackets correctly.";
443 << "Error decoding an audio frame with timestamp: "
444 << input
->timestamp().InMicroseconds() << " us, duration: "
445 << input
->duration().InMicroseconds() << " us, packet size: "
446 << input
->data_size() << " bytes";
448 // TODO(dalecurtis): We should return a kDecodeError here instead:
449 // http://crbug.com/145276
453 // Update packet size and data pointer in case we need to call the decoder
454 // with the remaining bytes from this packet.
455 packet
.size
-= result
;
456 packet
.data
+= result
;
// Establish the output timeline from the first timestamped input buffer.
458 if (output_timestamp_helper_
->base_timestamp() == kNoTimestamp() &&
459 !input
->end_of_stream()) {
460 DCHECK(input
->timestamp() != kNoTimestamp());
461 if (output_frames_to_drop_
> 0) {
462 // Currently Vorbis is the only codec that causes us to drop samples.
463 // If we have to drop samples it always means the timeline starts at 0.
464 DCHECK_EQ(codec_context_
->codec_id
, AV_CODEC_ID_VORBIS
);
465 output_timestamp_helper_
->SetBaseTimestamp(base::TimeDelta());
467 output_timestamp_helper_
->SetBaseTimestamp(input
->timestamp());
471 scoped_refptr
<AudioBuffer
> output
;
472 int decoded_frames
= 0;
473 int original_frames
= 0;
474 int channels
= DetermineChannels(av_frame_
);
// Compare the frame's actual parameters against those captured at
// ConfigureDecoder() time; a mismatch is a midstream config change.
476 if (av_frame_
->sample_rate
!= samples_per_second_
||
477 channels
!= channels_
||
478 av_frame_
->format
!= av_sample_format_
) {
479 DLOG(ERROR
) << "Unsupported midstream configuration change!"
480 << " Sample Rate: " << av_frame_
->sample_rate
<< " vs "
481 << samples_per_second_
482 << ", Channels: " << channels
<< " vs "
484 << ", Sample Format: " << av_frame_
->format
<< " vs "
485 << av_sample_format_
;
487 // This is an unrecoverable error, so bail out.
488 QueuedAudioBuffer queue_entry
= { kDecodeError
, NULL
};
489 queued_audio_
.push_back(queue_entry
);
493 // Get the AudioBuffer that the data was decoded into. Adjust the number
494 // of frames, in case fewer than requested were actually decoded.
// |opaque| on the frame's AVBufferRef is the AudioBuffer installed by
// GetAudioBuffer().
495 output
= reinterpret_cast<AudioBuffer
*>(
496 av_buffer_get_opaque(av_frame_
->buf
[0]));
497 DCHECK_EQ(channels_
, output
->channel_count());
498 original_frames
= av_frame_
->nb_samples
;
499 int unread_frames
= output
->frame_count() - original_frames
;
500 DCHECK_GE(unread_frames
, 0);
501 if (unread_frames
> 0)
502 output
->TrimEnd(unread_frames
);
504 // If there are frames to drop, get rid of as many as we can.
505 if (output_frames_to_drop_
> 0) {
506 int drop
= std::min(output
->frame_count(), output_frames_to_drop_
);
507 output
->TrimStart(drop
);
508 output_frames_to_drop_
-= drop
;
511 decoded_frames
= output
->frame_count();
514 if (decoded_frames
> 0) {
515 // Set the timestamp/duration once all the extra frames have been
517 output
->set_timestamp(output_timestamp_helper_
->GetTimestamp());
518 output
->set_duration(
519 output_timestamp_helper_
->GetFrameDuration(decoded_frames
));
520 output_timestamp_helper_
->AddFrames(decoded_frames
);
521 } else if (IsEndOfStream(result
, original_frames
, input
) &&
523 DCHECK_EQ(packet
.size
, 0);
524 output
= AudioBuffer::CreateEOSBuffer();
526 // In case all the frames in the buffer were dropped.
531 QueuedAudioBuffer queue_entry
= { kOk
, output
};
532 queued_audio_
.push_back(queue_entry
);
535 // Decoding finished successfully, update statistics.
537 PipelineStatistics statistics
;
538 statistics
.audio_bytes_decoded
= result
;
539 statistics_cb_
.Run(statistics
);
// Loop while unconsumed bytes remain in the packet.
541 } while (packet
.size
> 0);