// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/filters/ffmpeg_audio_decoder.h"

#include <algorithm>
#include <cmath>

#include "base/callback_helpers.h"
#include "base/location.h"
#include "base/single_thread_task_runner.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/bind_to_current_loop.h"
#include "media/base/decoder_buffer.h"
#include "media/base/demuxer.h"
#include "media/base/limits.h"
#include "media/base/pipeline.h"
#include "media/base/sample_format.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"
26 // Helper structure for managing multiple decoded audio frames per packet.
27 struct QueuedAudioBuffer
{
28 AudioDecoder::Status status
;
29 scoped_refptr
<AudioBuffer
> buffer
;
32 // Returns true if the decode result was end of stream.
33 static inline bool IsEndOfStream(int result
,
35 const scoped_refptr
<DecoderBuffer
>& input
) {
36 // Three conditions to meet to declare end of stream for this decoder:
37 // 1. FFmpeg didn't read anything.
38 // 2. FFmpeg didn't output anything.
39 // 3. An end of stream buffer is received.
40 return result
== 0 && decoded_size
== 0 && input
->end_of_stream();
43 // Return the number of channels from the data in |frame|.
44 static inline int DetermineChannels(AVFrame
* frame
) {
45 #if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
46 // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
47 return av_get_channel_layout_nb_channels(frame
->channel_layout
);
49 return frame
->channels
;
53 // Called by FFmpeg's allocation routine to allocate a buffer. Uses
54 // AVCodecContext.opaque to get the object reference in order to call
55 // GetAudioBuffer() to do the actual allocation.
56 static int GetAudioBufferImpl(struct AVCodecContext
* s
,
59 DCHECK(s
->codec
->capabilities
& CODEC_CAP_DR1
);
60 DCHECK_EQ(s
->codec_type
, AVMEDIA_TYPE_AUDIO
);
61 FFmpegAudioDecoder
* decoder
= static_cast<FFmpegAudioDecoder
*>(s
->opaque
);
62 return decoder
->GetAudioBuffer(s
, frame
, flags
);
65 // Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
66 // AudioBuffer allocated, so unref it.
67 static void ReleaseAudioBufferImpl(void* opaque
, uint8
* data
) {
68 scoped_refptr
<AudioBuffer
> buffer
;
69 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
72 FFmpegAudioDecoder::FFmpegAudioDecoder(
73 const scoped_refptr
<base::SingleThreadTaskRunner
>& task_runner
)
74 : task_runner_(task_runner
),
76 demuxer_stream_(NULL
),
77 bytes_per_channel_(0),
78 channel_layout_(CHANNEL_LAYOUT_NONE
),
80 samples_per_second_(0),
82 last_input_timestamp_(kNoTimestamp()),
83 output_frames_to_drop_(0) {
86 void FFmpegAudioDecoder::Initialize(
87 DemuxerStream
* stream
,
88 const PipelineStatusCB
& status_cb
,
89 const StatisticsCB
& statistics_cb
) {
90 DCHECK(task_runner_
->BelongsToCurrentThread());
91 PipelineStatusCB initialize_cb
= BindToCurrentLoop(status_cb
);
93 FFmpegGlue::InitializeFFmpeg();
95 if (demuxer_stream_
) {
96 // TODO(scherkus): initialization currently happens more than once in
97 // PipelineIntegrationTest.BasicPlayback.
98 LOG(ERROR
) << "Initialize has already been called.";
102 weak_this_
= weak_factory_
.GetWeakPtr();
103 demuxer_stream_
= stream
;
105 if (!ConfigureDecoder()) {
106 status_cb
.Run(DECODER_ERROR_NOT_SUPPORTED
);
110 statistics_cb_
= statistics_cb
;
111 initialize_cb
.Run(PIPELINE_OK
);
114 void FFmpegAudioDecoder::Read(const ReadCB
& read_cb
) {
115 DCHECK(task_runner_
->BelongsToCurrentThread());
116 DCHECK(!read_cb
.is_null());
117 CHECK(read_cb_
.is_null()) << "Overlapping decodes are not supported.";
118 DCHECK(reset_cb_
.is_null());
119 DCHECK(stop_cb_
.is_null());
121 read_cb_
= BindToCurrentLoop(read_cb
);
123 // If we don't have any queued audio from the last packet we decoded, ask for
124 // more data from the demuxer to satisfy this read.
125 if (queued_audio_
.empty()) {
126 ReadFromDemuxerStream();
130 base::ResetAndReturn(&read_cb_
).Run(
131 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
132 queued_audio_
.pop_front();
135 int FFmpegAudioDecoder::bits_per_channel() {
136 DCHECK(task_runner_
->BelongsToCurrentThread());
137 return bytes_per_channel_
* 8;
140 ChannelLayout
FFmpegAudioDecoder::channel_layout() {
141 DCHECK(task_runner_
->BelongsToCurrentThread());
142 return channel_layout_
;
145 int FFmpegAudioDecoder::samples_per_second() {
146 DCHECK(task_runner_
->BelongsToCurrentThread());
147 return samples_per_second_
;
150 void FFmpegAudioDecoder::Reset(const base::Closure
& closure
) {
151 DCHECK(task_runner_
->BelongsToCurrentThread());
152 reset_cb_
= BindToCurrentLoop(closure
);
154 // A demuxer read is pending, we'll wait until it finishes.
155 if (!read_cb_
.is_null())
161 void FFmpegAudioDecoder::Stop(const base::Closure
& closure
) {
162 DCHECK(task_runner_
->BelongsToCurrentThread());
163 stop_cb_
= BindToCurrentLoop(closure
);
165 // A demuxer read is pending, we'll wait until it finishes.
166 if (!read_cb_
.is_null())
169 if (!reset_cb_
.is_null()) {
// NOTE(review): empty destructor — teardown appears to happen via
// Stop()/DoStop() and ReleaseFFmpegResources(); confirm callers stop the
// decoder before destroying it.
FFmpegAudioDecoder::~FFmpegAudioDecoder() {}
179 int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext
* codec
,
182 // Since this routine is called by FFmpeg when a buffer is required for audio
183 // data, use the values supplied by FFmpeg (ignoring the current settings).
184 // RunDecodeLoop() gets to determine if the buffer is useable or not.
185 AVSampleFormat format
= static_cast<AVSampleFormat
>(frame
->format
);
186 SampleFormat sample_format
= AVSampleFormatToSampleFormat(format
);
187 int channels
= DetermineChannels(frame
);
188 if ((channels
<= 0) || (channels
>= limits::kMaxChannels
)) {
189 DLOG(ERROR
) << "Requested number of channels (" << channels
190 << ") exceeds limit.";
191 return AVERROR(EINVAL
);
194 int bytes_per_channel
= SampleFormatToBytesPerChannel(sample_format
);
195 if (frame
->nb_samples
<= 0)
196 return AVERROR(EINVAL
);
198 // Determine how big the buffer should be and allocate it. FFmpeg may adjust
199 // how big each channel data is in order to meet the alignment policy, so
200 // we need to take this into consideration.
201 int buffer_size_in_bytes
=
202 av_samples_get_buffer_size(&frame
->linesize
[0],
206 AudioBuffer::kChannelAlignment
);
207 // Check for errors from av_samples_get_buffer_size().
208 if (buffer_size_in_bytes
< 0)
209 return buffer_size_in_bytes
;
210 int frames_required
= buffer_size_in_bytes
/ bytes_per_channel
/ channels
;
211 DCHECK_GE(frames_required
, frame
->nb_samples
);
212 scoped_refptr
<AudioBuffer
> buffer
=
213 AudioBuffer::CreateBuffer(sample_format
, channels
, frames_required
);
215 // Initialize the data[] and extended_data[] fields to point into the memory
216 // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
217 // audio and equal to |channels| for planar audio.
218 int number_of_planes
= buffer
->channel_data().size();
219 if (number_of_planes
<= AV_NUM_DATA_POINTERS
) {
220 DCHECK_EQ(frame
->extended_data
, frame
->data
);
221 for (int i
= 0; i
< number_of_planes
; ++i
)
222 frame
->data
[i
] = buffer
->channel_data()[i
];
224 // There are more channels than can fit into data[], so allocate
225 // extended_data[] and fill appropriately.
226 frame
->extended_data
= static_cast<uint8
**>(
227 av_malloc(number_of_planes
* sizeof(*frame
->extended_data
)));
229 for (; i
< AV_NUM_DATA_POINTERS
; ++i
)
230 frame
->extended_data
[i
] = frame
->data
[i
] = buffer
->channel_data()[i
];
231 for (; i
< number_of_planes
; ++i
)
232 frame
->extended_data
[i
] = buffer
->channel_data()[i
];
235 // Now create an AVBufferRef for the data just allocated. It will own the
236 // reference to the AudioBuffer object.
238 buffer
.swap(reinterpret_cast<AudioBuffer
**>(&opaque
));
239 frame
->buf
[0] = av_buffer_create(
240 frame
->data
[0], buffer_size_in_bytes
, ReleaseAudioBufferImpl
, opaque
, 0);
244 void FFmpegAudioDecoder::DoStop() {
245 DCHECK(task_runner_
->BelongsToCurrentThread());
246 DCHECK(!stop_cb_
.is_null());
247 DCHECK(read_cb_
.is_null());
248 DCHECK(reset_cb_
.is_null());
250 ResetTimestampState();
251 queued_audio_
.clear();
252 ReleaseFFmpegResources();
253 base::ResetAndReturn(&stop_cb_
).Run();
256 void FFmpegAudioDecoder::DoReset() {
257 DCHECK(task_runner_
->BelongsToCurrentThread());
258 DCHECK(!reset_cb_
.is_null());
259 DCHECK(read_cb_
.is_null());
261 avcodec_flush_buffers(codec_context_
.get());
262 ResetTimestampState();
263 queued_audio_
.clear();
264 base::ResetAndReturn(&reset_cb_
).Run();
266 if (!stop_cb_
.is_null())
270 void FFmpegAudioDecoder::ReadFromDemuxerStream() {
271 DCHECK(!read_cb_
.is_null());
272 demuxer_stream_
->Read(base::Bind(
273 &FFmpegAudioDecoder::BufferReady
, weak_this_
));
276 void FFmpegAudioDecoder::BufferReady(
277 DemuxerStream::Status status
,
278 const scoped_refptr
<DecoderBuffer
>& input
) {
279 DCHECK(task_runner_
->BelongsToCurrentThread());
280 DCHECK(!read_cb_
.is_null());
281 DCHECK(queued_audio_
.empty());
282 DCHECK_EQ(status
!= DemuxerStream::kOk
, !input
.get()) << status
;
284 // Pending Reset: ignore the buffer we just got, send kAborted to |read_cb_|
285 // and carry out the Reset().
286 // If there happens to also be a pending Stop(), that will be handled at
287 // the end of DoReset().
288 if (!reset_cb_
.is_null()) {
289 base::ResetAndReturn(&read_cb_
).Run(kAborted
, NULL
);
294 // Pending Stop: ignore the buffer we just got, send kAborted to |read_cb_|
295 // and carry out the Stop().
296 if (!stop_cb_
.is_null()) {
297 base::ResetAndReturn(&read_cb_
).Run(kAborted
, NULL
);
302 if (status
== DemuxerStream::kAborted
) {
303 DCHECK(!input
.get());
304 base::ResetAndReturn(&read_cb_
).Run(kAborted
, NULL
);
308 if (status
== DemuxerStream::kConfigChanged
) {
309 DCHECK(!input
.get());
311 // Send a "end of stream" buffer to the decode loop
312 // to output any remaining data still in the decoder.
313 RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);
315 DVLOG(1) << "Config changed.";
317 if (!ConfigureDecoder()) {
318 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
322 ResetTimestampState();
324 if (queued_audio_
.empty()) {
325 ReadFromDemuxerStream();
329 base::ResetAndReturn(&read_cb_
).Run(
330 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
331 queued_audio_
.pop_front();
335 DCHECK_EQ(status
, DemuxerStream::kOk
);
338 // Make sure we are notified if http://crbug.com/49709 returns. Issue also
339 // occurs with some damaged files.
340 if (!input
->end_of_stream() && input
->timestamp() == kNoTimestamp() &&
341 output_timestamp_helper_
->base_timestamp() == kNoTimestamp()) {
342 DVLOG(1) << "Received a buffer without timestamps!";
343 base::ResetAndReturn(&read_cb_
).Run(kDecodeError
, NULL
);
347 if (!input
->end_of_stream()) {
348 if (last_input_timestamp_
== kNoTimestamp() &&
349 codec_context_
->codec_id
== AV_CODEC_ID_VORBIS
&&
350 input
->timestamp() < base::TimeDelta()) {
351 // Dropping frames for negative timestamps as outlined in section A.2
352 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
353 output_frames_to_drop_
= floor(
354 0.5 + -input
->timestamp().InSecondsF() * samples_per_second_
);
356 if (last_input_timestamp_
!= kNoTimestamp() &&
357 input
->timestamp() < last_input_timestamp_
) {
358 const base::TimeDelta diff
= input
->timestamp() - last_input_timestamp_
;
360 << "Input timestamps are not monotonically increasing! "
361 << " ts " << input
->timestamp().InMicroseconds() << " us"
362 << " diff " << diff
.InMicroseconds() << " us";
365 last_input_timestamp_
= input
->timestamp();
369 RunDecodeLoop(input
, false);
371 // We exhausted the provided packet, but it wasn't enough for a frame. Ask
372 // for more data in order to fulfill this read.
373 if (queued_audio_
.empty()) {
374 ReadFromDemuxerStream();
378 // Execute callback to return the first frame we decoded.
379 base::ResetAndReturn(&read_cb_
).Run(
380 queued_audio_
.front().status
, queued_audio_
.front().buffer
);
381 queued_audio_
.pop_front();
384 bool FFmpegAudioDecoder::ConfigureDecoder() {
385 const AudioDecoderConfig
& config
= demuxer_stream_
->audio_decoder_config();
387 if (!config
.IsValidConfig()) {
388 DLOG(ERROR
) << "Invalid audio stream -"
389 << " codec: " << config
.codec()
390 << " channel layout: " << config
.channel_layout()
391 << " bits per channel: " << config
.bits_per_channel()
392 << " samples per second: " << config
.samples_per_second();
396 if (config
.is_encrypted()) {
397 DLOG(ERROR
) << "Encrypted audio stream not supported";
401 if (codec_context_
.get() &&
402 (bytes_per_channel_
!= config
.bytes_per_channel() ||
403 channel_layout_
!= config
.channel_layout() ||
404 samples_per_second_
!= config
.samples_per_second())) {
405 DVLOG(1) << "Unsupported config change :";
406 DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
407 << " -> " << config
.bytes_per_channel();
408 DVLOG(1) << "\tchannel_layout : " << channel_layout_
409 << " -> " << config
.channel_layout();
410 DVLOG(1) << "\tsample_rate : " << samples_per_second_
411 << " -> " << config
.samples_per_second();
415 // Release existing decoder resources if necessary.
416 ReleaseFFmpegResources();
418 // Initialize AVCodecContext structure.
419 codec_context_
.reset(avcodec_alloc_context3(NULL
));
420 AudioDecoderConfigToAVCodecContext(config
, codec_context_
.get());
422 codec_context_
->opaque
= this;
423 codec_context_
->get_buffer2
= GetAudioBufferImpl
;
424 codec_context_
->refcounted_frames
= 1;
426 AVCodec
* codec
= avcodec_find_decoder(codec_context_
->codec_id
);
427 if (!codec
|| avcodec_open2(codec_context_
.get(), codec
, NULL
) < 0) {
428 DLOG(ERROR
) << "Could not initialize audio decoder: "
429 << codec_context_
->codec_id
;
434 av_frame_
.reset(av_frame_alloc());
435 channel_layout_
= config
.channel_layout();
436 samples_per_second_
= config
.samples_per_second();
437 output_timestamp_helper_
.reset(
438 new AudioTimestampHelper(config
.samples_per_second()));
440 // Store initial values to guard against midstream configuration changes.
441 channels_
= codec_context_
->channels
;
442 if (channels_
!= ChannelLayoutToChannelCount(channel_layout_
)) {
443 DLOG(ERROR
) << "Audio configuration specified "
444 << ChannelLayoutToChannelCount(channel_layout_
)
445 << " channels, but FFmpeg thinks the file contains "
446 << channels_
<< " channels";
449 av_sample_format_
= codec_context_
->sample_fmt
;
450 sample_format_
= AVSampleFormatToSampleFormat(
451 static_cast<AVSampleFormat
>(av_sample_format_
));
452 bytes_per_channel_
= SampleFormatToBytesPerChannel(sample_format_
);
457 void FFmpegAudioDecoder::ReleaseFFmpegResources() {
458 codec_context_
.reset();
462 void FFmpegAudioDecoder::ResetTimestampState() {
463 output_timestamp_helper_
->SetBaseTimestamp(kNoTimestamp());
464 last_input_timestamp_
= kNoTimestamp();
465 output_frames_to_drop_
= 0;
468 void FFmpegAudioDecoder::RunDecodeLoop(
469 const scoped_refptr
<DecoderBuffer
>& input
,
470 bool skip_eos_append
) {
472 av_init_packet(&packet
);
473 if (input
->end_of_stream()) {
477 packet
.data
= const_cast<uint8
*>(input
->data());
478 packet
.size
= input
->data_size();
481 // Each audio packet may contain several frames, so we must call the decoder
482 // until we've exhausted the packet. Regardless of the packet size we always
483 // want to hand it to the decoder at least once, otherwise we would end up
484 // skipping end of stream packets since they have a size of zero.
486 int frame_decoded
= 0;
487 int result
= avcodec_decode_audio4(
488 codec_context_
.get(), av_frame_
.get(), &frame_decoded
, &packet
);
491 DCHECK(!input
->end_of_stream())
492 << "End of stream buffer produced an error! "
493 << "This is quite possibly a bug in the audio decoder not handling "
494 << "end of stream AVPackets correctly.";
497 << "Failed to decode an audio frame with timestamp: "
498 << input
->timestamp().InMicroseconds() << " us, duration: "
499 << input
->duration().InMicroseconds() << " us, packet size: "
500 << input
->data_size() << " bytes";
505 // Update packet size and data pointer in case we need to call the decoder
506 // with the remaining bytes from this packet.
507 packet
.size
-= result
;
508 packet
.data
+= result
;
510 if (output_timestamp_helper_
->base_timestamp() == kNoTimestamp() &&
511 !input
->end_of_stream()) {
512 DCHECK(input
->timestamp() != kNoTimestamp());
513 if (output_frames_to_drop_
> 0) {
514 // Currently Vorbis is the only codec that causes us to drop samples.
515 // If we have to drop samples it always means the timeline starts at 0.
516 DCHECK_EQ(codec_context_
->codec_id
, AV_CODEC_ID_VORBIS
);
517 output_timestamp_helper_
->SetBaseTimestamp(base::TimeDelta());
519 output_timestamp_helper_
->SetBaseTimestamp(input
->timestamp());
523 scoped_refptr
<AudioBuffer
> output
;
524 int decoded_frames
= 0;
525 int original_frames
= 0;
526 int channels
= DetermineChannels(av_frame_
.get());
528 if (av_frame_
->sample_rate
!= samples_per_second_
||
529 channels
!= channels_
||
530 av_frame_
->format
!= av_sample_format_
) {
531 DLOG(ERROR
) << "Unsupported midstream configuration change!"
532 << " Sample Rate: " << av_frame_
->sample_rate
<< " vs "
533 << samples_per_second_
534 << ", Channels: " << channels
<< " vs "
536 << ", Sample Format: " << av_frame_
->format
<< " vs "
537 << av_sample_format_
;
539 // This is an unrecoverable error, so bail out.
540 QueuedAudioBuffer queue_entry
= { kDecodeError
, NULL
};
541 queued_audio_
.push_back(queue_entry
);
542 av_frame_unref(av_frame_
.get());
546 // Get the AudioBuffer that the data was decoded into. Adjust the number
547 // of frames, in case fewer than requested were actually decoded.
548 output
= reinterpret_cast<AudioBuffer
*>(
549 av_buffer_get_opaque(av_frame_
->buf
[0]));
550 DCHECK_EQ(channels_
, output
->channel_count());
551 original_frames
= av_frame_
->nb_samples
;
552 int unread_frames
= output
->frame_count() - original_frames
;
553 DCHECK_GE(unread_frames
, 0);
554 if (unread_frames
> 0)
555 output
->TrimEnd(unread_frames
);
557 // If there are frames to drop, get rid of as many as we can.
558 if (output_frames_to_drop_
> 0) {
559 int drop
= std::min(output
->frame_count(), output_frames_to_drop_
);
560 output
->TrimStart(drop
);
561 output_frames_to_drop_
-= drop
;
564 decoded_frames
= output
->frame_count();
565 av_frame_unref(av_frame_
.get());
568 // WARNING: |av_frame_| no longer has valid data at this point.
570 if (decoded_frames
> 0) {
571 // Set the timestamp/duration once all the extra frames have been
573 output
->set_timestamp(output_timestamp_helper_
->GetTimestamp());
574 output
->set_duration(
575 output_timestamp_helper_
->GetFrameDuration(decoded_frames
));
576 output_timestamp_helper_
->AddFrames(decoded_frames
);
577 } else if (IsEndOfStream(result
, original_frames
, input
) &&
579 DCHECK_EQ(packet
.size
, 0);
580 output
= AudioBuffer::CreateEOSBuffer();
582 // In case all the frames in the buffer were dropped.
587 QueuedAudioBuffer queue_entry
= { kOk
, output
};
588 queued_audio_
.push_back(queue_entry
);
591 // Decoding finished successfully, update statistics.
593 PipelineStatistics statistics
;
594 statistics
.audio_bytes_decoded
= result
;
595 statistics_cb_
.Run(statistics
);
597 } while (packet
.size
> 0);