1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h"
9 #include "base/logging.h"
10 #include "media/base/audio_bus.h"
11 #include "media/base/audio_timestamp_helper.h"
12 #include "media/base/buffers.h"
13 #include "media/base/data_buffer.h"
14 #include "media/base/limits.h"
15 #include "webkit/media/crypto/ppapi/cdm/content_decryption_module.h"
17 // Include FFmpeg header files.
19 // Temporarily disable possible loss of data warning.
20 MSVC_PUSH_DISABLE_WARNING(4244);
21 #include <libavcodec/avcodec.h>
25 namespace webkit_media
{
// Upper bound applied to config.channel_count by IsValidConfig() in this file.
27 // Maximum number of channels with defined layout in src/media.
28 static const int kMaxChannels
= 8;
// Translates a cdm::AudioDecoderConfig::AudioCodec value into an FFmpeg
// CodecID. In the visible code kCodecVorbis yields CODEC_ID_VORBIS, and a
// NOTREACHED() message carrying the offending value is emitted on the
// unsupported path.
// NOTE(review): the return statements for the kCodecAac / unknown paths and
// the closing braces are not present in this listing -- confirm against the
// complete file before relying on this description.
30 static CodecID
CdmAudioCodecToCodecID(
31 cdm::AudioDecoderConfig::AudioCodec audio_codec
) {
32 switch (audio_codec
) {
33 case cdm::AudioDecoderConfig::kCodecVorbis
:
34 return CODEC_ID_VORBIS
;
35 case cdm::AudioDecoderConfig::kCodecAac
:
37 case cdm::AudioDecoderConfig::kUnknownAudioCodec
:
39 NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec
;
// Populates |codec_context| from |config|:
//   - codec_type is set to AVMEDIA_TYPE_AUDIO and codec_id is derived via
//     CdmAudioCodecToCodecID(config.codec);
//   - sample_fmt is chosen by a switch on config.bits_per_channel (U8, S16
//     or S32); any other value is logged with DVLOG(1) and mapped to
//     AV_SAMPLE_FMT_NONE;
//   - channels and sample_rate are copied from the config;
//   - extradata is either a padded copy of config.extra_data or NULL/0.
// NOTE(review): the `case` labels and `break` statements of the switch are
// not present in this listing, so the exact bit-depth -> format mapping
// cannot be confirmed from here.
44 static void CdmAudioDecoderConfigToAVCodecContext(
45 const cdm::AudioDecoderConfig
& config
,
46 AVCodecContext
* codec_context
) {
47 codec_context
->codec_type
= AVMEDIA_TYPE_AUDIO
;
48 codec_context
->codec_id
= CdmAudioCodecToCodecID(config
.codec
);
50 switch (config
.bits_per_channel
) {
52 codec_context
->sample_fmt
= AV_SAMPLE_FMT_U8
;
55 codec_context
->sample_fmt
= AV_SAMPLE_FMT_S16
;
58 codec_context
->sample_fmt
= AV_SAMPLE_FMT_S32
;
61 DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
62 "per channel: " << config
.bits_per_channel
;
63 codec_context
->sample_fmt
= AV_SAMPLE_FMT_NONE
;
66 codec_context
->channels
= config
.channel_count
;
67 codec_context
->sample_rate
= config
.samples_per_second
;
// Copy the codec extra data into a freshly av_malloc()'d buffer that is
// FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the payload; the padding
// bytes are zero-filled by the memset below.
// NOTE(review): the av_malloc() result is passed to memcpy() without a
// visible NULL check -- confirm whether allocation failure must be handled.
69 if (config
.extra_data
) {
70 codec_context
->extradata_size
= config
.extra_data_size
;
71 codec_context
->extradata
= reinterpret_cast<uint8_t*>(
72 av_malloc(config
.extra_data_size
+ FF_INPUT_BUFFER_PADDING_SIZE
));
73 memcpy(codec_context
->extradata
, config
.extra_data
,
74 config
.extra_data_size
);
75 memset(codec_context
->extradata
+ config
.extra_data_size
, '\0',
76 FF_INPUT_BUFFER_PADDING_SIZE
);
// No extra data supplied: leave the context with an empty extradata buffer.
78 codec_context
->extradata
= NULL
;
79 codec_context
->extradata_size
= 0;
// Constructs a decoder in the uninitialized state. The visible initializers
// clear is_initialized_, zero samples_per_second_ and output_bytes_to_drop_,
// and set last_input_timestamp_ to media::kNoTimestamp().
// NOTE(review): part of the member initializer list (including whatever
// stores |host|) is not present in this listing -- confirm in the full file.
83 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host
* host
)
84 : is_initialized_(false),
89 samples_per_second_(0),
93 last_input_timestamp_(media::kNoTimestamp()),
94 output_bytes_to_drop_(0) {
// Destructor: delegates all cleanup to ReleaseFFmpegResources().
97 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
98 ReleaseFFmpegResources();
// Sets up the FFmpeg audio decoder for |config|.
//
// Visible steps, in order:
//   1. Reject configs that fail IsValidConfig() and repeated initialization
//      (both are logged with LOG(ERROR)).
//   2. Allocate codec_context_ and fill it from |config|.
//   3. Look up and open the decoder for codec_context_->codec_id.
//   4. Reject a resulting sample format of AV_SAMPLE_FMT_S16P.
//   5. For float output formats (FLT / FLTP) create converter_bus_, which
//      DecodeBuffer() uses to convert to integer samples.
//   6. Allocate av_frame_, record the stream parameters, create the output
//      timestamp helper and mark the decoder initialized.
//
// NOTE(review): the `return` statements and closing braces are not present
// in this listing; the boolean result of each path is presumably false on
// the error branches and true at the end -- confirm in the full file.
101 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig
& config
) {
102 DVLOG(1) << "Initialize()";
104 if (!IsValidConfig(config
)) {
105 LOG(ERROR
) << "Initialize(): invalid audio decoder configuration.";
109 if (is_initialized_
) {
110 LOG(ERROR
) << "Initialize(): Already initialized.";
114 // Initialize AVCodecContext structure.
115 codec_context_
= avcodec_alloc_context3(NULL
);
116 CdmAudioDecoderConfigToAVCodecContext(config
, codec_context_
);
118 // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
119 if (codec_context_
->sample_fmt
== AV_SAMPLE_FMT_S16P
)
120 codec_context_
->request_sample_fmt
= AV_SAMPLE_FMT_S16
;
122 AVCodec
* codec
= avcodec_find_decoder(codec_context_
->codec_id
);
123 if (!codec
|| avcodec_open2(codec_context_
, codec
, NULL
) < 0) {
124 DLOG(ERROR
) << "Could not initialize audio decoder: "
125 << codec_context_
->codec_id
;
129 // Ensure avcodec_open2() respected our format request.
130 if (codec_context_
->sample_fmt
== AV_SAMPLE_FMT_S16P
) {
131 DLOG(ERROR
) << "Unable to configure a supported sample format: "
132 << codec_context_
->sample_fmt
;
136 // Some codecs will only output float data, so we need to convert to integer
137 // before returning the decoded buffer.
138 if (codec_context_
->sample_fmt
== AV_SAMPLE_FMT_FLTP
||
139 codec_context_
->sample_fmt
== AV_SAMPLE_FMT_FLT
) {
140 // Preallocate the AudioBus for float conversions. We can treat interleaved
141 // float data as a single planar channel since our output is expected in an
142 // interleaved format anyways.
143 int channels
= codec_context_
->channels
;
// NOTE(review): the statement guarded by this `if` is not present in this
// listing; DecodeBuffer() DCHECKs that converter_bus_ has exactly one channel
// for AV_SAMPLE_FMT_FLT, so |channels| is presumably overridden to 1 here.
144 if (codec_context_
->sample_fmt
== AV_SAMPLE_FMT_FLT
)
146 converter_bus_
= media::AudioBus::CreateWrapper(channels
);
150 av_frame_
= avcodec_alloc_frame();
151 bits_per_channel_
= config
.bits_per_channel
;
152 samples_per_second_
= config
.samples_per_second
;
// Size in bytes of one output frame, i.e. one sample for every channel.
153 bytes_per_frame_
= codec_context_
->channels
* bits_per_channel_
/ 8;
154 output_timestamp_helper_
.reset(new media::AudioTimestampHelper(
155 bytes_per_frame_
, config
.samples_per_second
));
// Reserve bytes_per_frame_ * samples_per_second_ bytes up front, which is the
// size of one second of output at the configured rate.
156 serialized_audio_frames_
.reserve(bytes_per_frame_
* samples_per_second_
);
157 is_initialized_
= true;
159 // Store initial values to guard against midstream configuration changes.
160 channels_
= codec_context_
->channels
;
161 av_sample_format_
= codec_context_
->sample_fmt
;
// Tears the decoder down: releases the FFmpeg resources, clears
// is_initialized_ and resets the timestamp bookkeeping.
// NOTE(review): ResetTimestampState() dereferences output_timestamp_helper_,
// which in the visible code is only created inside Initialize() -- confirm
// that Deinitialize() cannot be reached before a successful Initialize().
166 void FFmpegCdmAudioDecoder::Deinitialize() {
167 DVLOG(1) << "Deinitialize()";
168 ReleaseFFmpegResources();
169 is_initialized_
= false;
170 ResetTimestampState();
// Discards decoder-internal buffered data via avcodec_flush_buffers() and
// resets the timestamp bookkeeping; the codec context itself stays open.
// NOTE(review): codec_context_ is passed to FFmpeg without a visible
// NULL / is_initialized_ check -- confirm callers only invoke Reset() on an
// initialized decoder.
173 void FFmpegCdmAudioDecoder::Reset() {
174 DVLOG(1) << "Reset()";
175 avcodec_flush_buffers(codec_context_
);
176 ResetTimestampState();
// Returns true only when every one of the following holds for |config|:
//   - codec is not kUnknownAudioCodec;
//   - 0 < channel_count      <= kMaxChannels;
//   - 0 < bits_per_channel   <= media::limits::kMaxBitsPerSample;
//   - 0 < samples_per_second <= media::limits::kMaxSampleRate.
180 bool FFmpegCdmAudioDecoder::IsValidConfig(
181 const cdm::AudioDecoderConfig
& config
) {
182 return config
.codec
!= cdm::AudioDecoderConfig::kUnknownAudioCodec
&&
183 config
.channel_count
> 0 &&
184 config
.channel_count
<= kMaxChannels
&&
185 config
.bits_per_channel
> 0 &&
186 config
.bits_per_channel
<= media::limits::kMaxBitsPerSample
&&
187 config
.samples_per_second
> 0 &&
188 config
.samples_per_second
<= media::limits::kMaxSampleRate
;
// Decodes one compressed audio buffer and hands the resulting PCM data to
// |decoded_frames|.
//
// Parameters:
//   compressed_buffer      - encoded input; a NULL pointer is treated as the
//                            end-of-stream marker.
//   compressed_buffer_size - size of |compressed_buffer| in bytes.
//   input_timestamp        - presentation timestamp in microseconds.
//   decoded_frames         - receives a host-allocated buffer holding the
//                            serialized output records.
//
// Returns:
//   cdm::kDecodeError  - on non-monotonic input timestamps, a midstream
//                        configuration change or a failed host allocation
//                        (a further kDecodeError return follows the decode
//                        error log).
//   cdm::kSuccess      - after serialized output was copied into
//                        |decoded_frames|.
//   cdm::kNeedMoreData - the final visible return, presumably reached when
//                        no output was produced for this input.
//
// NOTE(review): many lines of this function (the `do {` opener, several
// conditions, `else` keywords and closing braces) are not present in this
// listing, so the exact branch structure must be confirmed in the full file.
191 cdm::Status
FFmpegCdmAudioDecoder::DecodeBuffer(
192 const uint8_t* compressed_buffer
,
193 int32_t compressed_buffer_size
,
194 int64_t input_timestamp
,
195 cdm::AudioFrames
* decoded_frames
) {
196 DVLOG(1) << "DecodeBuffer()";
// A NULL input pointer signals end of stream.
197 const bool is_end_of_stream
= !compressed_buffer
;
198 base::TimeDelta timestamp
=
199 base::TimeDelta::FromMicroseconds(input_timestamp
);
201 bool is_vorbis
= codec_context_
->codec_id
== CODEC_ID_VORBIS
;
202 if (!is_end_of_stream
) {
203 if (last_input_timestamp_
== media::kNoTimestamp()) {
204 if (is_vorbis
&& timestamp
< base::TimeDelta()) {
205 // Dropping frames for negative timestamps as outlined in section A.2
206 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
207 int frames_to_drop
= floor(
208 0.5 + -timestamp
.InSecondsF() * samples_per_second_
);
209 output_bytes_to_drop_
= bytes_per_frame_
* frames_to_drop
;
211 last_input_timestamp_
= timestamp
;
213 } else if (timestamp
!= media::kNoTimestamp()) {
// Input timestamps must never move backwards; a regression is logged and
// reported as a decode error.
214 if (timestamp
< last_input_timestamp_
) {
215 base::TimeDelta diff
= timestamp
- last_input_timestamp_
;
216 DVLOG(1) << "Input timestamps are not monotonically increasing! "
217 << " ts " << timestamp
.InMicroseconds() << " us"
218 << " diff " << diff
.InMicroseconds() << " us";
219 return cdm::kDecodeError
;
222 last_input_timestamp_
= timestamp
;
// Point an AVPacket at the caller's buffer; the const_cast drops the const
// qualifier so the pointer can be stored in packet.data.
227 av_init_packet(&packet
);
228 packet
.data
= const_cast<uint8_t*>(compressed_buffer
);
229 packet
.size
= compressed_buffer_size
;
231 // Each audio packet may contain several frames, so we must call the decoder
232 // until we've exhausted the packet. Regardless of the packet size we always
233 // want to hand it to the decoder at least once, otherwise we would end up
234 // skipping end of stream packets since they have a size of zero.
236 // Reset frame to default values.
237 avcodec_get_frame_defaults(av_frame_
);
239 int frame_decoded
= 0;
240 int result
= avcodec_decode_audio4(
241 codec_context_
, av_frame_
, &frame_decoded
, &packet
);
244 DCHECK(!is_end_of_stream
)
245 << "End of stream buffer produced an error! "
246 << "This is quite possibly a bug in the audio decoder not handling "
247 << "end of stream AVPackets correctly.";
// NOTE(review): the value streamed after "duration: " below is |timestamp|
// again, so the logged duration duplicates the timestamp -- confirm whether
// a real duration value was intended.
250 << "Error decoding an audio frame with timestamp: "
251 << timestamp
.InMicroseconds() << " us, duration: "
252 << timestamp
.InMicroseconds() << " us, packet size: "
253 << compressed_buffer_size
<< " bytes";
255 return cdm::kDecodeError
;
258 // Update packet size and data pointer in case we need to call the decoder
259 // with the remaining bytes from this packet.
260 packet
.size
-= result
;
261 packet
.data
+= result
;
263 if (output_timestamp_helper_
->base_timestamp() == media::kNoTimestamp() &&
265 DCHECK(timestamp
!= media::kNoTimestamp());
266 if (output_bytes_to_drop_
> 0) {
267 // Currently Vorbis is the only codec that causes us to drop samples.
268 // If we have to drop samples it always means the timeline starts at 0.
269 DCHECK_EQ(codec_context_
->codec_id
, CODEC_ID_VORBIS
);
270 output_timestamp_helper_
->SetBaseTimestamp(base::TimeDelta());
272 output_timestamp_helper_
->SetBaseTimestamp(timestamp
);
276 int decoded_audio_size
= 0;
// The decoded frame must still match the sample rate, channel count and
// sample format recorded at initialization; any mismatch is logged and
// reported as a decode error.
278 if (av_frame_
->sample_rate
!= samples_per_second_
||
279 av_frame_
->channels
!= channels_
||
280 av_frame_
->format
!= av_sample_format_
) {
281 DLOG(ERROR
) << "Unsupported midstream configuration change!"
282 << " Sample Rate: " << av_frame_
->sample_rate
<< " vs "
283 << samples_per_second_
284 << ", Channels: " << av_frame_
->channels
<< " vs "
286 << ", Sample Format: " << av_frame_
->format
<< " vs "
287 << av_sample_format_
;
288 return cdm::kDecodeError
;
291 decoded_audio_size
= av_samples_get_buffer_size(
292 NULL
, codec_context_
->channels
, av_frame_
->nb_samples
,
293 codec_context_
->sample_fmt
, 1);
294 // If we're decoding into float, adjust audio size.
295 if (converter_bus_
&& bits_per_channel_
/ 8 != sizeof(float)) {
296 DCHECK(codec_context_
->sample_fmt
== AV_SAMPLE_FMT_FLT
||
297 codec_context_
->sample_fmt
== AV_SAMPLE_FMT_FLTP
);
// Rescale the byte count by (output bytes per sample / sizeof(float)).
298 decoded_audio_size
*=
299 static_cast<float>(bits_per_channel_
/ 8) / sizeof(float);
// Consume any pending drop budget (output_bytes_to_drop_) from the front of
// this frame's output; |start_sample| is the first frame that is kept.
303 int start_sample
= 0;
304 if (decoded_audio_size
> 0 && output_bytes_to_drop_
> 0) {
305 DCHECK_EQ(decoded_audio_size
% bytes_per_frame_
, 0)
306 << "Decoder didn't output full frames";
308 int dropped_size
= std::min(decoded_audio_size
, output_bytes_to_drop_
);
309 start_sample
= dropped_size
/ bytes_per_frame_
;
310 decoded_audio_size
-= dropped_size
;
311 output_bytes_to_drop_
-= dropped_size
;
314 scoped_refptr
<media::DataBuffer
> output
;
315 if (decoded_audio_size
> 0) {
316 DCHECK_EQ(decoded_audio_size
% bytes_per_frame_
, 0)
317 << "Decoder didn't output full frames";
319 // Convert float data using an AudioBus.
320 if (converter_bus_
) {
321 // Setup the AudioBus as a wrapper of the AVFrame data and then use
322 // AudioBus::ToInterleaved() to convert the data as necessary.
323 int skip_frames
= start_sample
;
324 int total_frames
= av_frame_
->nb_samples
;
// For interleaved float the bus has a single channel (see the DCHECK), so
// both counts are scaled by the channel count.
325 if (codec_context_
->sample_fmt
== AV_SAMPLE_FMT_FLT
) {
326 DCHECK_EQ(converter_bus_
->channels(), 1);
327 total_frames
*= codec_context_
->channels
;
328 skip_frames
*= codec_context_
->channels
;
330 converter_bus_
->set_frames(total_frames
);
331 DCHECK_EQ(decoded_audio_size
,
332 (converter_bus_
->frames() - skip_frames
) * bytes_per_frame_
);
334 for (int i
= 0; i
< converter_bus_
->channels(); ++i
) {
335 converter_bus_
->SetChannelData(i
, reinterpret_cast<float*>(
336 av_frame_
->extended_data
[i
]));
339 output
= new media::DataBuffer(decoded_audio_size
);
340 output
->SetDataSize(decoded_audio_size
);
341 converter_bus_
->ToInterleavedPartial(
342 skip_frames
, converter_bus_
->frames() - skip_frames
,
343 bits_per_channel_
/ 8, output
->GetWritableData());
// Non-float path: copy the decoded bytes from the first data plane, starting
// after the dropped frames.
345 output
= media::DataBuffer::CopyFrom(
346 av_frame_
->extended_data
[0] + start_sample
* bytes_per_frame_
,
350 base::TimeDelta output_timestamp
=
351 output_timestamp_helper_
->GetTimestamp();
352 output_timestamp_helper_
->AddBytes(decoded_audio_size
);
// Record layout appended per decoded chunk: an int64 timestamp in
// microseconds, an int64 payload size, then the payload bytes themselves.
354 // Serialize the audio samples into |serialized_audio_frames_|.
355 SerializeInt64(output_timestamp
.InMicroseconds());
356 SerializeInt64(output
->GetDataSize());
357 serialized_audio_frames_
.insert(
358 serialized_audio_frames_
.end(),
360 output
->GetData() + output
->GetDataSize());
362 } while (packet
.size
> 0);
// Hand the accumulated records to the caller in a host-allocated buffer, then
// clear the staging vector.
364 if (!serialized_audio_frames_
.empty()) {
365 decoded_frames
->SetFrameBuffer(
366 host_
->Allocate(serialized_audio_frames_
.size()));
367 if (!decoded_frames
->FrameBuffer()) {
368 LOG(ERROR
) << "DecodeBuffer() cdm::Host::Allocate failed.";
369 return cdm::kDecodeError
;
371 memcpy(decoded_frames
->FrameBuffer()->Data(),
372 &serialized_audio_frames_
[0],
373 serialized_audio_frames_
.size());
374 decoded_frames
->FrameBuffer()->SetSize(serialized_audio_frames_
.size());
375 serialized_audio_frames_
.clear();
377 return cdm::kSuccess
;
380 return cdm::kNeedMoreData
;
// Returns the timestamp bookkeeping to its "no data seen yet" state: the
// output helper's base timestamp and last_input_timestamp_ are both set to
// media::kNoTimestamp(), and the pending drop budget is cleared.
// NOTE(review): output_timestamp_helper_ is dereferenced unconditionally --
// confirm it is always non-null when this runs.
383 void FFmpegCdmAudioDecoder::ResetTimestampState() {
384 output_timestamp_helper_
->SetBaseTimestamp(media::kNoTimestamp());
385 last_input_timestamp_
= media::kNoTimestamp();
386 output_bytes_to_drop_
= 0;
// Frees the FFmpeg objects owned by this decoder. When codec_context_ is
// set, its extradata buffer is freed, the codec is closed, the context is
// freed and the member is reset to NULL so a repeated call skips this branch.
// NOTE(review): further cleanup lines after this branch (presumably for
// av_frame_) are not present in this listing -- confirm in the full file.
389 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
390 DVLOG(1) << "ReleaseFFmpegResources()";
392 if (codec_context_
) {
393 av_free(codec_context_
->extradata
);
394 avcodec_close(codec_context_
);
395 av_free(codec_context_
);
396 codec_context_
= NULL
;
// Appends the in-memory bytes of |value| to serialized_audio_frames_: the
// vector is grown by sizeof(value) and the value is memcpy()'d into the new
// tail, so the bytes are written in whatever order the host stores an int64.
// NOTE(review): the previous size is held in an `int`; confirm the buffer
// can never grow past the range of int.
404 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value
) {
405 int previous_size
= serialized_audio_frames_
.size();
406 serialized_audio_frames_
.resize(previous_size
+ sizeof(value
));
407 memcpy(&serialized_audio_frames_
[0] + previous_size
, &value
, sizeof(value
));
410 } // namespace webkit_media