1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/cdm/ppapi/external_clear_key/ffmpeg_cdm_audio_decoder.h"
9 #include "base/logging.h"
10 #include "media/base/audio_bus.h"
11 #include "media/base/audio_timestamp_helper.h"
12 #include "media/base/buffers.h"
13 #include "media/base/data_buffer.h"
14 #include "media/base/limits.h"
15 #include "media/ffmpeg/ffmpeg_common.h"
17 // Include FFmpeg header files.
19 // Temporarily disable possible loss of data warning.
20 MSVC_PUSH_DISABLE_WARNING(4244);
21 #include <libavcodec/avcodec.h>
// Upper bound on the channel count accepted by IsValidConfig(); this is the
// maximum number of channels with a defined layout in src/media.
static const int kMaxChannels = 8;
30 static AVCodecID
CdmAudioCodecToCodecID(
31 cdm::AudioDecoderConfig::AudioCodec audio_codec
) {
32 switch (audio_codec
) {
33 case cdm::AudioDecoderConfig::kCodecVorbis
:
34 return AV_CODEC_ID_VORBIS
;
35 case cdm::AudioDecoderConfig::kCodecAac
:
36 return AV_CODEC_ID_AAC
;
37 case cdm::AudioDecoderConfig::kUnknownAudioCodec
:
39 NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec
;
40 return AV_CODEC_ID_NONE
;
44 static void CdmAudioDecoderConfigToAVCodecContext(
45 const cdm::AudioDecoderConfig
& config
,
46 AVCodecContext
* codec_context
) {
47 codec_context
->codec_type
= AVMEDIA_TYPE_AUDIO
;
48 codec_context
->codec_id
= CdmAudioCodecToCodecID(config
.codec
);
50 switch (config
.bits_per_channel
) {
52 codec_context
->sample_fmt
= AV_SAMPLE_FMT_U8
;
55 codec_context
->sample_fmt
= AV_SAMPLE_FMT_S16
;
58 codec_context
->sample_fmt
= AV_SAMPLE_FMT_S32
;
61 DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
62 "per channel: " << config
.bits_per_channel
;
63 codec_context
->sample_fmt
= AV_SAMPLE_FMT_NONE
;
66 codec_context
->channels
= config
.channel_count
;
67 codec_context
->sample_rate
= config
.samples_per_second
;
69 if (config
.extra_data
) {
70 codec_context
->extradata_size
= config
.extra_data_size
;
71 codec_context
->extradata
= reinterpret_cast<uint8_t*>(
72 av_malloc(config
.extra_data_size
+ FF_INPUT_BUFFER_PADDING_SIZE
));
73 memcpy(codec_context
->extradata
, config
.extra_data
,
74 config
.extra_data_size
);
75 memset(codec_context
->extradata
+ config
.extra_data_size
, '\0',
76 FF_INPUT_BUFFER_PADDING_SIZE
);
78 codec_context
->extradata
= NULL
;
79 codec_context
->extradata_size
= 0;
83 static cdm::AudioFormat
AVSampleFormatToCdmAudioFormat(
84 AVSampleFormat sample_format
) {
85 switch (sample_format
) {
86 case AV_SAMPLE_FMT_U8
:
87 return cdm::kAudioFormatU8
;
88 case AV_SAMPLE_FMT_S16
:
89 return cdm::kAudioFormatS16
;
90 case AV_SAMPLE_FMT_S32
:
91 return cdm::kAudioFormatS32
;
92 case AV_SAMPLE_FMT_FLT
:
93 return cdm::kAudioFormatF32
;
94 case AV_SAMPLE_FMT_S16P
:
95 return cdm::kAudioFormatPlanarS16
;
96 case AV_SAMPLE_FMT_FLTP
:
97 return cdm::kAudioFormatPlanarF32
;
99 DVLOG(1) << "Unknown AVSampleFormat: " << sample_format
;
101 return cdm::kUnknownAudioFormat
;
104 static void CopySamples(cdm::AudioFormat cdm_format
,
105 int decoded_audio_size
,
106 const AVFrame
& av_frame
,
107 uint8_t* output_buffer
) {
108 switch (cdm_format
) {
109 case cdm::kAudioFormatU8
:
110 case cdm::kAudioFormatS16
:
111 case cdm::kAudioFormatS32
:
112 case cdm::kAudioFormatF32
:
113 memcpy(output_buffer
, av_frame
.data
[0], decoded_audio_size
);
115 case cdm::kAudioFormatPlanarS16
:
116 case cdm::kAudioFormatPlanarF32
: {
117 const int decoded_size_per_channel
=
118 decoded_audio_size
/ av_frame
.channels
;
119 for (int i
= 0; i
< av_frame
.channels
; ++i
) {
120 memcpy(output_buffer
,
121 av_frame
.extended_data
[i
],
122 decoded_size_per_channel
);
123 output_buffer
+= decoded_size_per_channel
;
128 NOTREACHED() << "Unsupported CDM Audio Format!";
129 memset(output_buffer
, 0, decoded_audio_size
);
133 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(ClearKeyCdmHost
* host
)
134 : is_initialized_(false),
136 samples_per_second_(0),
138 av_sample_format_(0),
140 last_input_timestamp_(kNoTimestamp()),
141 output_bytes_to_drop_(0) {
144 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
145 ReleaseFFmpegResources();
148 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig
& config
) {
149 DVLOG(1) << "Initialize()";
150 if (!IsValidConfig(config
)) {
151 LOG(ERROR
) << "Initialize(): invalid audio decoder configuration.";
155 if (is_initialized_
) {
156 LOG(ERROR
) << "Initialize(): Already initialized.";
160 // Initialize AVCodecContext structure.
161 codec_context_
.reset(avcodec_alloc_context3(NULL
));
162 CdmAudioDecoderConfigToAVCodecContext(config
, codec_context_
.get());
164 // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
165 if (codec_context_
->sample_fmt
== AV_SAMPLE_FMT_S16P
)
166 codec_context_
->request_sample_fmt
= AV_SAMPLE_FMT_S16
;
168 AVCodec
* codec
= avcodec_find_decoder(codec_context_
->codec_id
);
169 if (!codec
|| avcodec_open2(codec_context_
.get(), codec
, NULL
) < 0) {
170 DLOG(ERROR
) << "Could not initialize audio decoder: "
171 << codec_context_
->codec_id
;
175 // Ensure avcodec_open2() respected our format request.
176 if (codec_context_
->sample_fmt
== AV_SAMPLE_FMT_S16P
) {
177 DLOG(ERROR
) << "Unable to configure a supported sample format: "
178 << codec_context_
->sample_fmt
;
183 av_frame_
.reset(av_frame_alloc());
184 samples_per_second_
= config
.samples_per_second
;
185 bytes_per_frame_
= codec_context_
->channels
* config
.bits_per_channel
/ 8;
186 output_timestamp_helper_
.reset(
187 new AudioTimestampHelper(config
.samples_per_second
));
188 is_initialized_
= true;
190 // Store initial values to guard against midstream configuration changes.
191 channels_
= codec_context_
->channels
;
192 av_sample_format_
= codec_context_
->sample_fmt
;
197 void FFmpegCdmAudioDecoder::Deinitialize() {
198 DVLOG(1) << "Deinitialize()";
199 ReleaseFFmpegResources();
200 is_initialized_
= false;
201 ResetTimestampState();
204 void FFmpegCdmAudioDecoder::Reset() {
205 DVLOG(1) << "Reset()";
206 avcodec_flush_buffers(codec_context_
.get());
207 ResetTimestampState();
211 bool FFmpegCdmAudioDecoder::IsValidConfig(
212 const cdm::AudioDecoderConfig
& config
) {
213 return config
.codec
!= cdm::AudioDecoderConfig::kUnknownAudioCodec
&&
214 config
.channel_count
> 0 &&
215 config
.channel_count
<= kMaxChannels
&&
216 config
.bits_per_channel
> 0 &&
217 config
.bits_per_channel
<= limits::kMaxBitsPerSample
&&
218 config
.samples_per_second
> 0 &&
219 config
.samples_per_second
<= limits::kMaxSampleRate
;
222 cdm::Status
FFmpegCdmAudioDecoder::DecodeBuffer(
223 const uint8_t* compressed_buffer
,
224 int32_t compressed_buffer_size
,
225 int64_t input_timestamp
,
226 cdm::AudioFrames
* decoded_frames
) {
227 DVLOG(1) << "DecodeBuffer()";
228 const bool is_end_of_stream
= !compressed_buffer
;
229 base::TimeDelta timestamp
=
230 base::TimeDelta::FromMicroseconds(input_timestamp
);
232 bool is_vorbis
= codec_context_
->codec_id
== AV_CODEC_ID_VORBIS
;
233 if (!is_end_of_stream
) {
234 if (last_input_timestamp_
== kNoTimestamp()) {
235 if (is_vorbis
&& timestamp
< base::TimeDelta()) {
236 // Dropping frames for negative timestamps as outlined in section A.2
237 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
238 int frames_to_drop
= floor(
239 0.5 + -timestamp
.InSecondsF() * samples_per_second_
);
240 output_bytes_to_drop_
= bytes_per_frame_
* frames_to_drop
;
242 last_input_timestamp_
= timestamp
;
244 } else if (timestamp
!= kNoTimestamp()) {
245 if (timestamp
< last_input_timestamp_
) {
246 base::TimeDelta diff
= timestamp
- last_input_timestamp_
;
247 DVLOG(1) << "Input timestamps are not monotonically increasing! "
248 << " ts " << timestamp
.InMicroseconds() << " us"
249 << " diff " << diff
.InMicroseconds() << " us";
250 return cdm::kDecodeError
;
253 last_input_timestamp_
= timestamp
;
258 av_init_packet(&packet
);
259 packet
.data
= const_cast<uint8_t*>(compressed_buffer
);
260 packet
.size
= compressed_buffer_size
;
262 // Tell the CDM what AudioFormat we're using.
263 const cdm::AudioFormat cdm_format
= AVSampleFormatToCdmAudioFormat(
264 static_cast<AVSampleFormat
>(av_sample_format_
));
265 DCHECK_NE(cdm_format
, cdm::kUnknownAudioFormat
);
266 decoded_frames
->SetFormat(cdm_format
);
268 // Each audio packet may contain several frames, so we must call the decoder
269 // until we've exhausted the packet. Regardless of the packet size we always
270 // want to hand it to the decoder at least once, otherwise we would end up
271 // skipping end of stream packets since they have a size of zero.
273 // Reset frame to default values.
274 av_frame_unref(av_frame_
.get());
276 int frame_decoded
= 0;
277 int result
= avcodec_decode_audio4(
278 codec_context_
.get(), av_frame_
.get(), &frame_decoded
, &packet
);
281 DCHECK(!is_end_of_stream
)
282 << "End of stream buffer produced an error! "
283 << "This is quite possibly a bug in the audio decoder not handling "
284 << "end of stream AVPackets correctly.";
287 << "Error decoding an audio frame with timestamp: "
288 << timestamp
.InMicroseconds() << " us, duration: "
289 << timestamp
.InMicroseconds() << " us, packet size: "
290 << compressed_buffer_size
<< " bytes";
292 return cdm::kDecodeError
;
295 // Update packet size and data pointer in case we need to call the decoder
296 // with the remaining bytes from this packet.
297 packet
.size
-= result
;
298 packet
.data
+= result
;
300 if (output_timestamp_helper_
->base_timestamp() == kNoTimestamp() &&
302 DCHECK(timestamp
!= kNoTimestamp());
303 if (output_bytes_to_drop_
> 0) {
304 // Currently Vorbis is the only codec that causes us to drop samples.
305 // If we have to drop samples it always means the timeline starts at 0.
306 DCHECK_EQ(codec_context_
->codec_id
, AV_CODEC_ID_VORBIS
);
307 output_timestamp_helper_
->SetBaseTimestamp(base::TimeDelta());
309 output_timestamp_helper_
->SetBaseTimestamp(timestamp
);
313 int decoded_audio_size
= 0;
315 if (av_frame_
->sample_rate
!= samples_per_second_
||
316 av_frame_
->channels
!= channels_
||
317 av_frame_
->format
!= av_sample_format_
) {
318 DLOG(ERROR
) << "Unsupported midstream configuration change!"
319 << " Sample Rate: " << av_frame_
->sample_rate
<< " vs "
320 << samples_per_second_
321 << ", Channels: " << av_frame_
->channels
<< " vs "
323 << ", Sample Format: " << av_frame_
->format
<< " vs "
324 << av_sample_format_
;
325 return cdm::kDecodeError
;
328 decoded_audio_size
= av_samples_get_buffer_size(
329 NULL
, codec_context_
->channels
, av_frame_
->nb_samples
,
330 codec_context_
->sample_fmt
, 1);
333 if (decoded_audio_size
> 0 && output_bytes_to_drop_
> 0) {
334 DCHECK_EQ(decoded_audio_size
% bytes_per_frame_
, 0)
335 << "Decoder didn't output full frames";
337 int dropped_size
= std::min(decoded_audio_size
, output_bytes_to_drop_
);
338 decoded_audio_size
-= dropped_size
;
339 output_bytes_to_drop_
-= dropped_size
;
342 if (decoded_audio_size
> 0) {
343 DCHECK_EQ(decoded_audio_size
% bytes_per_frame_
, 0)
344 << "Decoder didn't output full frames";
346 base::TimeDelta output_timestamp
=
347 output_timestamp_helper_
->GetTimestamp();
348 output_timestamp_helper_
->AddFrames(decoded_audio_size
/
351 // If we've exhausted the packet in the first decode we can write directly
352 // into the frame buffer instead of a multistep serialization approach.
353 if (serialized_audio_frames_
.empty() && !packet
.size
) {
354 const uint32_t buffer_size
= decoded_audio_size
+ sizeof(int64
) * 2;
355 decoded_frames
->SetFrameBuffer(host_
->Allocate(buffer_size
));
356 if (!decoded_frames
->FrameBuffer()) {
357 LOG(ERROR
) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
358 return cdm::kDecodeError
;
360 decoded_frames
->FrameBuffer()->SetSize(buffer_size
);
361 uint8_t* output_buffer
= decoded_frames
->FrameBuffer()->Data();
363 const int64 timestamp
= output_timestamp
.InMicroseconds();
364 memcpy(output_buffer
, ×tamp
, sizeof(timestamp
));
365 output_buffer
+= sizeof(timestamp
);
367 const int64 output_size
= decoded_audio_size
;
368 memcpy(output_buffer
, &output_size
, sizeof(output_size
));
369 output_buffer
+= sizeof(output_size
);
371 // Copy the samples and return success.
373 cdm_format
, decoded_audio_size
, *av_frame_
, output_buffer
);
374 return cdm::kSuccess
;
377 // There are still more frames to decode, so we need to serialize them in
378 // a secondary buffer since we don't know their sizes ahead of time (which
379 // is required to allocate the FrameBuffer object).
380 SerializeInt64(output_timestamp
.InMicroseconds());
381 SerializeInt64(decoded_audio_size
);
383 const size_t previous_size
= serialized_audio_frames_
.size();
384 serialized_audio_frames_
.resize(previous_size
+ decoded_audio_size
);
385 uint8_t* output_buffer
= &serialized_audio_frames_
[0] + previous_size
;
387 cdm_format
, decoded_audio_size
, *av_frame_
, output_buffer
);
389 } while (packet
.size
> 0);
391 if (!serialized_audio_frames_
.empty()) {
392 decoded_frames
->SetFrameBuffer(
393 host_
->Allocate(serialized_audio_frames_
.size()));
394 if (!decoded_frames
->FrameBuffer()) {
395 LOG(ERROR
) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
396 return cdm::kDecodeError
;
398 memcpy(decoded_frames
->FrameBuffer()->Data(),
399 &serialized_audio_frames_
[0],
400 serialized_audio_frames_
.size());
401 decoded_frames
->FrameBuffer()->SetSize(serialized_audio_frames_
.size());
402 serialized_audio_frames_
.clear();
404 return cdm::kSuccess
;
407 return cdm::kNeedMoreData
;
410 void FFmpegCdmAudioDecoder::ResetTimestampState() {
411 output_timestamp_helper_
->SetBaseTimestamp(kNoTimestamp());
412 last_input_timestamp_
= kNoTimestamp();
413 output_bytes_to_drop_
= 0;
416 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
417 DVLOG(1) << "ReleaseFFmpegResources()";
419 codec_context_
.reset();
423 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value
) {
424 const size_t previous_size
= serialized_audio_frames_
.size();
425 serialized_audio_frames_
.resize(previous_size
+ sizeof(value
));
426 memcpy(&serialized_audio_frames_
[0] + previous_size
, &value
, sizeof(value
));