Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / media / cdm / ppapi / external_clear_key / ffmpeg_cdm_audio_decoder.cc
blob29b29a53d6341a8e6491e1ac7c0be015b5d2dc9b
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/cdm/ppapi/external_clear_key/ffmpeg_cdm_audio_decoder.h"
7 #include <algorithm>
9 #include "base/logging.h"
10 #include "media/base/audio_bus.h"
11 #include "media/base/audio_timestamp_helper.h"
12 #include "media/base/data_buffer.h"
13 #include "media/base/limits.h"
14 #include "media/base/timestamp_constants.h"
15 #include "media/ffmpeg/ffmpeg_common.h"
17 // Include FFmpeg header files.
18 extern "C" {
19 // Temporarily disable possible loss of data warning.
20 MSVC_PUSH_DISABLE_WARNING(4244);
21 #include <libavcodec/avcodec.h>
22 MSVC_POP_WARNING();
23 } // extern "C"
25 namespace media {
// Upper bound on the channel count accepted by IsValidConfig(); this is the
// largest number of channels that has a defined layout in src/media.
static const int kMaxChannels = 8;
30 static AVCodecID CdmAudioCodecToCodecID(
31 cdm::AudioDecoderConfig::AudioCodec audio_codec) {
32 switch (audio_codec) {
33 case cdm::AudioDecoderConfig::kCodecVorbis:
34 return AV_CODEC_ID_VORBIS;
35 case cdm::AudioDecoderConfig::kCodecAac:
36 return AV_CODEC_ID_AAC;
37 case cdm::AudioDecoderConfig::kUnknownAudioCodec:
38 default:
39 NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
40 return AV_CODEC_ID_NONE;
44 static void CdmAudioDecoderConfigToAVCodecContext(
45 const cdm::AudioDecoderConfig& config,
46 AVCodecContext* codec_context) {
47 codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
48 codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);
50 switch (config.bits_per_channel) {
51 case 8:
52 codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
53 break;
54 case 16:
55 codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
56 break;
57 case 32:
58 codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
59 break;
60 default:
61 DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
62 "per channel: " << config.bits_per_channel;
63 codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
66 codec_context->channels = config.channel_count;
67 codec_context->sample_rate = config.samples_per_second;
69 if (config.extra_data) {
70 codec_context->extradata_size = config.extra_data_size;
71 codec_context->extradata = reinterpret_cast<uint8_t*>(
72 av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
73 memcpy(codec_context->extradata, config.extra_data,
74 config.extra_data_size);
75 memset(codec_context->extradata + config.extra_data_size, '\0',
76 FF_INPUT_BUFFER_PADDING_SIZE);
77 } else {
78 codec_context->extradata = NULL;
79 codec_context->extradata_size = 0;
83 static cdm::AudioFormat AVSampleFormatToCdmAudioFormat(
84 AVSampleFormat sample_format) {
85 switch (sample_format) {
86 case AV_SAMPLE_FMT_U8:
87 return cdm::kAudioFormatU8;
88 case AV_SAMPLE_FMT_S16:
89 return cdm::kAudioFormatS16;
90 case AV_SAMPLE_FMT_S32:
91 return cdm::kAudioFormatS32;
92 case AV_SAMPLE_FMT_FLT:
93 return cdm::kAudioFormatF32;
94 case AV_SAMPLE_FMT_S16P:
95 return cdm::kAudioFormatPlanarS16;
96 case AV_SAMPLE_FMT_FLTP:
97 return cdm::kAudioFormatPlanarF32;
98 default:
99 DVLOG(1) << "Unknown AVSampleFormat: " << sample_format;
101 return cdm::kUnknownAudioFormat;
104 static void CopySamples(cdm::AudioFormat cdm_format,
105 int decoded_audio_size,
106 const AVFrame& av_frame,
107 uint8_t* output_buffer) {
108 switch (cdm_format) {
109 case cdm::kAudioFormatU8:
110 case cdm::kAudioFormatS16:
111 case cdm::kAudioFormatS32:
112 case cdm::kAudioFormatF32:
113 memcpy(output_buffer, av_frame.data[0], decoded_audio_size);
114 break;
115 case cdm::kAudioFormatPlanarS16:
116 case cdm::kAudioFormatPlanarF32: {
117 const int decoded_size_per_channel =
118 decoded_audio_size / av_frame.channels;
119 for (int i = 0; i < av_frame.channels; ++i) {
120 memcpy(output_buffer,
121 av_frame.extended_data[i],
122 decoded_size_per_channel);
123 output_buffer += decoded_size_per_channel;
125 break;
127 default:
128 NOTREACHED() << "Unsupported CDM Audio Format!";
129 memset(output_buffer, 0, decoded_audio_size);
133 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(ClearKeyCdmHost* host)
134 : is_initialized_(false),
135 host_(host),
136 samples_per_second_(0),
137 channels_(0),
138 av_sample_format_(0),
139 bytes_per_frame_(0),
140 last_input_timestamp_(kNoTimestamp()),
141 output_bytes_to_drop_(0) {
144 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
145 ReleaseFFmpegResources();
148 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
149 DVLOG(1) << "Initialize()";
150 if (!IsValidConfig(config)) {
151 LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
152 return false;
155 if (is_initialized_) {
156 LOG(ERROR) << "Initialize(): Already initialized.";
157 return false;
160 // Initialize AVCodecContext structure.
161 codec_context_.reset(avcodec_alloc_context3(NULL));
162 CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get());
164 // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
165 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
166 codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
168 AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
169 if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
170 DLOG(ERROR) << "Could not initialize audio decoder: "
171 << codec_context_->codec_id;
172 return false;
175 // Ensure avcodec_open2() respected our format request.
176 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
177 DLOG(ERROR) << "Unable to configure a supported sample format: "
178 << codec_context_->sample_fmt;
179 return false;
182 // Success!
183 av_frame_.reset(av_frame_alloc());
184 samples_per_second_ = config.samples_per_second;
185 bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8;
186 output_timestamp_helper_.reset(
187 new AudioTimestampHelper(config.samples_per_second));
188 is_initialized_ = true;
190 // Store initial values to guard against midstream configuration changes.
191 channels_ = codec_context_->channels;
192 av_sample_format_ = codec_context_->sample_fmt;
194 return true;
197 void FFmpegCdmAudioDecoder::Deinitialize() {
198 DVLOG(1) << "Deinitialize()";
199 ReleaseFFmpegResources();
200 is_initialized_ = false;
201 ResetTimestampState();
204 void FFmpegCdmAudioDecoder::Reset() {
205 DVLOG(1) << "Reset()";
206 avcodec_flush_buffers(codec_context_.get());
207 ResetTimestampState();
210 // static
211 bool FFmpegCdmAudioDecoder::IsValidConfig(
212 const cdm::AudioDecoderConfig& config) {
213 return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec &&
214 config.channel_count > 0 &&
215 config.channel_count <= kMaxChannels &&
216 config.bits_per_channel > 0 &&
217 config.bits_per_channel <= limits::kMaxBitsPerSample &&
218 config.samples_per_second > 0 &&
219 config.samples_per_second <= limits::kMaxSampleRate;
222 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
223 const uint8_t* compressed_buffer,
224 int32_t compressed_buffer_size,
225 int64_t input_timestamp,
226 cdm::AudioFrames* decoded_frames) {
227 DVLOG(1) << "DecodeBuffer()";
228 const bool is_end_of_stream = !compressed_buffer;
229 base::TimeDelta timestamp =
230 base::TimeDelta::FromMicroseconds(input_timestamp);
232 bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
233 if (!is_end_of_stream) {
234 if (last_input_timestamp_ == kNoTimestamp()) {
235 if (is_vorbis && timestamp < base::TimeDelta()) {
236 // Dropping frames for negative timestamps as outlined in section A.2
237 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
238 int frames_to_drop = floor(
239 0.5 + -timestamp.InSecondsF() * samples_per_second_);
240 output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
241 } else {
242 last_input_timestamp_ = timestamp;
244 } else if (timestamp != kNoTimestamp()) {
245 if (timestamp < last_input_timestamp_) {
246 base::TimeDelta diff = timestamp - last_input_timestamp_;
247 DVLOG(1) << "Input timestamps are not monotonically increasing! "
248 << " ts " << timestamp.InMicroseconds() << " us"
249 << " diff " << diff.InMicroseconds() << " us";
250 return cdm::kDecodeError;
253 last_input_timestamp_ = timestamp;
257 AVPacket packet;
258 av_init_packet(&packet);
259 packet.data = const_cast<uint8_t*>(compressed_buffer);
260 packet.size = compressed_buffer_size;
262 // Tell the CDM what AudioFormat we're using.
263 const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat(
264 static_cast<AVSampleFormat>(av_sample_format_));
265 DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat);
266 decoded_frames->SetFormat(cdm_format);
268 // Each audio packet may contain several frames, so we must call the decoder
269 // until we've exhausted the packet. Regardless of the packet size we always
270 // want to hand it to the decoder at least once, otherwise we would end up
271 // skipping end of stream packets since they have a size of zero.
272 do {
273 // Reset frame to default values.
274 av_frame_unref(av_frame_.get());
276 int frame_decoded = 0;
277 int result = avcodec_decode_audio4(
278 codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);
280 if (result < 0) {
281 DCHECK(!is_end_of_stream)
282 << "End of stream buffer produced an error! "
283 << "This is quite possibly a bug in the audio decoder not handling "
284 << "end of stream AVPackets correctly.";
286 DLOG(ERROR)
287 << "Error decoding an audio frame with timestamp: "
288 << timestamp.InMicroseconds() << " us, duration: "
289 << timestamp.InMicroseconds() << " us, packet size: "
290 << compressed_buffer_size << " bytes";
292 return cdm::kDecodeError;
295 // Update packet size and data pointer in case we need to call the decoder
296 // with the remaining bytes from this packet.
297 packet.size -= result;
298 packet.data += result;
300 if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
301 !is_end_of_stream) {
302 DCHECK(timestamp != kNoTimestamp());
303 if (output_bytes_to_drop_ > 0) {
304 // Currently Vorbis is the only codec that causes us to drop samples.
305 // If we have to drop samples it always means the timeline starts at 0.
306 DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
307 output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
308 } else {
309 output_timestamp_helper_->SetBaseTimestamp(timestamp);
313 int decoded_audio_size = 0;
314 if (frame_decoded) {
315 if (av_frame_->sample_rate != samples_per_second_ ||
316 av_frame_->channels != channels_ ||
317 av_frame_->format != av_sample_format_) {
318 DLOG(ERROR) << "Unsupported midstream configuration change!"
319 << " Sample Rate: " << av_frame_->sample_rate << " vs "
320 << samples_per_second_
321 << ", Channels: " << av_frame_->channels << " vs "
322 << channels_
323 << ", Sample Format: " << av_frame_->format << " vs "
324 << av_sample_format_;
325 return cdm::kDecodeError;
328 decoded_audio_size = av_samples_get_buffer_size(
329 NULL, codec_context_->channels, av_frame_->nb_samples,
330 codec_context_->sample_fmt, 1);
333 if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
334 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
335 << "Decoder didn't output full frames";
337 int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
338 decoded_audio_size -= dropped_size;
339 output_bytes_to_drop_ -= dropped_size;
342 if (decoded_audio_size > 0) {
343 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
344 << "Decoder didn't output full frames";
346 base::TimeDelta output_timestamp =
347 output_timestamp_helper_->GetTimestamp();
348 output_timestamp_helper_->AddFrames(decoded_audio_size /
349 bytes_per_frame_);
351 // If we've exhausted the packet in the first decode we can write directly
352 // into the frame buffer instead of a multistep serialization approach.
353 if (serialized_audio_frames_.empty() && !packet.size) {
354 const uint32_t buffer_size = decoded_audio_size + sizeof(int64) * 2;
355 decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size));
356 if (!decoded_frames->FrameBuffer()) {
357 LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
358 return cdm::kDecodeError;
360 decoded_frames->FrameBuffer()->SetSize(buffer_size);
361 uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data();
363 const int64 timestamp = output_timestamp.InMicroseconds();
364 memcpy(output_buffer, &timestamp, sizeof(timestamp));
365 output_buffer += sizeof(timestamp);
367 const int64 output_size = decoded_audio_size;
368 memcpy(output_buffer, &output_size, sizeof(output_size));
369 output_buffer += sizeof(output_size);
371 // Copy the samples and return success.
372 CopySamples(
373 cdm_format, decoded_audio_size, *av_frame_, output_buffer);
374 return cdm::kSuccess;
377 // There are still more frames to decode, so we need to serialize them in
378 // a secondary buffer since we don't know their sizes ahead of time (which
379 // is required to allocate the FrameBuffer object).
380 SerializeInt64(output_timestamp.InMicroseconds());
381 SerializeInt64(decoded_audio_size);
383 const size_t previous_size = serialized_audio_frames_.size();
384 serialized_audio_frames_.resize(previous_size + decoded_audio_size);
385 uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size;
386 CopySamples(
387 cdm_format, decoded_audio_size, *av_frame_, output_buffer);
389 } while (packet.size > 0);
391 if (!serialized_audio_frames_.empty()) {
392 decoded_frames->SetFrameBuffer(
393 host_->Allocate(serialized_audio_frames_.size()));
394 if (!decoded_frames->FrameBuffer()) {
395 LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
396 return cdm::kDecodeError;
398 memcpy(decoded_frames->FrameBuffer()->Data(),
399 &serialized_audio_frames_[0],
400 serialized_audio_frames_.size());
401 decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
402 serialized_audio_frames_.clear();
404 return cdm::kSuccess;
407 return cdm::kNeedMoreData;
410 void FFmpegCdmAudioDecoder::ResetTimestampState() {
411 output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
412 last_input_timestamp_ = kNoTimestamp();
413 output_bytes_to_drop_ = 0;
416 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
417 DVLOG(1) << "ReleaseFFmpegResources()";
419 codec_context_.reset();
420 av_frame_.reset();
423 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
424 const size_t previous_size = serialized_audio_frames_.size();
425 serialized_audio_frames_.resize(previous_size + sizeof(value));
426 memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value));
429 } // namespace media