webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "webkit/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/logging.h"
  10 #include "media/base/audio_bus.h"
  11 #include "media/base/audio_timestamp_helper.h"
  12 #include "media/base/buffers.h"
  13 #include "media/base/data_buffer.h"
  14 #include "media/base/limits.h"
  15 #include "webkit/media/crypto/ppapi/cdm/content_decryption_module.h"
  16
  17 // Include FFmpeg header files.
  18 extern "C" {
  19 // Temporarily disable possible loss of data warning.
  20 MSVC_PUSH_DISABLE_WARNING(4244);
  21 #include <libavcodec/avcodec.h>
  22 MSVC_POP_WARNING();
  23 }  // extern "C"
  24
  25 namespace webkit_media {
  26
// Maximum number of channels with defined layout in src/media.
// IsValidConfig() rejects any configuration whose channel count exceeds this
// value.
static const int kMaxChannels = 8;
  29
  30 static CodecID CdmAudioCodecToCodecID(
  31     cdm::AudioDecoderConfig::AudioCodec audio_codec) {
  32   switch (audio_codec) {
  33     case cdm::AudioDecoderConfig::kCodecVorbis:
  34       return CODEC_ID_VORBIS;
  35     case cdm::AudioDecoderConfig::kCodecAac:
  36       return CODEC_ID_AAC;
  37     case cdm::AudioDecoderConfig::kUnknownAudioCodec:
  38     default:
  39       NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
  40       return CODEC_ID_NONE;
  41   }
  42 }
  43
  44 static void CdmAudioDecoderConfigToAVCodecContext(
  45     const cdm::AudioDecoderConfig& config,
  46     AVCodecContext* codec_context) {
  47   codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
  48   codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);
  49
  50   switch (config.bits_per_channel) {
  51     case 8:
  52       codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
  53       break;
  54     case 16:
  55       codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
  56       break;
  57     case 32:
  58       codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
  59       break;
  60     default:
  61       DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
  62                   "per channel: " << config.bits_per_channel;
  63       codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
  64   }
  65
  66   codec_context->channels = config.channel_count;
  67   codec_context->sample_rate = config.samples_per_second;
  68
  69   if (config.extra_data) {
  70     codec_context->extradata_size = config.extra_data_size;
  71     codec_context->extradata = reinterpret_cast<uint8_t*>(
  72         av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
  73     memcpy(codec_context->extradata, config.extra_data,
  74            config.extra_data_size);
  75     memset(codec_context->extradata + config.extra_data_size, '\0',
  76            FF_INPUT_BUFFER_PADDING_SIZE);
  77   } else {
  78     codec_context->extradata = NULL;
  79     codec_context->extradata_size = 0;
  80   }
  81 }
  82
// Constructs a decoder in the uninitialized state. |host| is stored in
// |host_|; DecodeBuffer() uses it to allocate the buffer that carries decoded
// frames back to the caller. No FFmpeg objects are created here (the codec
// context and frame start out NULL); Initialize() sets them up.
FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host)
    : is_initialized_(false),
      host_(host),
      codec_context_(NULL),
      av_frame_(NULL),
      bits_per_channel_(0),
      samples_per_second_(0),
      channels_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      // No input has been seen yet.
      last_input_timestamp_(media::kNoTimestamp()),
      output_bytes_to_drop_(0) {
}
  96
// Frees the FFmpeg codec context and frame if they are still allocated.
FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
  ReleaseFFmpegResources();
}
 100
 101 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
 102   DVLOG(1) << "Initialize()";
 103
 104   if (!IsValidConfig(config)) {
 105     LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
 106     return false;
 107   }
 108
 109   if (is_initialized_) {
 110     LOG(ERROR) << "Initialize(): Already initialized.";
 111     return false;
 112   }
 113
 114   // Initialize AVCodecContext structure.
 115   codec_context_ = avcodec_alloc_context3(NULL);
 116   CdmAudioDecoderConfigToAVCodecContext(config, codec_context_);
 117
 118   // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
 119   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
 120     codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
 121
 122   AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
 123   if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) {
 124     DLOG(ERROR) << "Could not initialize audio decoder: "
 125                 << codec_context_->codec_id;
 126     return false;
 127   }
 128
 129   // Ensure avcodec_open2() respected our format request.
 130   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
 131     DLOG(ERROR) << "Unable to configure a supported sample format: "
 132                 << codec_context_->sample_fmt;
 133     return false;
 134   }
 135
 136   // Some codecs will only output float data, so we need to convert to integer
 137   // before returning the decoded buffer.
 138   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP ||
 139       codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
 140     // Preallocate the AudioBus for float conversions.  We can treat interleaved
 141     // float data as a single planar channel since our output is expected in an
 142     // interleaved format anyways.
 143     int channels = codec_context_->channels;
 144     if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT)
 145       channels = 1;
 146     converter_bus_ = media::AudioBus::CreateWrapper(channels);
 147   }
 148
 149   // Success!
 150   av_frame_ = avcodec_alloc_frame();
 151   bits_per_channel_ = config.bits_per_channel;
 152   samples_per_second_ = config.samples_per_second;
 153   bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8;
 154   output_timestamp_helper_.reset(new media::AudioTimestampHelper(
 155       bytes_per_frame_, config.samples_per_second));
 156   serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_);
 157   is_initialized_ = true;
 158
 159   // Store initial values to guard against midstream configuration changes.
 160   channels_ = codec_context_->channels;
 161   av_sample_format_ = codec_context_->sample_fmt;
 162
 163   return true;
 164 }
 165
 166 void FFmpegCdmAudioDecoder::Deinitialize() {
 167   DVLOG(1) << "Deinitialize()";
 168   ReleaseFFmpegResources();
 169   is_initialized_ = false;
 170   ResetTimestampState();
 171 }
 172
 173 void FFmpegCdmAudioDecoder::Reset() {
 174   DVLOG(1) << "Reset()";
 175   avcodec_flush_buffers(codec_context_);
 176   ResetTimestampState();
 177 }
 178
 179 // static
 180 bool FFmpegCdmAudioDecoder::IsValidConfig(
 181     const cdm::AudioDecoderConfig& config) {
 182   return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec &&
 183          config.channel_count > 0 &&
 184          config.channel_count <= kMaxChannels &&
 185          config.bits_per_channel > 0 &&
 186          config.bits_per_channel <= media::limits::kMaxBitsPerSample &&
 187          config.samples_per_second > 0 &&
 188          config.samples_per_second <= media::limits::kMaxSampleRate;
 189 }
 190
 191 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
 192     const uint8_t* compressed_buffer,
 193     int32_t compressed_buffer_size,
 194     int64_t input_timestamp,
 195     cdm::AudioFrames* decoded_frames) {
 196   DVLOG(1) << "DecodeBuffer()";
 197   const bool is_end_of_stream = !compressed_buffer;
 198   base::TimeDelta timestamp =
 199       base::TimeDelta::FromMicroseconds(input_timestamp);
 200
 201   bool is_vorbis = codec_context_->codec_id == CODEC_ID_VORBIS;
 202   if (!is_end_of_stream) {
 203     if (last_input_timestamp_ == media::kNoTimestamp()) {
 204       if (is_vorbis && timestamp < base::TimeDelta()) {
 205         // Dropping frames for negative timestamps as outlined in section A.2
 206         // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
 207         int frames_to_drop = floor(
 208             0.5 + -timestamp.InSecondsF() * samples_per_second_);
 209         output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
 210       } else {
 211         last_input_timestamp_ = timestamp;
 212       }
 213     } else if (timestamp != media::kNoTimestamp()) {
 214       if (timestamp < last_input_timestamp_) {
 215         base::TimeDelta diff = timestamp - last_input_timestamp_;
 216         DVLOG(1) << "Input timestamps are not monotonically increasing! "
 217                  << " ts " << timestamp.InMicroseconds() << " us"
 218                  << " diff " << diff.InMicroseconds() << " us";
 219         return cdm::kDecodeError;
 220       }
 221
 222       last_input_timestamp_ = timestamp;
 223     }
 224   }
 225
 226   AVPacket packet;
 227   av_init_packet(&packet);
 228   packet.data = const_cast<uint8_t*>(compressed_buffer);
 229   packet.size = compressed_buffer_size;
 230
 231   // Each audio packet may contain several frames, so we must call the decoder
 232   // until we've exhausted the packet.  Regardless of the packet size we always
 233   // want to hand it to the decoder at least once, otherwise we would end up
 234   // skipping end of stream packets since they have a size of zero.
 235   do {
 236     // Reset frame to default values.
 237     avcodec_get_frame_defaults(av_frame_);
 238
 239     int frame_decoded = 0;
 240     int result = avcodec_decode_audio4(
 241         codec_context_, av_frame_, &frame_decoded, &packet);
 242
 243     if (result < 0) {
 244       DCHECK(!is_end_of_stream)
 245           << "End of stream buffer produced an error! "
 246           << "This is quite possibly a bug in the audio decoder not handling "
 247           << "end of stream AVPackets correctly.";
 248
 249       DLOG(ERROR)
 250           << "Error decoding an audio frame with timestamp: "
 251           << timestamp.InMicroseconds() << " us, duration: "
 252           << timestamp.InMicroseconds() << " us, packet size: "
 253           << compressed_buffer_size << " bytes";
 254
 255       return cdm::kDecodeError;
 256     }
 257
 258     // Update packet size and data pointer in case we need to call the decoder
 259     // with the remaining bytes from this packet.
 260     packet.size -= result;
 261     packet.data += result;
 262
 263     if (output_timestamp_helper_->base_timestamp() == media::kNoTimestamp() &&
 264         !is_end_of_stream) {
 265       DCHECK(timestamp != media::kNoTimestamp());
 266       if (output_bytes_to_drop_ > 0) {
 267         // Currently Vorbis is the only codec that causes us to drop samples.
 268         // If we have to drop samples it always means the timeline starts at 0.
 269         DCHECK_EQ(codec_context_->codec_id, CODEC_ID_VORBIS);
 270         output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
 271       } else {
 272         output_timestamp_helper_->SetBaseTimestamp(timestamp);
 273       }
 274     }
 275
 276     int decoded_audio_size = 0;
 277     if (frame_decoded) {
 278       if (av_frame_->sample_rate != samples_per_second_ ||
 279           av_frame_->channels != channels_ ||
 280           av_frame_->format != av_sample_format_) {
 281         DLOG(ERROR) << "Unsupported midstream configuration change!"
 282                     << " Sample Rate: " << av_frame_->sample_rate << " vs "
 283                     << samples_per_second_
 284                     << ", Channels: " << av_frame_->channels << " vs "
 285                     << channels_
 286                     << ", Sample Format: " << av_frame_->format << " vs "
 287                     << av_sample_format_;
 288         return cdm::kDecodeError;
 289       }
 290
 291       decoded_audio_size = av_samples_get_buffer_size(
 292           NULL, codec_context_->channels, av_frame_->nb_samples,
 293           codec_context_->sample_fmt, 1);
 294       // If we're decoding into float, adjust audio size.
 295       if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) {
 296         DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT ||
 297                codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
 298         decoded_audio_size *=
 299             static_cast<float>(bits_per_channel_ / 8) / sizeof(float);
 300       }
 301     }
 302
 303     int start_sample = 0;
 304     if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
 305       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
 306           << "Decoder didn't output full frames";
 307
 308       int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
 309       start_sample = dropped_size / bytes_per_frame_;
 310       decoded_audio_size -= dropped_size;
 311       output_bytes_to_drop_ -= dropped_size;
 312     }
 313
 314     scoped_refptr<media::DataBuffer> output;
 315     if (decoded_audio_size > 0) {
 316       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
 317           << "Decoder didn't output full frames";
 318
 319       // Convert float data using an AudioBus.
 320       if (converter_bus_) {
 321         // Setup the AudioBus as a wrapper of the AVFrame data and then use
 322         // AudioBus::ToInterleaved() to convert the data as necessary.
 323         int skip_frames = start_sample;
 324         int total_frames = av_frame_->nb_samples;
 325         if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
 326           DCHECK_EQ(converter_bus_->channels(), 1);
 327           total_frames *= codec_context_->channels;
 328           skip_frames *= codec_context_->channels;
 329         }
 330         converter_bus_->set_frames(total_frames);
 331         DCHECK_EQ(decoded_audio_size,
 332                   (converter_bus_->frames() - skip_frames) * bytes_per_frame_);
 333
 334         for (int i = 0; i < converter_bus_->channels(); ++i) {
 335           converter_bus_->SetChannelData(i, reinterpret_cast<float*>(
 336               av_frame_->extended_data[i]));
 337         }
 338
 339         output = new media::DataBuffer(decoded_audio_size);
 340         output->SetDataSize(decoded_audio_size);
 341         converter_bus_->ToInterleavedPartial(
 342             skip_frames, converter_bus_->frames() - skip_frames,
 343             bits_per_channel_ / 8, output->GetWritableData());
 344       } else {
 345         output = media::DataBuffer::CopyFrom(
 346             av_frame_->extended_data[0] + start_sample * bytes_per_frame_,
 347             decoded_audio_size);
 348       }
 349
 350       base::TimeDelta output_timestamp =
 351           output_timestamp_helper_->GetTimestamp();
 352       output_timestamp_helper_->AddBytes(decoded_audio_size);
 353
 354       // Serialize the audio samples into |serialized_audio_frames_|.
 355       SerializeInt64(output_timestamp.InMicroseconds());
 356       SerializeInt64(output->GetDataSize());
 357       serialized_audio_frames_.insert(
 358           serialized_audio_frames_.end(),
 359           output->GetData(),
 360           output->GetData() + output->GetDataSize());
 361     }
 362   } while (packet.size > 0);
 363
 364   if (!serialized_audio_frames_.empty()) {
 365     decoded_frames->SetFrameBuffer(
 366         host_->Allocate(serialized_audio_frames_.size()));
 367     if (!decoded_frames->FrameBuffer()) {
 368       LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed.";
 369       return cdm::kDecodeError;
 370     }
 371     memcpy(decoded_frames->FrameBuffer()->Data(),
 372            &serialized_audio_frames_[0],
 373            serialized_audio_frames_.size());
 374     decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
 375     serialized_audio_frames_.clear();
 376
 377     return cdm::kSuccess;
 378   }
 379
 380   return cdm::kNeedMoreData;
 381 }
 382
 383 void FFmpegCdmAudioDecoder::ResetTimestampState() {
 384   output_timestamp_helper_->SetBaseTimestamp(media::kNoTimestamp());
 385   last_input_timestamp_ = media::kNoTimestamp();
 386   output_bytes_to_drop_ = 0;
 387 }
 388
 389 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
 390   DVLOG(1) << "ReleaseFFmpegResources()";
 391
 392   if (codec_context_) {
 393     av_free(codec_context_->extradata);
 394     avcodec_close(codec_context_);
 395     av_free(codec_context_);
 396     codec_context_ = NULL;
 397   }
 398   if (av_frame_) {
 399     av_free(av_frame_);
 400     av_frame_ = NULL;
 401   }
 402 }
 403
 404 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
 405   int previous_size = serialized_audio_frames_.size();
 406   serialized_audio_frames_.resize(previous_size + sizeof(value));
 407   memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value));
 408 }
 409
 410 }  // namespace webkit_media