media/filters/opus_audio_decoder.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/filters/opus_audio_decoder.h"
   6
   7 #include "base/bind.h"
   8 #include "base/callback_helpers.h"
   9 #include "base/location.h"
  10 #include "base/message_loop/message_loop_proxy.h"
  11 #include "base/sys_byteorder.h"
  12 #include "media/base/audio_buffer.h"
  13 #include "media/base/audio_decoder_config.h"
  14 #include "media/base/audio_timestamp_helper.h"
  15 #include "media/base/bind_to_loop.h"
  16 #include "media/base/buffers.h"
  17 #include "media/base/decoder_buffer.h"
  18 #include "media/base/demuxer.h"
  19 #include "media/base/pipeline.h"
  20 #include "third_party/opus/src/include/opus.h"
  21 #include "third_party/opus/src/include/opus_multistream.h"
  22
  23 namespace media {
  24
  25 static uint16 ReadLE16(const uint8* data, size_t data_size, int read_offset) {
  26   DCHECK(data);
  27   uint16 value = 0;
  28   DCHECK_LE(read_offset + sizeof(value), data_size);
  29   memcpy(&value, data + read_offset, sizeof(value));
  30   return base::ByteSwapToLE16(value);
  31 }
  32
  33 // Returns true if the decode result was end of stream.
  34 static inline bool IsEndOfStream(int decoded_size,
  35                                  const scoped_refptr<DecoderBuffer>& input) {
  36   // Two conditions to meet to declare end of stream for this decoder:
  37   // 1. Opus didn't output anything.
  38   // 2. An end of stream buffer is received.
  39   return decoded_size == 0 && input->end_of_stream();
  40 }
  41
  42 // The Opus specification is part of IETF RFC 6716:
  43 // http://tools.ietf.org/html/rfc6716
  44
// Opus uses Vorbis channel mapping, and Vorbis channel mapping specifies
// mappings for up to 8 channels. This information is part of the Vorbis I
// Specification:
// http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html
//
// This is also the largest channel count the decoder in this file accepts.
static const int kMaxVorbisChannels = 8;

// Opus allows for decode of S16 or float samples. OpusAudioDecoder always uses
// S16 samples, so each sample of each channel occupies kBytesPerChannel bytes.
static const int kBitsPerChannel = 16;
static const int kBytesPerChannel = kBitsPerChannel / 8;

// Maximum packet size used in Xiph's opusdec and FFmpeg's libopusdec. The
// sample count is a total across all channels (960 * 6 samples for each of
// the kMaxVorbisChannels channels); the decoder's scratch output buffer is
// allocated with this many int16 entries.
static const int kMaxOpusOutputPacketSizeSamples = 960 * 6 * kMaxVorbisChannels;
static const int kMaxOpusOutputPacketSizeBytes =
    kMaxOpusOutputPacketSizeSamples * kBytesPerChannel;
  60
  61 static void RemapOpusChannelLayout(const uint8* opus_mapping,
  62                                    int num_channels,
  63                                    uint8* channel_layout) {
  64   DCHECK_LE(num_channels, kMaxVorbisChannels);
  65
  66   // Opus uses Vorbis channel layout.
  67   const int32 num_layouts = kMaxVorbisChannels;
  68   const int32 num_layout_values = kMaxVorbisChannels;
  69
  70   // Vorbis channel ordering for streams with >= 2 channels:
  71   // 2 Channels
  72   //   L, R
  73   // 3 Channels
  74   //   L, Center, R
  75   // 4 Channels
  76   //   Front L, Front R, Back L, Back R
  77   // 5 Channels
  78   //   Front L, Center, Front R, Back L, Back R
  79   // 6 Channels (5.1)
  80   //   Front L, Center, Front R, Back L, Back R, LFE
  81   // 7 channels (6.1)
  82   //   Front L, Front Center, Front R, Side L, Side R, Back Center, LFE
  83   // 8 Channels (7.1)
  84   //   Front L, Center, Front R, Side L, Side R, Back L, Back R, LFE
  85   //
  86   // Channel ordering information is taken from section 4.3.9 of the Vorbis I
  87   // Specification:
  88   // http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9
  89
  90   // These are the FFmpeg channel layouts expressed using the position of each
  91   // channel in the output stream from libopus.
  92   const uint8 kFFmpegChannelLayouts[num_layouts][num_layout_values] = {
  93     { 0 },
  94
  95     // Stereo: No reorder.
  96     { 0, 1 },
  97
  98     // 3 Channels, from Vorbis order to:
  99     //  L, R, Center
 100     { 0, 2, 1 },
 101
 102     // 4 Channels: No reorder.
 103     { 0, 1, 2, 3 },
 104
 105     // 5 Channels, from Vorbis order to:
 106     //  Front L, Front R, Center, Back L, Back R
 107     { 0, 2, 1, 3, 4 },
 108
 109     // 6 Channels (5.1), from Vorbis order to:
 110     //  Front L, Front R, Center, LFE, Back L, Back R
 111     { 0, 2, 1, 5, 3, 4 },
 112
 113     // 7 Channels (6.1), from Vorbis order to:
 114     //  Front L, Front R, Front Center, LFE, Side L, Side R, Back Center
 115     { 0, 2, 1, 6, 3, 4, 5 },
 116
 117     // 8 Channels (7.1), from Vorbis order to:
 118     //  Front L, Front R, Center, LFE, Back L, Back R, Side L, Side R
 119     { 0, 2, 1, 7, 5, 6, 3, 4 },
 120   };
 121
 122   // Reorder the channels to produce the same ordering as FFmpeg, which is
 123   // what the pipeline expects.
 124   const uint8* vorbis_layout_offset = kFFmpegChannelLayouts[num_channels - 1];
 125   for (int channel = 0; channel < num_channels; ++channel)
 126     channel_layout[channel] = opus_mapping[vorbis_layout_offset[channel]];
 127 }
 128
 129 // Opus Header contents:
 130 // - "OpusHead" (64 bits)
 131 // - version number (8 bits)
 132 // - Channels C (8 bits)
 133 // - Pre-skip (16 bits)
 134 // - Sampling rate (32 bits)
 135 // - Gain in dB (16 bits, S7.8)
 136 // - Mapping (8 bits, 0=single stream (mono/stereo) 1=Vorbis mapping,
 137 //            2..254: reserved, 255: multistream with no mapping)
 138 //
 139 // - if (mapping != 0)
  140 //    - N = total number of streams (8 bits)
 141 //    - M = number of paired streams (8 bits)
 142 //    - C times channel origin
 143 //         - if (C<2*M)
 144 //            - stream = byte/2
 145 //            - if (byte&0x1 == 0)
 146 //                - left
 147 //              else
 148 //                - right
 149 //         - else
 150 //            - stream = byte-M
 151
// Default audio output channel layout. Used to initialize |stream_map| in
// OpusHeader, and passed to opus_multistream_decoder_create() when the header
// does not contain mapping information. The values are valid only for mono and
// stereo output: Opus streams with more than 2 channels require a stream map.
static const int kMaxChannelsWithDefaultLayout = 2;
static const uint8 kDefaultOpusChannelLayout[kMaxChannelsWithDefaultLayout] = {
    0, 1 };

// Size of the Opus header excluding optional mapping information. This is the
// sum of the fixed fields listed in the header description above:
// 8 + 1 + 1 + 2 + 4 + 2 + 1 bytes.
static const int kOpusHeaderSize = 19;

// Offset to the channel count byte in the Opus header.
static const int kOpusHeaderChannelsOffset = 9;

// Offset to the pre-skip value in the Opus header. The value is 16 bits wide.
static const int kOpusHeaderSkipSamplesOffset = 10;

// Offset to the channel mapping byte in the Opus header. This is the last
// byte of the fixed-size part of the header.
static const int kOpusHeaderChannelMappingOffset = 18;

// Header contains a stream map. The mapping values are in extra data beyond
// the always present |kOpusHeaderSize| bytes of data. The mapping data
// contains stream count, coupling information, and per channel mapping values:
//   - Byte 0: Number of streams.
//   - Byte 1: Number coupled.
//   - Byte 2: Starting at byte 2 are |header->channels| uint8 mapping values.
// The byte positions above are relative to the end of the fixed-size header;
// the constants below are offsets from the start of the header.
static const int kOpusHeaderNumStreamsOffset = kOpusHeaderSize;
static const int kOpusHeaderNumCoupledOffset = kOpusHeaderNumStreamsOffset + 1;
static const int kOpusHeaderStreamMapOffset = kOpusHeaderNumStreamsOffset + 2;
 181
 182 struct OpusHeader {
 183   OpusHeader()
 184       : channels(0),
 185         skip_samples(0),
 186         channel_mapping(0),
 187         num_streams(0),
 188         num_coupled(0) {
 189     memcpy(stream_map,
 190            kDefaultOpusChannelLayout,
 191            kMaxChannelsWithDefaultLayout);
 192   }
 193   int channels;
 194   int skip_samples;
 195   int channel_mapping;
 196   int num_streams;
 197   int num_coupled;
 198   uint8 stream_map[kMaxVorbisChannels];
 199 };
 200
 201 // Returns true when able to successfully parse and store Opus header data in
 202 // data parsed in |header|. Based on opus header parsing code in libopusdec
 203 // from FFmpeg, and opus_header from Xiph's opus-tools project.
 204 static void ParseOpusHeader(const uint8* data, int data_size,
 205                             const AudioDecoderConfig& config,
 206                             OpusHeader* header) {
 207   CHECK_GE(data_size, kOpusHeaderSize);
 208
 209   header->channels = *(data + kOpusHeaderChannelsOffset);
 210
 211   CHECK(header->channels > 0 && header->channels <= kMaxVorbisChannels)
 212       << "invalid channel count in header: " << header->channels;
 213
 214   header->skip_samples =
 215       ReadLE16(data, data_size, kOpusHeaderSkipSamplesOffset);
 216
 217   header->channel_mapping = *(data + kOpusHeaderChannelMappingOffset);
 218
 219   if (!header->channel_mapping) {
 220     CHECK_LE(header->channels, kMaxChannelsWithDefaultLayout)
 221         << "Invalid header, missing stream map.";
 222
 223     header->num_streams = 1;
 224     header->num_coupled =
 225         (ChannelLayoutToChannelCount(config.channel_layout()) > 1) ? 1 : 0;
 226     return;
 227   }
 228
 229   CHECK_GE(data_size, kOpusHeaderStreamMapOffset + header->channels)
 230       << "Invalid stream map; insufficient data for current channel count: "
 231       << header->channels;
 232
 233   header->num_streams = *(data + kOpusHeaderNumStreamsOffset);
 234   header->num_coupled = *(data + kOpusHeaderNumCoupledOffset);
 235
 236   if (header->num_streams + header->num_coupled != header->channels)
 237     LOG(WARNING) << "Inconsistent channel mapping.";
 238
 239   for (int i = 0; i < header->channels; ++i)
 240     header->stream_map[i] = *(data + kOpusHeaderStreamMapOffset + i);
 241 }
 242
// Creates a decoder whose methods DCHECK that they run on |message_loop|.
// No Opus decoder instance is created here: |opus_decoder_| stays NULL and the
// audio parameters stay at their empty defaults until ConfigureDecoder()
// succeeds.
OpusAudioDecoder::OpusAudioDecoder(
    const scoped_refptr<base::MessageLoopProxy>& message_loop)
    : message_loop_(message_loop),
      weak_factory_(this),
      demuxer_stream_(NULL),
      opus_decoder_(NULL),
      bits_per_channel_(0),
      channel_layout_(CHANNEL_LAYOUT_NONE),
      samples_per_second_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0),
      skip_samples_(0) {
}
 256
 257 void OpusAudioDecoder::Initialize(
 258     DemuxerStream* stream,
 259     const PipelineStatusCB& status_cb,
 260     const StatisticsCB& statistics_cb) {
 261   DCHECK(message_loop_->BelongsToCurrentThread());
 262   PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);
 263
 264   if (demuxer_stream_) {
 265     // TODO(scherkus): initialization currently happens more than once in
 266     // PipelineIntegrationTest.BasicPlayback.
 267     LOG(ERROR) << "Initialize has already been called.";
 268     CHECK(false);
 269   }
 270
 271   weak_this_ = weak_factory_.GetWeakPtr();
 272   demuxer_stream_ = stream;
 273
 274   if (!ConfigureDecoder()) {
 275     initialize_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
 276     return;
 277   }
 278
 279   statistics_cb_ = statistics_cb;
 280   initialize_cb.Run(PIPELINE_OK);
 281 }
 282
// Starts a read. |read_cb| is wrapped with BindToCurrentLoop() and held in
// |read_cb_| until a result is available. Only one read may be outstanding at
// a time; a second overlapping call is a fatal error.
void OpusAudioDecoder::Read(const ReadCB& read_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb.is_null());
  CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported.";
  read_cb_ = BindToCurrentLoop(read_cb);

  // Kick off the demuxer read; BufferReady() receives the result.
  ReadFromDemuxerStream();
}
 291
// Returns the bits per channel recorded by the last successful
// ConfigureDecoder() call, or 0 if the decoder has not been configured.
int OpusAudioDecoder::bits_per_channel() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return bits_per_channel_;
}
 296
// Returns the channel layout recorded by the last successful
// ConfigureDecoder() call, or CHANNEL_LAYOUT_NONE if the decoder has not been
// configured.
ChannelLayout OpusAudioDecoder::channel_layout() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return channel_layout_;
}
 301
// Returns the sample rate recorded by the last successful ConfigureDecoder()
// call, or 0 if the decoder has not been configured.
int OpusAudioDecoder::samples_per_second() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return samples_per_second_;
}
 306
 307 void OpusAudioDecoder::Reset(const base::Closure& closure) {
 308   DCHECK(message_loop_->BelongsToCurrentThread());
 309   base::Closure reset_cb = BindToCurrentLoop(closure);
 310
 311   opus_multistream_decoder_ctl(opus_decoder_, OPUS_RESET_STATE);
 312   ResetTimestampState();
 313   reset_cb.Run();
 314 }
 315
// Destroys the libopus decoder instance, if one was created.
OpusAudioDecoder::~OpusAudioDecoder() {
  // TODO(scherkus): should we require Stop() to be called? this might end up
  // getting called on a random thread due to refcounting.
  CloseDecoder();
}
 321
// Requests the next buffer from the demuxer stream. The result is delivered to
// BufferReady() through |weak_this_|. A read callback must be pending.
void OpusAudioDecoder::ReadFromDemuxerStream() {
  DCHECK(!read_cb_.is_null());
  demuxer_stream_->Read(base::Bind(&OpusAudioDecoder::BufferReady, weak_this_));
}
 326
 327 void OpusAudioDecoder::BufferReady(
 328     DemuxerStream::Status status,
 329     const scoped_refptr<DecoderBuffer>& input) {
 330   DCHECK(message_loop_->BelongsToCurrentThread());
 331   DCHECK(!read_cb_.is_null());
 332   DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;
 333
 334   if (status == DemuxerStream::kAborted) {
 335     DCHECK(!input.get());
 336     base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
 337     return;
 338   }
 339
 340   if (status == DemuxerStream::kConfigChanged) {
 341     DCHECK(!input.get());
 342     DVLOG(1) << "Config changed.";
 343
 344     if (!ConfigureDecoder()) {
 345       base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
 346       return;
 347     }
 348
 349     ResetTimestampState();
 350     ReadFromDemuxerStream();
 351     return;
 352   }
 353
 354   DCHECK_EQ(status, DemuxerStream::kOk);
 355   DCHECK(input.get());
 356
 357   // Libopus does not buffer output. Decoding is complete when an end of stream
 358   // input buffer is received.
 359   if (input->end_of_stream()) {
 360     base::ResetAndReturn(&read_cb_).Run(kOk, AudioBuffer::CreateEOSBuffer());
 361     return;
 362   }
 363
 364   // Make sure we are notified if http://crbug.com/49709 returns.  Issue also
 365   // occurs with some damaged files.
 366   if (input->timestamp() == kNoTimestamp() &&
 367       output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
 368     DVLOG(1) << "Received a buffer without timestamps!";
 369     base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
 370     return;
 371   }
 372
 373   if (last_input_timestamp_ != kNoTimestamp() &&
 374       input->timestamp() != kNoTimestamp() &&
 375       input->timestamp() < last_input_timestamp_) {
 376     base::TimeDelta diff = input->timestamp() - last_input_timestamp_;
 377     DVLOG(1) << "Input timestamps are not monotonically increasing! "
 378               << " ts " << input->timestamp().InMicroseconds() << " us"
 379               << " diff " << diff.InMicroseconds() << " us";
 380     base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
 381     return;
 382   }
 383
 384   last_input_timestamp_ = input->timestamp();
 385
 386   scoped_refptr<AudioBuffer> output_buffer;
 387
 388   if (!Decode(input, &output_buffer)) {
 389     base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
 390     return;
 391   }
 392
 393   if (output_buffer.get()) {
 394     // Execute callback to return the decoded audio.
 395     base::ResetAndReturn(&read_cb_).Run(kOk, output_buffer);
 396   } else {
 397     // We exhausted the input data, but it wasn't enough for a frame.  Ask for
 398     // more data in order to fulfill this read.
 399     ReadFromDemuxerStream();
 400   }
 401 }
 402
 403 bool OpusAudioDecoder::ConfigureDecoder() {
 404   const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();
 405
 406   if (config.codec() != kCodecOpus) {
 407     DLOG(ERROR) << "codec must be kCodecOpus.";
 408     return false;
 409   }
 410
 411   const int channel_count =
 412       ChannelLayoutToChannelCount(config.channel_layout());
 413   if (!config.IsValidConfig() || channel_count > kMaxVorbisChannels) {
 414     DLOG(ERROR) << "Invalid or unsupported audio stream -"
 415                 << " codec: " << config.codec()
 416                 << " channel count: " << channel_count
 417                 << " channel layout: " << config.channel_layout()
 418                 << " bits per channel: " << config.bits_per_channel()
 419                 << " samples per second: " << config.samples_per_second();
 420     return false;
 421   }
 422
 423   if (config.bits_per_channel() != kBitsPerChannel) {
 424     DLOG(ERROR) << "16 bit samples required.";
 425     return false;
 426   }
 427
 428   if (config.is_encrypted()) {
 429     DLOG(ERROR) << "Encrypted audio stream not supported.";
 430     return false;
 431   }
 432
 433   if (opus_decoder_ &&
 434       (bits_per_channel_ != config.bits_per_channel() ||
 435        channel_layout_ != config.channel_layout() ||
 436        samples_per_second_ != config.samples_per_second())) {
 437     DVLOG(1) << "Unsupported config change :";
 438     DVLOG(1) << "\tbits_per_channel : " << bits_per_channel_
 439              << " -> " << config.bits_per_channel();
 440     DVLOG(1) << "\tchannel_layout : " << channel_layout_
 441              << " -> " << config.channel_layout();
 442     DVLOG(1) << "\tsample_rate : " << samples_per_second_
 443              << " -> " << config.samples_per_second();
 444     return false;
 445   }
 446
 447   // Clean up existing decoder if necessary.
 448   CloseDecoder();
 449
 450   // Allocate the output buffer if necessary.
 451   if (!output_buffer_)
 452     output_buffer_.reset(new int16[kMaxOpusOutputPacketSizeSamples]);
 453
 454   // Parse the Opus header.
 455   OpusHeader opus_header;
 456   ParseOpusHeader(config.extra_data(), config.extra_data_size(),
 457                   config,
 458                   &opus_header);
 459
 460   skip_samples_ = opus_header.skip_samples;
 461
 462   if (skip_samples_ > 0)
 463     output_bytes_to_drop_ = skip_samples_ * config.bytes_per_frame();
 464
 465   uint8 channel_mapping[kMaxVorbisChannels];
 466   memcpy(&channel_mapping,
 467          kDefaultOpusChannelLayout,
 468          kMaxChannelsWithDefaultLayout);
 469
 470   if (channel_count > kMaxChannelsWithDefaultLayout) {
 471     RemapOpusChannelLayout(opus_header.stream_map,
 472                            channel_count,
 473                            channel_mapping);
 474   }
 475
 476   // Init Opus.
 477   int status = OPUS_INVALID_STATE;
 478   opus_decoder_ = opus_multistream_decoder_create(config.samples_per_second(),
 479                                                   channel_count,
 480                                                   opus_header.num_streams,
 481                                                   opus_header.num_coupled,
 482                                                   channel_mapping,
 483                                                   &status);
 484   if (!opus_decoder_ || status != OPUS_OK) {
 485     LOG(ERROR) << "opus_multistream_decoder_create failed status="
 486                << opus_strerror(status);
 487     return false;
 488   }
 489
 490   // TODO(tomfinegan): Handle audio delay once the matroska spec is updated
 491   // to represent the value.
 492
 493   bits_per_channel_ = config.bits_per_channel();
 494   channel_layout_ = config.channel_layout();
 495   samples_per_second_ = config.samples_per_second();
 496   output_timestamp_helper_.reset(
 497       new AudioTimestampHelper(config.samples_per_second()));
 498   return true;
 499 }
 500
 501 void OpusAudioDecoder::CloseDecoder() {
 502   if (opus_decoder_) {
 503     opus_multistream_decoder_destroy(opus_decoder_);
 504     opus_decoder_ = NULL;
 505   }
 506 }
 507
// Clears the timestamp bookkeeping: the output timestamp helper loses its base
// timestamp, the last seen input timestamp is forgotten, and any output bytes
// still scheduled to be dropped are cancelled. Dereferences
// |output_timestamp_helper_|, which is created by ConfigureDecoder().
void OpusAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_bytes_to_drop_ = 0;
}
 513
 514 bool OpusAudioDecoder::Decode(const scoped_refptr<DecoderBuffer>& input,
 515                               scoped_refptr<AudioBuffer>* output_buffer) {
 516   int samples_decoded = opus_multistream_decode(opus_decoder_,
 517                                                 input->data(),
 518                                                 input->data_size(),
 519                                                 &output_buffer_[0],
 520                                                 kMaxOpusOutputPacketSizeSamples,
 521                                                 0);
 522   if (samples_decoded < 0) {
 523     LOG(ERROR) << "opus_multistream_decode failed for"
 524                << " timestamp: " << input->timestamp().InMicroseconds()
 525                << " us, duration: " << input->duration().InMicroseconds()
 526                << " us, packet size: " << input->data_size() << " bytes with"
 527                << " status: " << opus_strerror(samples_decoded);
 528     return false;
 529   }
 530
 531   uint8* decoded_audio_data = reinterpret_cast<uint8*>(&output_buffer_[0]);
 532   int decoded_audio_size = samples_decoded *
 533       demuxer_stream_->audio_decoder_config().bytes_per_frame();
 534   DCHECK_LE(decoded_audio_size, kMaxOpusOutputPacketSizeBytes);
 535
 536   if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
 537       !input->end_of_stream()) {
 538     DCHECK(input->timestamp() != kNoTimestamp());
 539     output_timestamp_helper_->SetBaseTimestamp(input->timestamp());
 540   }
 541
 542   if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
 543     int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
 544     DCHECK_EQ(dropped_size % kBytesPerChannel, 0);
 545     decoded_audio_data += dropped_size;
 546     decoded_audio_size -= dropped_size;
 547     output_bytes_to_drop_ -= dropped_size;
 548     samples_decoded = decoded_audio_size /
 549                       demuxer_stream_->audio_decoder_config().bytes_per_frame();
 550   }
 551
 552   if (decoded_audio_size > 0) {
 553     // Copy the audio samples into an output buffer.
 554     uint8* data[] = { decoded_audio_data };
 555     *output_buffer = AudioBuffer::CopyFrom(
 556         kSampleFormatS16,
 557         ChannelLayoutToChannelCount(channel_layout_),
 558         samples_decoded,
 559         data,
 560         output_timestamp_helper_->GetTimestamp(),
 561         output_timestamp_helper_->GetFrameDuration(samples_decoded));
 562     output_timestamp_helper_->AddFrames(samples_decoded);
 563   }
 564
 565   // Decoding finished successfully, update statistics.
 566   PipelineStatistics statistics;
 567   statistics.audio_bytes_decoded = decoded_audio_size;
 568   statistics_cb_.Run(statistics);
 569
 570   return true;
 571 }
 572
 573 }  // namespace media