media/cast/audio_sender/audio_encoder.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/cast/audio_sender/audio_encoder.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/bind.h"
  10 #include "base/bind_helpers.h"
  11 #include "base/location.h"
  12 #include "base/logging.h"
  13 #include "base/stl_util.h"
  14 #include "base/sys_byteorder.h"
  15 #include "base/time/time.h"
  16 #include "media/base/audio_bus.h"
  17 #include "media/cast/cast_defines.h"
  18 #include "media/cast/cast_environment.h"
  19 #include "media/cast/logging/logging_defines.h"
  20 #include "third_party/opus/src/include/opus.h"
  21
  22 namespace media {
  23 namespace cast {
  24
  25 namespace {
  26
  27 // The fixed number of audio frames per second and, inversely, the duration of
  28 // one frame's worth of samples.
  29 const int kFramesPerSecond = 100;
  30 const int kFrameDurationMillis = 1000 / kFramesPerSecond;  // No remainder!
  31
  32 // Threshold used to decide whether audio being delivered to the encoder is
  33 // coming in too slow with respect to the capture timestamps.
  34 const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;
  35
  36 void LogAudioFrameEncodedEvent(
  37     const scoped_refptr<media::cast::CastEnvironment>& cast_environment,
  38     base::TimeTicks event_time,
  39     media::cast::RtpTimestamp rtp_timestamp,
  40     uint32 frame_id,
  41     size_t frame_size) {
  42   if (!cast_environment->CurrentlyOn(CastEnvironment::MAIN)) {
  43     cast_environment->PostTask(
  44         CastEnvironment::MAIN,
  45         FROM_HERE,
  46         base::Bind(&LogAudioFrameEncodedEvent,
  47                    cast_environment, event_time,
  48                    rtp_timestamp, frame_id, frame_size));
  49     return;
  50   }
  51   cast_environment->Logging()->InsertEncodedFrameEvent(
  52       event_time, media::cast::FRAME_ENCODED, media::cast::AUDIO_EVENT,
  53       rtp_timestamp, frame_id,
  54       static_cast<int>(frame_size), /* key_frame - unused */ false,
  55       /*target_bitrate - unused*/ 0);
  56 }
  57
  58 }  // namespace
  59
  60
  61 // Base class that handles the common problem of feeding one or more AudioBus'
  62 // data into a buffer and then, once the buffer is full, encoding the signal and
  63 // emitting an EncodedAudioFrame via the FrameEncodedCallback.
  64 //
  65 // Subclasses complete the implementation by handling the actual encoding
  66 // details.
  67 class AudioEncoder::ImplBase
  68     : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
  69  public:
  70   ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
  71            transport::AudioCodec codec,
  72            int num_channels,
  73            int sampling_rate,
  74            const FrameEncodedCallback& callback)
  75       : cast_environment_(cast_environment),
  76         codec_(codec),
  77         num_channels_(num_channels),
  78         samples_per_frame_(sampling_rate / kFramesPerSecond),
  79         callback_(callback),
  80         cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED),
  81         buffer_fill_end_(0),
  82         frame_id_(0),
  83         frame_rtp_timestamp_(0) {
  84     if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
  85         sampling_rate % kFramesPerSecond != 0 ||
  86         samples_per_frame_ * num_channels_ >
  87             transport::EncodedAudioFrame::kMaxNumberOfSamples) {
  88       cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
  89     }
  90   }
  91
  92   CastInitializationStatus InitializationResult() const {
  93     return cast_initialization_status_;
  94   }
  95
  96   void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
  97                    const base::TimeTicks& recorded_time) {
  98     DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED);
  99     DCHECK(!recorded_time.is_null());
 100
 101     // Determine whether |recorded_time| is consistent with the amount of audio
 102     // data having been processed in the past.  Resolve the underrun problem by
 103     // dropping data from the internal buffer and skipping ahead the next
 104     // frame's RTP timestamp by the estimated number of frames missed.  On the
 105     // other hand, don't attempt to resolve overruns: A receiver should
 106     // gracefully deal with an excess of audio data.
 107     const base::TimeDelta frame_duration =
 108         base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
 109     base::TimeDelta buffer_fill_duration =
 110         buffer_fill_end_ * frame_duration / samples_per_frame_;
 111     if (!frame_capture_time_.is_null()) {
 112       const base::TimeDelta amount_ahead_by =
 113           recorded_time - (frame_capture_time_ + buffer_fill_duration);
 114       if (amount_ahead_by >
 115               base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) {
 116         buffer_fill_end_ = 0;
 117         buffer_fill_duration = base::TimeDelta();
 118         const int64 num_frames_missed = amount_ahead_by /
 119             base::TimeDelta::FromMilliseconds(kFrameDurationMillis);
 120         frame_rtp_timestamp_ +=
 121             static_cast<uint32>(num_frames_missed * samples_per_frame_);
 122         DVLOG(1) << "Skipping RTP timestamp ahead to account for "
 123                  << num_frames_missed * samples_per_frame_
 124                  << " samples' worth of underrun.";
 125       }
 126     }
 127     frame_capture_time_ = recorded_time - buffer_fill_duration;
 128
 129     // Encode all audio in |audio_bus| into zero or more frames.
 130     int src_pos = 0;
 131     while (src_pos < audio_bus->frames()) {
 132       const int num_samples_to_xfer = std::min(
 133           samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
 134       DCHECK_EQ(audio_bus->channels(), num_channels_);
 135       TransferSamplesIntoBuffer(
 136           audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
 137       src_pos += num_samples_to_xfer;
 138       buffer_fill_end_ += num_samples_to_xfer;
 139
 140       if (buffer_fill_end_ < samples_per_frame_)
 141         break;
 142
 143       scoped_ptr<transport::EncodedAudioFrame> audio_frame(
 144           new transport::EncodedAudioFrame());
 145       audio_frame->codec = codec_;
 146       audio_frame->frame_id = frame_id_;
 147       audio_frame->rtp_timestamp = frame_rtp_timestamp_;
 148
 149       if (EncodeFromFilledBuffer(&audio_frame->data)) {
 150         LogAudioFrameEncodedEvent(cast_environment_,
 151                                   cast_environment_->Clock()->NowTicks(),
 152                                   audio_frame->rtp_timestamp,
 153                                   audio_frame->frame_id,
 154                                   audio_frame->data.size());
 155         cast_environment_->PostTask(
 156             CastEnvironment::MAIN,
 157             FROM_HERE,
 158             base::Bind(callback_,
 159                        base::Passed(&audio_frame),
 160                        frame_capture_time_));
 161       }
 162
 163       // Reset the internal buffer, frame ID, and timestamps for the next frame.
 164       buffer_fill_end_ = 0;
 165       ++frame_id_;
 166       frame_rtp_timestamp_ += samples_per_frame_;
 167       frame_capture_time_ += frame_duration;
 168     }
 169   }
 170
 171  protected:
 172   friend class base::RefCountedThreadSafe<ImplBase>;
 173   virtual ~ImplBase() {}
 174
 175   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 176                                          int source_offset,
 177                                          int buffer_fill_offset,
 178                                          int num_samples) = 0;
 179   virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
 180
 181   const scoped_refptr<CastEnvironment> cast_environment_;
 182   const transport::AudioCodec codec_;
 183   const int num_channels_;
 184   const int samples_per_frame_;
 185   const FrameEncodedCallback callback_;
 186
 187   // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
 188   CastInitializationStatus cast_initialization_status_;
 189
 190  private:
 191   // In the case where a call to EncodeAudio() cannot completely fill the
 192   // buffer, this points to the position at which to populate data in a later
 193   // call.
 194   int buffer_fill_end_;
 195
 196   // A counter used to label EncodedAudioFrames.
 197   uint32 frame_id_;
 198
 199   // The RTP timestamp for the next frame of encoded audio.  This is defined as
 200   // the number of audio samples encoded so far, plus the estimated number of
 201   // samples that were missed due to data underruns.  A receiver uses this value
 202   // to detect gaps in the audio signal data being provided.  Per the spec, RTP
 203   // timestamp values are allowed to overflow and roll around past zero.
 204   uint32 frame_rtp_timestamp_;
 205
 206   // The local system time associated with the start of the next frame of
 207   // encoded audio.  This value is passed on to a receiver as a reference clock
 208   // timestamp for the purposes of synchronizing audio and video.  Its
 209   // progression is expected to drift relative to the elapsed time implied by
 210   // the RTP timestamps.
 211   base::TimeTicks frame_capture_time_;
 212
 213   DISALLOW_COPY_AND_ASSIGN(ImplBase);
 214 };
 215
 216 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 217  public:
 218   OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
 219            int num_channels,
 220            int sampling_rate,
 221            int bitrate,
 222            const FrameEncodedCallback& callback)
 223       : ImplBase(cast_environment,
 224                  transport::kOpus,
 225                  num_channels,
 226                  sampling_rate,
 227                  callback),
 228         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
 229         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
 230         buffer_(new float[num_channels * samples_per_frame_]) {
 231     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
 232       return;
 233     if (opus_encoder_init(opus_encoder_,
 234                           sampling_rate,
 235                           num_channels,
 236                           OPUS_APPLICATION_AUDIO) != OPUS_OK) {
 237       ImplBase::cast_initialization_status_ =
 238           STATUS_INVALID_AUDIO_CONFIGURATION;
 239       return;
 240     }
 241     ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
 242
 243     if (bitrate <= 0) {
 244       // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
 245       // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
 246       // frame size.  The opus library authors may, of course, adjust this in
 247       // later versions.
 248       bitrate = OPUS_AUTO;
 249     }
 250     CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
 251              OPUS_OK);
 252   }
 253
 254  private:
 255   virtual ~OpusImpl() {}
 256
 257   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 258                                          int source_offset,
 259                                          int buffer_fill_offset,
 260                                          int num_samples) OVERRIDE {
 261     // Opus requires channel-interleaved samples in a single array.
 262     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 263       const float* src = audio_bus->channel(ch) + source_offset;
 264       const float* const src_end = src + num_samples;
 265       float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
 266       for (; src < src_end; ++src, dest += num_channels_)
 267         *dest = *src;
 268     }
 269   }
 270
 271   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 272     out->resize(kOpusMaxPayloadSize);
 273     const opus_int32 result =
 274         opus_encode_float(opus_encoder_,
 275                           buffer_.get(),
 276                           samples_per_frame_,
 277                           reinterpret_cast<uint8*>(string_as_array(out)),
 278                           kOpusMaxPayloadSize);
 279     if (result > 1) {
 280       out->resize(result);
 281       return true;
 282     } else if (result < 0) {
 283       LOG(ERROR) << "Error code from opus_encode_float(): " << result;
 284       return false;
 285     } else {
 286       // Do nothing: The documentation says that a return value of zero or
 287       // one byte means the packet does not need to be transmitted.
 288       return false;
 289     }
 290   }
 291
 292   const scoped_ptr<uint8[]> encoder_memory_;
 293   OpusEncoder* const opus_encoder_;
 294   const scoped_ptr<float[]> buffer_;
 295
 296   // This is the recommended value, according to documentation in
 297   // third_party/opus/src/include/opus.h, so that the Opus encoder does not
 298   // degrade the audio due to memory constraints.
 299   //
 300   // Note: Whereas other RTP implementations do not, the cast library is
 301   // perfectly capable of transporting larger than MTU-sized audio frames.
 302   static const int kOpusMaxPayloadSize = 4000;
 303
 304   DISALLOW_COPY_AND_ASSIGN(OpusImpl);
 305 };
 306
 307 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 308  public:
 309   Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
 310             int num_channels,
 311             int sampling_rate,
 312             const FrameEncodedCallback& callback)
 313       : ImplBase(cast_environment,
 314                  transport::kPcm16,
 315                  num_channels,
 316                  sampling_rate,
 317                  callback),
 318         buffer_(new int16[num_channels * samples_per_frame_]) {
 319     if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED)
 320       return;
 321     cast_initialization_status_ = STATUS_AUDIO_INITIALIZED;
 322   }
 323
 324  private:
 325   virtual ~Pcm16Impl() {}
 326
 327   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 328                                          int source_offset,
 329                                          int buffer_fill_offset,
 330                                          int num_samples) OVERRIDE {
 331     audio_bus->ToInterleavedPartial(
 332         source_offset,
 333         num_samples,
 334         sizeof(int16),
 335         buffer_.get() + buffer_fill_offset * num_channels_);
 336   }
 337
 338   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 339     // Output 16-bit PCM integers in big-endian byte order.
 340     out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
 341     const int16* src = buffer_.get();
 342     const int16* const src_end = src + num_channels_ * samples_per_frame_;
 343     uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
 344     for (; src < src_end; ++src, ++dest)
 345       *dest = base::HostToNet16(*src);
 346     return true;
 347   }
 348
 349  private:
 350   const scoped_ptr<int16[]> buffer_;
 351
 352   DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
 353 };
 354
 355 AudioEncoder::AudioEncoder(
 356     const scoped_refptr<CastEnvironment>& cast_environment,
 357     const AudioSenderConfig& audio_config,
 358     const FrameEncodedCallback& frame_encoded_callback)
 359     : cast_environment_(cast_environment) {
 360   // Note: It doesn't matter which thread constructs AudioEncoder, just so long
 361   // as all calls to InsertAudio() are by the same thread.
 362   insert_thread_checker_.DetachFromThread();
 363   switch (audio_config.codec) {
 364     case transport::kOpus:
 365       impl_ = new OpusImpl(cast_environment,
 366                            audio_config.channels,
 367                            audio_config.frequency,
 368                            audio_config.bitrate,
 369                            frame_encoded_callback);
 370       break;
 371     case transport::kPcm16:
 372       impl_ = new Pcm16Impl(cast_environment,
 373                             audio_config.channels,
 374                             audio_config.frequency,
 375                             frame_encoded_callback);
 376       break;
 377     default:
 378       NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
 379       break;
 380   }
 381 }
 382
 383 AudioEncoder::~AudioEncoder() {}
 384
 385 CastInitializationStatus AudioEncoder::InitializationResult() const {
 386   DCHECK(insert_thread_checker_.CalledOnValidThread());
 387   if (impl_) {
 388     return impl_->InitializationResult();
 389   }
 390   return STATUS_UNSUPPORTED_AUDIO_CODEC;
 391 }
 392
 393 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
 394                                const base::TimeTicks& recorded_time) {
 395   DCHECK(insert_thread_checker_.CalledOnValidThread());
 396   DCHECK(audio_bus.get());
 397   if (!impl_) {
 398     NOTREACHED();
 399     return;
 400   }
 401   cast_environment_->PostTask(CastEnvironment::AUDIO,
 402                               FROM_HERE,
 403                               base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
 404                                          impl_,
 405                                          base::Passed(&audio_bus),
 406                                          recorded_time));
 407 }
 408
 409 }  // namespace cast
 410 }  // namespace media