media/cast/audio_sender/audio_encoder.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/cast/audio_sender/audio_encoder.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/bind.h"
  10 #include "base/bind_helpers.h"
  11 #include "base/logging.h"
  12 #include "base/message_loop/message_loop.h"
  13 #include "base/sys_byteorder.h"
  14 #include "base/time/time.h"
  15 #include "media/base/audio_bus.h"
  16 #include "media/cast/cast_defines.h"
  17 #include "media/cast/cast_environment.h"
  18 #include "third_party/opus/src/include/opus.h"
  19
  20 namespace media {
  21 namespace cast {
  22
  23 // Base class that handles the common problem of feeding one or more AudioBus'
  24 // data into a 10 ms buffer and then, once the buffer is full, encoding the
  25 // signal and emitting an EncodedAudioFrame via the FrameEncodedCallback.
  26 //
  27 // Subclasses complete the implementation by handling the actual encoding
  28 // details.
  29 class AudioEncoder::ImplBase : public base::SupportsWeakPtr<ImplBase> {
  30  public:
  31   ImplBase(CastEnvironment* cast_environment,
  32            transport::AudioCodec codec,
  33            int num_channels,
  34            int sampling_rate,
  35            const FrameEncodedCallback& callback)
  36       : cast_environment_(cast_environment),
  37         codec_(codec),
  38         num_channels_(num_channels),
  39         samples_per_10ms_(sampling_rate / 100),
  40         callback_(callback),
  41         buffer_fill_end_(0),
  42         frame_id_(0),
  43         rtp_timestamp_(0),
  44         weak_factory_(this) {
  45     DCHECK_GT(num_channels_, 0);
  46     DCHECK_GT(samples_per_10ms_, 0);
  47     DCHECK_EQ(sampling_rate % 100, 0);
  48     DCHECK_LE(samples_per_10ms_ * num_channels_,
  49               transport::EncodedAudioFrame::kMaxNumberOfSamples);
  50
  51     if (num_channels_ <= 0 || samples_per_10ms_ <= 0 ||
  52         sampling_rate % 100 != 0 ||
  53         samples_per_10ms_ * num_channels_ >
  54             transport::EncodedAudioFrame::kMaxNumberOfSamples) {
  55       initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION;
  56     } else {
  57       initialization_status_ = STATUS_INITIALIZED;
  58     }
  59   }
  60
  61   virtual ~ImplBase() {}
  62
  63   CastInitializationStatus InitializationResult() const {
  64     return initialization_status_;
  65   }
  66
  67   void LogAudioFrameEvent(uint32 rtp_timestamp,
  68                           uint32 frame_id,
  69                           CastLoggingEvent type) {
  70     cast_environment_->Logging()->InsertFrameEvent(
  71         cast_environment_->Clock()->NowTicks(), type, rtp_timestamp, frame_id);
  72   }
  73
  74   void EncodeAudio(const AudioBus* audio_bus,
  75                    const base::TimeTicks& recorded_time,
  76                    const base::Closure& done_callback) {
  77     int src_pos = 0;
  78     int packet_count = 0;
  79     while (audio_bus && src_pos < audio_bus->frames()) {
  80       const int num_samples_to_xfer = std::min(
  81           samples_per_10ms_ - buffer_fill_end_, audio_bus->frames() - src_pos);
  82       DCHECK_EQ(audio_bus->channels(), num_channels_);
  83       TransferSamplesIntoBuffer(
  84           audio_bus, src_pos, buffer_fill_end_, num_samples_to_xfer);
  85       src_pos += num_samples_to_xfer;
  86       buffer_fill_end_ += num_samples_to_xfer;
  87
  88       if (src_pos == audio_bus->frames()) {
  89         cast_environment_->PostTask(
  90             CastEnvironment::MAIN, FROM_HERE, done_callback);
  91         // Note: |audio_bus| is invalid once done_callback is invoked.
  92         audio_bus = NULL;
  93       }
  94
  95       if (buffer_fill_end_ == samples_per_10ms_) {
  96         scoped_ptr<transport::EncodedAudioFrame> audio_frame(
  97             new transport::EncodedAudioFrame());
  98         audio_frame->codec = codec_;
  99         audio_frame->frame_id = frame_id_++;
 100         rtp_timestamp_ += samples_per_10ms_;
 101         audio_frame->rtp_timestamp = rtp_timestamp_;
 102
 103         // Update logging.
 104         cast_environment_->PostTask(CastEnvironment::MAIN,
 105                                     FROM_HERE,
 106                                     base::Bind(&ImplBase::LogAudioFrameEvent,
 107                                                weak_factory_.GetWeakPtr(),
 108                                                audio_frame->rtp_timestamp,
 109                                                audio_frame->frame_id,
 110                                                kAudioFrameReceived));
 111
 112         if (EncodeFromFilledBuffer(&audio_frame->data)) {
 113           // Update logging.
 114           cast_environment_->PostTask(CastEnvironment::MAIN,
 115                                       FROM_HERE,
 116                                       base::Bind(&ImplBase::LogAudioFrameEvent,
 117                                                  weak_factory_.GetWeakPtr(),
 118                                                  audio_frame->rtp_timestamp,
 119                                                  audio_frame->frame_id,
 120                                                  kAudioFrameEncoded));
 121           // Compute an offset to determine the recorded time for the first
 122           // audio sample in the buffer.
 123           const base::TimeDelta buffer_time_offset =
 124               (buffer_fill_end_ - src_pos) *
 125               base::TimeDelta::FromMilliseconds(10) / samples_per_10ms_;
 126           // TODO(miu): Consider batching EncodedAudioFrames so we only post a
 127           // at most one task for each call to this method.
 128           // Postpone every packet by 10mS with respect to the previous. Playout
 129           // is postponed already by 10mS, and this will better correlate with
 130           // the pacer's expectations.
 131           //TODO(mikhal): Turn this into a list of packets.
 132           // Update the end2end allowed error once this is fixed.
 133           cast_environment_->PostDelayedTask(
 134               CastEnvironment::MAIN,
 135               FROM_HERE,
 136               base::Bind(callback_,
 137                          base::Passed(&audio_frame),
 138                          recorded_time - buffer_time_offset),
 139               base::TimeDelta::FromMilliseconds(packet_count * 10));
 140           ++packet_count;
 141         }
 142         buffer_fill_end_ = 0;
 143       }
 144     }
 145   }
 146
 147  protected:
 148   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 149                                          int source_offset,
 150                                          int buffer_fill_offset,
 151                                          int num_samples) = 0;
 152   virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
 153
 154   CastEnvironment* const cast_environment_;
 155   const transport::AudioCodec codec_;
 156   const int num_channels_;
 157   const int samples_per_10ms_;
 158   const FrameEncodedCallback callback_;
 159   CastInitializationStatus initialization_status_;
 160
 161  private:
 162   // In the case where a call to EncodeAudio() cannot completely fill the
 163   // buffer, this points to the position at which to populate data in a later
 164   // call.
 165   int buffer_fill_end_;
 166
 167   // A counter used to label EncodedAudioFrames.
 168   uint32 frame_id_;
 169
 170   // For audio, rtp_timestamp is computed as the sum of the audio samples seen
 171   // so far.
 172   uint32 rtp_timestamp_;
 173
 174   base::WeakPtrFactory<ImplBase> weak_factory_;
 175
 176  private:
 177   DISALLOW_COPY_AND_ASSIGN(ImplBase);
 178 };
 179
 180 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 181  public:
 182   OpusImpl(CastEnvironment* cast_environment,
 183            int num_channels,
 184            int sampling_rate,
 185            int bitrate,
 186            const FrameEncodedCallback& callback)
 187       : ImplBase(cast_environment,
 188                  transport::kOpus,
 189                  num_channels,
 190                  sampling_rate,
 191                  callback),
 192         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
 193         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
 194         buffer_(new float[num_channels * samples_per_10ms_]) {
 195     if (ImplBase::initialization_status_ != STATUS_INITIALIZED) {
 196       return;
 197     }
 198
 199     CHECK_EQ(
 200         opus_encoder_init(
 201             opus_encoder_, sampling_rate, num_channels, OPUS_APPLICATION_AUDIO),
 202         OPUS_OK);
 203     if (bitrate <= 0) {
 204       // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
 205       // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
 206       // frame size.  The opus library authors may, of course, adjust this in
 207       // later versions.
 208       bitrate = OPUS_AUTO;
 209     }
 210     CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
 211              OPUS_OK);
 212   }
 213
 214   virtual ~OpusImpl() {}
 215
 216  private:
 217   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 218                                          int source_offset,
 219                                          int buffer_fill_offset,
 220                                          int num_samples) OVERRIDE {
 221     // Opus requires channel-interleaved samples in a single array.
 222     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 223       const float* src = audio_bus->channel(ch) + source_offset;
 224       const float* const src_end = src + num_samples;
 225       float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
 226       for (; src < src_end; ++src, dest += num_channels_)
 227         *dest = *src;
 228     }
 229   }
 230
 231   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 232     out->resize(kOpusMaxPayloadSize);
 233     const opus_int32 result =
 234         opus_encode_float(opus_encoder_,
 235                           buffer_.get(),
 236                           samples_per_10ms_,
 237                           reinterpret_cast<uint8*>(&out->at(0)),
 238                           kOpusMaxPayloadSize);
 239     if (result > 1) {
 240       out->resize(result);
 241       return true;
 242     } else if (result < 0) {
 243       LOG(ERROR) << "Error code from opus_encode_float(): " << result;
 244       return false;
 245     } else {
 246       // Do nothing: The documentation says that a return value of zero or
 247       // one byte means the packet does not need to be transmitted.
 248       return false;
 249     }
 250   }
 251
 252   const scoped_ptr<uint8[]> encoder_memory_;
 253   OpusEncoder* const opus_encoder_;
 254   const scoped_ptr<float[]> buffer_;
 255
 256   // This is the recommended value, according to documentation in
 257   // third_party/opus/src/include/opus.h, so that the Opus encoder does not
 258   // degrade the audio due to memory constraints.
 259   //
 260   // Note: Whereas other RTP implementations do not, the cast library is
 261   // perfectly capable of transporting larger than MTU-sized audio frames.
 262   static const int kOpusMaxPayloadSize = 4000;
 263
 264   DISALLOW_COPY_AND_ASSIGN(OpusImpl);
 265 };
 266
 267 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 268  public:
 269   Pcm16Impl(CastEnvironment* cast_environment,
 270             int num_channels,
 271             int sampling_rate,
 272             const FrameEncodedCallback& callback)
 273       : ImplBase(cast_environment,
 274                  transport::kPcm16,
 275                  num_channels,
 276                  sampling_rate,
 277                  callback),
 278         buffer_(new int16[num_channels * samples_per_10ms_]) {}
 279
 280   virtual ~Pcm16Impl() {}
 281
 282  private:
 283   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 284                                          int source_offset,
 285                                          int buffer_fill_offset,
 286                                          int num_samples) OVERRIDE {
 287     audio_bus->ToInterleavedPartial(
 288         source_offset,
 289         num_samples,
 290         sizeof(int16),
 291         buffer_.get() + buffer_fill_offset * num_channels_);
 292   }
 293
 294   virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
 295     // Output 16-bit PCM integers in big-endian byte order.
 296     out->resize(num_channels_ * samples_per_10ms_ * sizeof(int16));
 297     const int16* src = buffer_.get();
 298     const int16* const src_end = src + num_channels_ * samples_per_10ms_;
 299     uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
 300     for (; src < src_end; ++src, ++dest)
 301       *dest = base::HostToNet16(*src);
 302     return true;
 303   }
 304
 305  private:
 306   const scoped_ptr<int16[]> buffer_;
 307
 308   DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
 309 };
 310
 311 AudioEncoder::AudioEncoder(
 312     const scoped_refptr<CastEnvironment>& cast_environment,
 313     const AudioSenderConfig& audio_config,
 314     const FrameEncodedCallback& frame_encoded_callback)
 315     : cast_environment_(cast_environment) {
 316   // Note: It doesn't matter which thread constructs AudioEncoder, just so long
 317   // as all calls to InsertAudio() are by the same thread.
 318   insert_thread_checker_.DetachFromThread();
 319
 320   switch (audio_config.codec) {
 321     case transport::kOpus:
 322       impl_.reset(new OpusImpl(cast_environment,
 323                                audio_config.channels,
 324                                audio_config.frequency,
 325                                audio_config.bitrate,
 326                                frame_encoded_callback));
 327       break;
 328     case transport::kPcm16:
 329       impl_.reset(new Pcm16Impl(cast_environment,
 330                                 audio_config.channels,
 331                                 audio_config.frequency,
 332                                 frame_encoded_callback));
 333       break;
 334     default:
 335       NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
 336       break;
 337   }
 338 }
 339
// Intentionally empty: no explicit teardown is performed here.
AudioEncoder::~AudioEncoder() {}
 341
 342 CastInitializationStatus AudioEncoder::InitializationResult() const {
 343   if (impl_) {
 344     return impl_->InitializationResult();
 345   }
 346   return STATUS_UNSUPPORTED_AUDIO_CODEC;
 347 }
 348
 349 void AudioEncoder::InsertAudio(const AudioBus* audio_bus,
 350                                const base::TimeTicks& recorded_time,
 351                                const base::Closure& done_callback) {
 352   DCHECK(insert_thread_checker_.CalledOnValidThread());
 353   if (!impl_) {
 354     NOTREACHED();
 355     cast_environment_->PostTask(
 356         CastEnvironment::MAIN, FROM_HERE, done_callback);
 357     return;
 358   }
 359   cast_environment_->PostTask(CastEnvironment::AUDIO_ENCODER,
 360                               FROM_HERE,
 361                               base::Bind(&AudioEncoder::EncodeAudio,
 362                                          this,
 363                                          audio_bus,
 364                                          recorded_time,
 365                                          done_callback));
 366 }
 367
 368 void AudioEncoder::EncodeAudio(const AudioBus* audio_bus,
 369                                const base::TimeTicks& recorded_time,
 370                                const base::Closure& done_callback) {
 371   DCHECK(cast_environment_->CurrentlyOn(CastEnvironment::AUDIO_ENCODER));
 372   impl_->EncodeAudio(audio_bus, recorded_time, done_callback);
 373 }
 374
 375 }  // namespace cast
 376 }  // namespace media