media/cast/sender/audio_encoder.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/cast/sender/audio_encoder.h"
   6
   7 #include <algorithm>
   8 #include <limits>
   9 #include <string>
  10
  11 #include "base/bind.h"
  12 #include "base/bind_helpers.h"
  13 #include "base/location.h"
  14 #include "base/stl_util.h"
  15 #include "base/sys_byteorder.h"
  16 #include "base/time/time.h"
  17 #include "media/cast/cast_defines.h"
  18
  19 #if !defined(OS_IOS)
  20 #include "third_party/opus/src/include/opus.h"
  21 #endif
  22
  23 #if defined(OS_MACOSX)
  24 #include <AudioToolbox/AudioToolbox.h>
  25 #endif
  26
  27 namespace media {
  28 namespace cast {
  29
  30 namespace {
  31
  32 const int kUnderrunSkipThreshold = 3;
  33 const int kDefaultFramesPerSecond = 100;
  34
  35 }  // namespace
  36
  37 // Base class that handles the common problem of feeding one or more AudioBus'
  38 // data into a buffer and then, once the buffer is full, encoding the signal and
  39 // emitting a SenderEncodedFrame via the FrameEncodedCallback.
  40 //
  41 // Subclasses complete the implementation by handling the actual encoding
  42 // details.
  43 class AudioEncoder::ImplBase
  44     : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
  45  public:
  46   ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
  47            Codec codec,
  48            int num_channels,
  49            int sampling_rate,
  50            int samples_per_frame,
  51            const FrameEncodedCallback& callback)
  52       : cast_environment_(cast_environment),
  53         codec_(codec),
  54         num_channels_(num_channels),
  55         samples_per_frame_(samples_per_frame),
  56         callback_(callback),
  57         operational_status_(STATUS_UNINITIALIZED),
  58         frame_duration_(base::TimeDelta::FromMicroseconds(
  59             base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
  60             sampling_rate)),
  61         buffer_fill_end_(0),
  62         frame_id_(0),
  63         frame_rtp_timestamp_(0),
  64         samples_dropped_from_buffer_(0) {
  65     // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
  66     const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
  67     if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
  68         frame_duration_ == base::TimeDelta() ||
  69         samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
  70       operational_status_ = STATUS_INVALID_CONFIGURATION;
  71     }
  72   }
  73
  74   OperationalStatus InitializationResult() const {
  75     return operational_status_;
  76   }
  77
  78   int samples_per_frame() const {
  79     return samples_per_frame_;
  80   }
  81
  82   base::TimeDelta frame_duration() const { return frame_duration_; }
  83
  84   void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
  85                    const base::TimeTicks& recorded_time) {
  86     DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
  87     DCHECK(!recorded_time.is_null());
  88
  89     // Determine whether |recorded_time| is consistent with the amount of audio
  90     // data having been processed in the past.  Resolve the underrun problem by
  91     // dropping data from the internal buffer and skipping ahead the next
  92     // frame's RTP timestamp by the estimated number of frames missed.  On the
  93     // other hand, don't attempt to resolve overruns: A receiver should
  94     // gracefully deal with an excess of audio data.
  95     base::TimeDelta buffer_fill_duration =
  96         buffer_fill_end_ * frame_duration_ / samples_per_frame_;
  97     if (!frame_capture_time_.is_null()) {
  98       const base::TimeDelta amount_ahead_by =
  99           recorded_time - (frame_capture_time_ + buffer_fill_duration);
 100       const int64 num_frames_missed = amount_ahead_by / frame_duration_;
 101       if (num_frames_missed > kUnderrunSkipThreshold) {
 102         samples_dropped_from_buffer_ += buffer_fill_end_;
 103         buffer_fill_end_ = 0;
 104         buffer_fill_duration = base::TimeDelta();
 105         frame_rtp_timestamp_ +=
 106             static_cast<uint32>(num_frames_missed * samples_per_frame_);
 107         DVLOG(1) << "Skipping RTP timestamp ahead to account for "
 108                  << num_frames_missed * samples_per_frame_
 109                  << " samples' worth of underrun.";
 110       }
 111     }
 112     frame_capture_time_ = recorded_time - buffer_fill_duration;
 113
 114     // Encode all audio in |audio_bus| into zero or more frames.
 115     int src_pos = 0;
 116     while (src_pos < audio_bus->frames()) {
 117       // Note: This is used to compute the deadline utilization and so it uses
 118       // the real-world clock instead of the CastEnvironment clock, the latter
 119       // of which might be simulated.
 120       const base::TimeTicks start_time = base::TimeTicks::Now();
 121
 122       const int num_samples_to_xfer = std::min(
 123           samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
 124       DCHECK_EQ(audio_bus->channels(), num_channels_);
 125       TransferSamplesIntoBuffer(
 126           audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
 127       src_pos += num_samples_to_xfer;
 128       buffer_fill_end_ += num_samples_to_xfer;
 129
 130       if (buffer_fill_end_ < samples_per_frame_)
 131         break;
 132
 133       scoped_ptr<SenderEncodedFrame> audio_frame(
 134           new SenderEncodedFrame());
 135       audio_frame->dependency = EncodedFrame::KEY;
 136       audio_frame->frame_id = frame_id_;
 137       audio_frame->referenced_frame_id = frame_id_;
 138       audio_frame->rtp_timestamp = frame_rtp_timestamp_;
 139       audio_frame->reference_time = frame_capture_time_;
 140
 141       if (EncodeFromFilledBuffer(&audio_frame->data)) {
 142         // Compute deadline utilization as the real-world time elapsed divided
 143         // by the signal duration.
 144         audio_frame->deadline_utilization =
 145             (base::TimeTicks::Now() - start_time).InSecondsF() /
 146                 frame_duration_.InSecondsF();
 147
 148         cast_environment_->PostTask(
 149             CastEnvironment::MAIN,
 150             FROM_HERE,
 151             base::Bind(callback_,
 152                        base::Passed(&audio_frame),
 153                        samples_dropped_from_buffer_));
 154         samples_dropped_from_buffer_ = 0;
 155       }
 156
 157       // Reset the internal buffer, frame ID, and timestamps for the next frame.
 158       buffer_fill_end_ = 0;
 159       ++frame_id_;
 160       frame_rtp_timestamp_ += samples_per_frame_;
 161       frame_capture_time_ += frame_duration_;
 162     }
 163   }
 164
 165  protected:
 166   friend class base::RefCountedThreadSafe<ImplBase>;
 167   virtual ~ImplBase() {}
 168
 169   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 170                                          int source_offset,
 171                                          int buffer_fill_offset,
 172                                          int num_samples) = 0;
 173   virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
 174
 175   const scoped_refptr<CastEnvironment> cast_environment_;
 176   const Codec codec_;
 177   const int num_channels_;
 178   const int samples_per_frame_;
 179   const FrameEncodedCallback callback_;
 180
 181   // Subclass' ctor is expected to set this to STATUS_INITIALIZED.
 182   OperationalStatus operational_status_;
 183
 184   // The duration of one frame of encoded audio samples. Derived from
 185   // |samples_per_frame_| and the sampling rate.
 186   const base::TimeDelta frame_duration_;
 187
 188  private:
 189   // In the case where a call to EncodeAudio() cannot completely fill the
 190   // buffer, this points to the position at which to populate data in a later
 191   // call.
 192   int buffer_fill_end_;
 193
 194   // A counter used to label EncodedFrames.
 195   uint32 frame_id_;
 196
 197   // The RTP timestamp for the next frame of encoded audio.  This is defined as
 198   // the number of audio samples encoded so far, plus the estimated number of
 199   // samples that were missed due to data underruns.  A receiver uses this value
 200   // to detect gaps in the audio signal data being provided.  Per the spec, RTP
 201   // timestamp values are allowed to overflow and roll around past zero.
 202   uint32 frame_rtp_timestamp_;
 203
 204   // The local system time associated with the start of the next frame of
 205   // encoded audio.  This value is passed on to a receiver as a reference clock
 206   // timestamp for the purposes of synchronizing audio and video.  Its
 207   // progression is expected to drift relative to the elapsed time implied by
 208   // the RTP timestamps.
 209   base::TimeTicks frame_capture_time_;
 210
 211   // Set to non-zero to indicate the next output frame skipped over audio
 212   // samples in order to recover from an input underrun.
 213   int samples_dropped_from_buffer_;
 214
 215   DISALLOW_COPY_AND_ASSIGN(ImplBase);
 216 };
 217
 218 #if !defined(OS_IOS)
 219 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 220  public:
 221   OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
 222            int num_channels,
 223            int sampling_rate,
 224            int bitrate,
 225            const FrameEncodedCallback& callback)
 226       : ImplBase(cast_environment,
 227                  CODEC_AUDIO_OPUS,
 228                  num_channels,
 229                  sampling_rate,
 230                  sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
 231                  callback),
 232         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
 233         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
 234         buffer_(new float[num_channels * samples_per_frame_]) {
 235     if (ImplBase::operational_status_ != STATUS_UNINITIALIZED ||
 236         sampling_rate % samples_per_frame_ != 0 ||
 237         !IsValidFrameDuration(frame_duration_)) {
 238       return;
 239     }
 240     if (opus_encoder_init(opus_encoder_,
 241                           sampling_rate,
 242                           num_channels,
 243                           OPUS_APPLICATION_AUDIO) != OPUS_OK) {
 244       ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
 245       return;
 246     }
 247     ImplBase::operational_status_ = STATUS_INITIALIZED;
 248
 249     if (bitrate <= 0) {
 250       // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
 251       // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
 252       // frame size.  The opus library authors may, of course, adjust this in
 253       // later versions.
 254       bitrate = OPUS_AUTO;
 255     }
 256     CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
 257              OPUS_OK);
 258   }
 259
 260  private:
 261   ~OpusImpl() final {}
 262
 263   void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 264                                  int source_offset,
 265                                  int buffer_fill_offset,
 266                                  int num_samples) final {
 267     // Opus requires channel-interleaved samples in a single array.
 268     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 269       const float* src = audio_bus->channel(ch) + source_offset;
 270       const float* const src_end = src + num_samples;
 271       float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
 272       for (; src < src_end; ++src, dest += num_channels_)
 273         *dest = *src;
 274     }
 275   }
 276
 277   bool EncodeFromFilledBuffer(std::string* out) final {
 278     out->resize(kOpusMaxPayloadSize);
 279     const opus_int32 result =
 280         opus_encode_float(opus_encoder_,
 281                           buffer_.get(),
 282                           samples_per_frame_,
 283                           reinterpret_cast<uint8*>(string_as_array(out)),
 284                           kOpusMaxPayloadSize);
 285     if (result > 1) {
 286       out->resize(result);
 287       return true;
 288     } else if (result < 0) {
 289       LOG(ERROR) << "Error code from opus_encode_float(): " << result;
 290       return false;
 291     } else {
 292       // Do nothing: The documentation says that a return value of zero or
 293       // one byte means the packet does not need to be transmitted.
 294       return false;
 295     }
 296   }
 297
 298   static bool IsValidFrameDuration(base::TimeDelta duration) {
 299     // See https://tools.ietf.org/html/rfc6716#section-2.1.4
 300     return duration == base::TimeDelta::FromMicroseconds(2500) ||
 301            duration == base::TimeDelta::FromMilliseconds(5) ||
 302            duration == base::TimeDelta::FromMilliseconds(10) ||
 303            duration == base::TimeDelta::FromMilliseconds(20) ||
 304            duration == base::TimeDelta::FromMilliseconds(40) ||
 305            duration == base::TimeDelta::FromMilliseconds(60);
 306   }
 307
 308   const scoped_ptr<uint8[]> encoder_memory_;
 309   OpusEncoder* const opus_encoder_;
 310   const scoped_ptr<float[]> buffer_;
 311
 312   // This is the recommended value, according to documentation in
 313   // third_party/opus/src/include/opus.h, so that the Opus encoder does not
 314   // degrade the audio due to memory constraints.
 315   //
 316   // Note: Whereas other RTP implementations do not, the cast library is
 317   // perfectly capable of transporting larger than MTU-sized audio frames.
 318   static const int kOpusMaxPayloadSize = 4000;
 319
 320   DISALLOW_COPY_AND_ASSIGN(OpusImpl);
 321 };
 322 #endif
 323
 324 #if defined(OS_MACOSX)
 325 class AudioEncoder::AppleAacImpl : public AudioEncoder::ImplBase {
 326   // AAC-LC has two access unit sizes (960 and 1024). The Apple encoder only
 327   // supports the latter.
 328   static const int kAccessUnitSamples = 1024;
 329
 330   // Size of an ADTS header (w/o checksum). See
 331   // http://wiki.multimedia.cx/index.php?title=ADTS
 332   static const int kAdtsHeaderSize = 7;
 333
 334  public:
 335   AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
 336                int num_channels,
 337                int sampling_rate,
 338                int bitrate,
 339                const FrameEncodedCallback& callback)
 340       : ImplBase(cast_environment,
 341                  CODEC_AUDIO_AAC,
 342                  num_channels,
 343                  sampling_rate,
 344                  kAccessUnitSamples,
 345                  callback),
 346         input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
 347         input_bus_(AudioBus::CreateWrapper(num_channels)),
 348         max_access_unit_size_(0),
 349         output_buffer_(nullptr),
 350         converter_(nullptr),
 351         file_(nullptr),
 352         num_access_units_(0),
 353         can_resume_(true) {
 354     if (ImplBase::operational_status_ != STATUS_UNINITIALIZED) {
 355       return;
 356     }
 357     if (!Initialize(sampling_rate, bitrate)) {
 358       ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
 359       return;
 360     }
 361     ImplBase::operational_status_ = STATUS_INITIALIZED;
 362   }
 363
 364  private:
 365   ~AppleAacImpl() final { Teardown(); }
 366
 367   // Destroys the existing audio converter and file, if any.
 368   void Teardown() {
 369     if (converter_) {
 370       AudioConverterDispose(converter_);
 371       converter_ = nullptr;
 372     }
 373     if (file_) {
 374       AudioFileClose(file_);
 375       file_ = nullptr;
 376     }
 377   }
 378
 379   // Initializes the audio converter and file. Calls Teardown to destroy any
 380   // existing state. This is so that Initialize() may be called to setup another
 381   // converter after a non-resumable interruption.
 382   bool Initialize(int sampling_rate, int bitrate) {
 383     // Teardown previous audio converter and file.
 384     Teardown();
 385
 386     // Input data comes from AudioBus objects, which carry non-interleaved
 387     // packed native-endian float samples. Note that in Core Audio, a frame is
 388     // one sample across all channels at a given point in time. When describing
 389     // a non-interleaved samples format, the "per frame" fields mean "per
 390     // channel" or "per stream", with the exception of |mChannelsPerFrame|. For
 391     // uncompressed formats, one packet contains one frame.
 392     AudioStreamBasicDescription in_asbd;
 393     in_asbd.mSampleRate = sampling_rate;
 394     in_asbd.mFormatID = kAudioFormatLinearPCM;
 395     in_asbd.mFormatFlags =
 396         kAudioFormatFlagsNativeFloatPacked | kAudioFormatFlagIsNonInterleaved;
 397     in_asbd.mChannelsPerFrame = num_channels_;
 398     in_asbd.mBitsPerChannel = sizeof(float) * 8;
 399     in_asbd.mFramesPerPacket = 1;
 400     in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
 401     in_asbd.mReserved = 0;
 402
 403     // Request AAC-LC encoding, with no downmixing or downsampling.
 404     AudioStreamBasicDescription out_asbd;
 405     memset(&out_asbd, 0, sizeof(AudioStreamBasicDescription));
 406     out_asbd.mSampleRate = sampling_rate;
 407     out_asbd.mFormatID = kAudioFormatMPEG4AAC;
 408     out_asbd.mChannelsPerFrame = num_channels_;
 409     UInt32 prop_size = sizeof(out_asbd);
 410     if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
 411                                0,
 412                                nullptr,
 413                                &prop_size,
 414                                &out_asbd) != noErr) {
 415       return false;
 416     }
 417
 418     if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
 419       return false;
 420     }
 421
 422     // The converter will fully specify the output format and update the
 423     // relevant fields of the structure, which we can now query.
 424     prop_size = sizeof(out_asbd);
 425     if (AudioConverterGetProperty(converter_,
 426                                   kAudioConverterCurrentOutputStreamDescription,
 427                                   &prop_size,
 428                                   &out_asbd) != noErr) {
 429       return false;
 430     }
 431
 432     // If bitrate is <= 0, allow the encoder to pick a suitable value.
 433     // Otherwise, set the bitrate (which can fail if the value is not suitable
 434     // or compatible with the output sampling rate or channels).
 435     if (bitrate > 0) {
 436       prop_size = sizeof(int);
 437       if (AudioConverterSetProperty(
 438               converter_, kAudioConverterEncodeBitRate, prop_size, &bitrate) !=
 439           noErr) {
 440         return false;
 441       }
 442     }
 443
 444 #if defined(OS_IOS)
 445     // See the comment next to |can_resume_| for details on resumption. Some
 446     // converters can return kAudioConverterErr_PropertyNotSupported, in which
 447     // case resumption is implicitly supported. This is the only location where
 448     // the implementation modifies |can_resume_|.
 449     uint32_t can_resume;
 450     prop_size = sizeof(can_resume);
 451     OSStatus oserr = AudioConverterGetProperty(
 452         converter_,
 453         kAudioConverterPropertyCanResumeFromInterruption,
 454         &prop_size,
 455         &can_resume);
 456     if (oserr == noErr) {
 457       const_cast<bool&>(can_resume_) = can_resume != 0;
 458     }
 459 #endif
 460
 461     // Figure out the maximum size of an access unit that the encoder can
 462     // produce. |mBytesPerPacket| will be 0 for variable size configurations,
 463     // in which case we must query the value.
 464     uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
 465     if (max_access_unit_size == 0) {
 466       prop_size = sizeof(max_access_unit_size);
 467       if (AudioConverterGetProperty(
 468               converter_,
 469               kAudioConverterPropertyMaximumOutputPacketSize,
 470               &prop_size,
 471               &max_access_unit_size) != noErr) {
 472         return false;
 473       }
 474     }
 475
 476     // This is the only location where the implementation modifies
 477     // |max_access_unit_size_|.
 478     const_cast<uint32_t&>(max_access_unit_size_) = max_access_unit_size;
 479
 480     // Allocate a buffer to store one access unit. This is the only location
 481     // where the implementation modifies |access_unit_buffer_|.
 482     const_cast<scoped_ptr<uint8[]>&>(access_unit_buffer_)
 483         .reset(new uint8[max_access_unit_size]);
 484
 485     // Initialize the converter ABL. Note that the buffer size has to be set
 486     // before every encode operation, since the field is modified to indicate
 487     // the size of the output data (on input it indicates the buffer capacity).
 488     converter_abl_.mNumberBuffers = 1;
 489     converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
 490     converter_abl_.mBuffers[0].mData = access_unit_buffer_.get();
 491
 492     // The "magic cookie" is an encoder state vector required for decoding and
 493     // packetization. It is queried now from |converter_| then set on |file_|
 494     // after initialization.
 495     UInt32 cookie_size;
 496     if (AudioConverterGetPropertyInfo(converter_,
 497                                       kAudioConverterCompressionMagicCookie,
 498                                       &cookie_size,
 499                                       nullptr) != noErr) {
 500       return false;
 501     }
 502     scoped_ptr<uint8[]> cookie_data(new uint8[cookie_size]);
 503     if (AudioConverterGetProperty(converter_,
 504                                   kAudioConverterCompressionMagicCookie,
 505                                   &cookie_size,
 506                                   cookie_data.get()) != noErr) {
 507       return false;
 508     }
 509
 510     if (AudioFileInitializeWithCallbacks(this,
 511                                          nullptr,
 512                                          &FileWriteCallback,
 513                                          nullptr,
 514                                          nullptr,
 515                                          kAudioFileAAC_ADTSType,
 516                                          &out_asbd,
 517                                          0,
 518                                          &file_) != noErr) {
 519       return false;
 520     }
 521
 522     if (AudioFileSetProperty(file_,
 523                              kAudioFilePropertyMagicCookieData,
 524                              cookie_size,
 525                              cookie_data.get()) != noErr) {
 526       return false;
 527     }
 528
 529     // Initially the input bus points to the input buffer. See the comment on
 530     // |input_bus_| for more on this optimization.
 531     input_bus_->set_frames(kAccessUnitSamples);
 532     for (int ch = 0; ch < input_buffer_->channels(); ++ch) {
 533       input_bus_->SetChannelData(ch, input_buffer_->channel(ch));
 534     }
 535
 536     return true;
 537   }
 538
 539   void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 540                                  int source_offset,
 541                                  int buffer_fill_offset,
 542                                  int num_samples) final {
 543     DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());
 544
 545     // See the comment on |input_bus_| for more on this optimization. Note that
 546     // we cannot elide the copy if the source offset would result in an
 547     // unaligned pointer.
 548     if (num_samples == kAccessUnitSamples &&
 549         source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0) {
 550       DCHECK_EQ(buffer_fill_offset, 0);
 551       for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 552         auto samples = const_cast<float*>(audio_bus->channel(ch));
 553         input_bus_->SetChannelData(ch, samples + source_offset);
 554       }
 555       return;
 556     }
 557
 558     // Copy the samples into the input buffer.
 559     DCHECK_EQ(input_bus_->channel(0), input_buffer_->channel(0));
 560     audio_bus->CopyPartialFramesTo(
 561         source_offset, num_samples, buffer_fill_offset, input_buffer_.get());
 562   }
 563
 564   bool EncodeFromFilledBuffer(std::string* out) final {
 565     // Reset the buffer size field to the buffer capacity.
 566     converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;
 567
 568     // Encode the current input buffer. This is a sychronous call.
 569     OSStatus oserr;
 570     UInt32 io_num_packets = 1;
 571     AudioStreamPacketDescription packet_description;
 572     oserr = AudioConverterFillComplexBuffer(converter_,
 573                                             &ConverterFillDataCallback,
 574                                             this,
 575                                             &io_num_packets,
 576                                             &converter_abl_,
 577                                             &packet_description);
 578     if (oserr != noErr || io_num_packets == 0) {
 579       return false;
 580     }
 581
 582     // Reserve space in the output buffer to write the packet.
 583     out->reserve(packet_description.mDataByteSize + kAdtsHeaderSize);
 584
 585     // Set the current output buffer and emit an ADTS-wrapped AAC access unit.
 586     // This is a synchronous call. After it returns, reset the output buffer.
 587     output_buffer_ = out;
 588     oserr = AudioFileWritePackets(file_,
 589                                   false,
 590                                   converter_abl_.mBuffers[0].mDataByteSize,
 591                                   &packet_description,
 592                                   num_access_units_,
 593                                   &io_num_packets,
 594                                   converter_abl_.mBuffers[0].mData);
 595     output_buffer_ = nullptr;
 596     if (oserr != noErr || io_num_packets == 0) {
 597       return false;
 598     }
 599     num_access_units_ += io_num_packets;
 600     return true;
 601   }
 602
 603   // The |AudioConverterFillComplexBuffer| input callback function. Configures
 604   // the provided |AudioBufferList| to alias |input_bus_|. The implementation
 605   // can only supply |kAccessUnitSamples| samples as a result of not copying
 606   // samples or tracking read and write positions. Note that this function is
 607   // called synchronously by |AudioConverterFillComplexBuffer|.
 608   static OSStatus ConverterFillDataCallback(
 609       AudioConverterRef in_converter,
 610       UInt32* io_num_packets,
 611       AudioBufferList* io_data,
 612       AudioStreamPacketDescription** out_packet_desc,
 613       void* in_encoder) {
 614     DCHECK(in_encoder);
 615     auto encoder = reinterpret_cast<AppleAacImpl*>(in_encoder);
 616     auto input_buffer = encoder->input_buffer_.get();
 617     auto input_bus = encoder->input_bus_.get();
 618
 619     DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
 620     DCHECK_EQ(io_data->mNumberBuffers,
 621               static_cast<unsigned>(input_bus->channels()));
 622     for (int i_buf = 0, end = io_data->mNumberBuffers; i_buf < end; ++i_buf) {
 623       io_data->mBuffers[i_buf].mNumberChannels = 1;
 624       io_data->mBuffers[i_buf].mDataByteSize = sizeof(float) * *io_num_packets;
 625       io_data->mBuffers[i_buf].mData = input_bus->channel(i_buf);
 626
 627       // Reset the input bus back to the input buffer. See the comment on
 628       // |input_bus_| for more on this optimization.
 629       input_bus->SetChannelData(i_buf, input_buffer->channel(i_buf));
 630     }
 631     return noErr;
 632   }
 633
 634   // The AudioFile write callback function. Appends the data to the encoder's
 635   // current |output_buffer_|.
 636   static OSStatus FileWriteCallback(void* in_encoder,
 637                                     SInt64 in_position,
 638                                     UInt32 in_size,
 639                                     const void* in_buffer,
 640                                     UInt32* out_size) {
 641     DCHECK(in_encoder);
 642     DCHECK(in_buffer);
 643     auto encoder = reinterpret_cast<const AppleAacImpl*>(in_encoder);
 644     auto buffer = reinterpret_cast<const std::string::value_type*>(in_buffer);
 645
 646     std::string* const output_buffer = encoder->output_buffer_;
 647     DCHECK(output_buffer);
 648
 649     output_buffer->append(buffer, in_size);
 650     *out_size = in_size;
 651     return noErr;
 652   }
 653
 654   // Buffer that holds one AAC access unit worth of samples. The input callback
 655   // function provides samples from this buffer via |input_bus_| to the encoder.
 656   const scoped_ptr<AudioBus> input_buffer_;
 657
 658   // Wrapper AudioBus used by the input callback function. Normally it wraps
 659   // |input_buffer_|. However, as an optimization when the client submits a
 660   // buffer containing exactly one access unit worth of samples, the bus is
 661   // redirected to the client buffer temporarily. We know that the base
 662   // implementation will call us right after to encode the buffer and thus we
 663   // can eliminate the copy into |input_buffer_|.
 664   const scoped_ptr<AudioBus> input_bus_;
 665
 666   // A buffer that holds one AAC access unit. Initialized in |Initialize| once
 667   // the maximum access unit size is known.
 668   const scoped_ptr<uint8[]> access_unit_buffer_;
 669
 670   // The maximum size of an access unit that the encoder can emit.
 671   const uint32_t max_access_unit_size_;
 672
 673   // A temporary pointer to the current output buffer. Only non-null when
 674   // writing an access unit. Accessed by the AudioFile write callback function.
 675   std::string* output_buffer_;
 676
 677   // The |AudioConverter| is responsible for AAC encoding. This is a Core Audio
 678   // object, not to be confused with |media::AudioConverter|.
 679   AudioConverterRef converter_;
 680
 681   // The |AudioFile| is responsible for ADTS packetization.
 682   AudioFileID file_;
 683
 684   // An |AudioBufferList| passed to the converter to store encoded samples.
 685   AudioBufferList converter_abl_;
 686
 687   // The number of access units emitted so far by the encoder.
 688   uint64_t num_access_units_;
 689
 690   // On iOS, audio codecs can be interrupted by other services (such as an
 691   // audio alert or phone call). Depending on the underlying hardware and
 692   // configuration, the codec may have to be thrown away and re-initialized
 693   // after such an interruption. This flag tracks if we can resume or not from
 694   // such an interruption. It is initialized to true, which is the only possible
 695   // value on OS X and on most modern iOS hardware.
 696   // TODO(jfroy): Implement encoder re-initialization after interruption.
 697   //              https://crbug.com/424787
 698   const bool can_resume_;
 699
 700   DISALLOW_COPY_AND_ASSIGN(AppleAacImpl);
 701 };
 702 #endif  // defined(OS_MACOSX)
 703
 704 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 705  public:
 706   Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
 707             int num_channels,
 708             int sampling_rate,
 709             const FrameEncodedCallback& callback)
 710       : ImplBase(cast_environment,
 711                  CODEC_AUDIO_PCM16,
 712                  num_channels,
 713                  sampling_rate,
 714                  sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
 715                  callback),
 716         buffer_(new int16[num_channels * samples_per_frame_]) {
 717     if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
 718       return;
 719     operational_status_ = STATUS_INITIALIZED;
 720   }
 721
 722  private:
 723   ~Pcm16Impl() final {}
 724
 725   void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 726                                  int source_offset,
 727                                  int buffer_fill_offset,
 728                                  int num_samples) final {
 729     audio_bus->ToInterleavedPartial(
 730         source_offset,
 731         num_samples,
 732         sizeof(int16),
 733         buffer_.get() + buffer_fill_offset * num_channels_);
 734   }
 735
 736   bool EncodeFromFilledBuffer(std::string* out) final {
 737     // Output 16-bit PCM integers in big-endian byte order.
 738     out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
 739     const int16* src = buffer_.get();
 740     const int16* const src_end = src + num_channels_ * samples_per_frame_;
 741     uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
 742     for (; src < src_end; ++src, ++dest)
 743       *dest = base::HostToNet16(*src);
 744     return true;
 745   }
 746
 747  private:
 748   const scoped_ptr<int16[]> buffer_;
 749
 750   DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
 751 };
 752
 753 AudioEncoder::AudioEncoder(
 754     const scoped_refptr<CastEnvironment>& cast_environment,
 755     int num_channels,
 756     int sampling_rate,
 757     int bitrate,
 758     Codec codec,
 759     const FrameEncodedCallback& frame_encoded_callback)
 760     : cast_environment_(cast_environment) {
 761   // Note: It doesn't matter which thread constructs AudioEncoder, just so long
 762   // as all calls to InsertAudio() are by the same thread.
 763   insert_thread_checker_.DetachFromThread();
 764   switch (codec) {
 765 #if !defined(OS_IOS)
 766     case CODEC_AUDIO_OPUS:
 767       impl_ = new OpusImpl(cast_environment,
 768                            num_channels,
 769                            sampling_rate,
 770                            bitrate,
 771                            frame_encoded_callback);
 772       break;
 773 #endif
 774 #if defined(OS_MACOSX)
 775     case CODEC_AUDIO_AAC:
 776       impl_ = new AppleAacImpl(cast_environment,
 777                                num_channels,
 778                                sampling_rate,
 779                                bitrate,
 780                                frame_encoded_callback);
 781       break;
 782 #endif  // defined(OS_MACOSX)
 783     case CODEC_AUDIO_PCM16:
 784       impl_ = new Pcm16Impl(cast_environment,
 785                             num_channels,
 786                             sampling_rate,
 787                             frame_encoded_callback);
 788       break;
 789     default:
 790       NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
 791       break;
 792   }
 793 }
 794
 795 AudioEncoder::~AudioEncoder() {}
 796
 797 OperationalStatus AudioEncoder::InitializationResult() const {
 798   DCHECK(insert_thread_checker_.CalledOnValidThread());
 799   if (impl_.get()) {
 800     return impl_->InitializationResult();
 801   }
 802   return STATUS_UNSUPPORTED_CODEC;
 803 }
 804
 805 int AudioEncoder::GetSamplesPerFrame() const {
 806   DCHECK(insert_thread_checker_.CalledOnValidThread());
 807   if (InitializationResult() != STATUS_INITIALIZED) {
 808     NOTREACHED();
 809     return std::numeric_limits<int>::max();
 810   }
 811   return impl_->samples_per_frame();
 812 }
 813
 814 base::TimeDelta AudioEncoder::GetFrameDuration() const {
 815   DCHECK(insert_thread_checker_.CalledOnValidThread());
 816   if (InitializationResult() != STATUS_INITIALIZED) {
 817     NOTREACHED();
 818     return base::TimeDelta();
 819   }
 820   return impl_->frame_duration();
 821 }
 822
 823 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
 824                                const base::TimeTicks& recorded_time) {
 825   DCHECK(insert_thread_checker_.CalledOnValidThread());
 826   DCHECK(audio_bus.get());
 827   if (InitializationResult() != STATUS_INITIALIZED) {
 828     NOTREACHED();
 829     return;
 830   }
 831   cast_environment_->PostTask(CastEnvironment::AUDIO,
 832                               FROM_HERE,
 833                               base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
 834                                          impl_,
 835                                          base::Passed(&audio_bus),
 836                                          recorded_time));
 837 }
 838
 839 }  // namespace cast
 840 }  // namespace media