media/cast/sender/audio_encoder.cc
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/cast/sender/audio_encoder.h"

#include <algorithm>
#include <limits>
#include <string>

#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/location.h"
#include "base/stl_util.h"
#include "base/sys_byteorder.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/cast/cast_defines.h"
#include "media/cast/cast_environment.h"

#if !defined(OS_IOS)
#include "third_party/opus/src/include/opus.h"
#endif

#if defined(OS_MACOSX)
#include <AudioToolbox/AudioToolbox.h>
#endif

namespace media {
namespace cast {

namespace {
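
// kUnderrunSkipThreshold is the number of missed frames tolerated before
// EncodeAudio() drops buffered samples and skips the RTP timestamp ahead.
// kDefaultFramesPerSecond (100 frames/s) yields 10 ms frames for the Opus and
// PCM16 encoders.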
const int kUnderrunSkipThreshold = 3;
const int kDefaultFramesPerSecond = 100;

}  // namespace

// Base class that handles the common problem of feeding one or more AudioBus'
// data into a buffer and then, once the buffer is full, encoding the signal and
// emitting an EncodedFrame via the FrameEncodedCallback.
//
// Subclasses complete the implementation by handling the actual encoding
// details.
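//
// Note: InsertAudio() posts EncodeAudio() to the CastEnvironment AUDIO thread
// (see AudioEncoder::InsertAudio() below), and the encoded-frame callback is
// posted back to the MAIN thread.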
class AudioEncoder::ImplBase
    : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
 public:
  ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
           Codec codec,
           int num_channels,
           int sampling_rate,
           int samples_per_frame,
           const FrameEncodedCallback& callback)
      : cast_environment_(cast_environment),
        codec_(codec),
        num_channels_(num_channels),
        samples_per_frame_(samples_per_frame),
        callback_(callback),
        operational_status_(STATUS_UNINITIALIZED),
        frame_duration_(base::TimeDelta::FromMicroseconds(
            base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
            sampling_rate)),
        buffer_fill_end_(0),
        frame_id_(0),
        frame_rtp_timestamp_(0),
        samples_dropped_from_buffer_(0) {
    // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
    const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
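    // (That is, 48 samples per millisecond * 2 channels * 100 ms = 9600.)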
    if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
        frame_duration_ == base::TimeDelta() ||
        samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
      operational_status_ = STATUS_INVALID_CONFIGURATION;
    }
  }

  OperationalStatus InitializationResult() const {
    return operational_status_;
  }

  int samples_per_frame() const {
    return samples_per_frame_;
  }

  base::TimeDelta frame_duration() const { return frame_duration_; }

  void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
                   const base::TimeTicks& recorded_time) {
    DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
    DCHECK(!recorded_time.is_null());

    // Determine whether |recorded_time| is consistent with the amount of audio
    // data having been processed in the past. Resolve the underrun problem by
    // dropping data from the internal buffer and skipping ahead the next
    // frame's RTP timestamp by the estimated number of frames missed. On the
    // other hand, don't attempt to resolve overruns: A receiver should
    // gracefully deal with an excess of audio data.
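    //
    // For example, with 10 ms frames, if |recorded_time| is more than
    // kUnderrunSkipThreshold frames (30 ms) ahead of where the buffered audio
    // leaves off, the buffered samples are discarded and the RTP timestamp
    // jumps ahead by the missed frames' worth of samples.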
    base::TimeDelta buffer_fill_duration =
        buffer_fill_end_ * frame_duration_ / samples_per_frame_;
    if (!frame_capture_time_.is_null()) {
      const base::TimeDelta amount_ahead_by =
          recorded_time - (frame_capture_time_ + buffer_fill_duration);
      const int64 num_frames_missed = amount_ahead_by / frame_duration_;
      if (num_frames_missed > kUnderrunSkipThreshold) {
        samples_dropped_from_buffer_ += buffer_fill_end_;
        buffer_fill_end_ = 0;
        buffer_fill_duration = base::TimeDelta();
        frame_rtp_timestamp_ +=
            static_cast<uint32>(num_frames_missed * samples_per_frame_);
        DVLOG(1) << "Skipping RTP timestamp ahead to account for "
                 << num_frames_missed * samples_per_frame_
                 << " samples' worth of underrun.";
      }
    }
    frame_capture_time_ = recorded_time - buffer_fill_duration;

    // Encode all audio in |audio_bus| into zero or more frames.
    int src_pos = 0;
    while (src_pos < audio_bus->frames()) {
      const int num_samples_to_xfer = std::min(
          samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
      DCHECK_EQ(audio_bus->channels(), num_channels_);
      TransferSamplesIntoBuffer(
          audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
      src_pos += num_samples_to_xfer;
      buffer_fill_end_ += num_samples_to_xfer;

      if (buffer_fill_end_ < samples_per_frame_)
        break;

      scoped_ptr<EncodedFrame> audio_frame(
          new EncodedFrame());
      audio_frame->dependency = EncodedFrame::KEY;
      audio_frame->frame_id = frame_id_;
      audio_frame->referenced_frame_id = frame_id_;
      audio_frame->rtp_timestamp = frame_rtp_timestamp_;
      audio_frame->reference_time = frame_capture_time_;

      if (EncodeFromFilledBuffer(&audio_frame->data)) {
        cast_environment_->PostTask(
            CastEnvironment::MAIN,
            FROM_HERE,
            base::Bind(callback_,
                       base::Passed(&audio_frame),
                       samples_dropped_from_buffer_));
        samples_dropped_from_buffer_ = 0;
      }

      // Reset the internal buffer, frame ID, and timestamps for the next frame.
      buffer_fill_end_ = 0;
      ++frame_id_;
      frame_rtp_timestamp_ += samples_per_frame_;
      frame_capture_time_ += frame_duration_;
    }
  }

 protected:
  friend class base::RefCountedThreadSafe<ImplBase>;
  virtual ~ImplBase() {}

  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) = 0;
  virtual bool EncodeFromFilledBuffer(std::string* out) = 0;

  const scoped_refptr<CastEnvironment> cast_environment_;
  const Codec codec_;
  const int num_channels_;
  const int samples_per_frame_;
  const FrameEncodedCallback callback_;

  // Subclass' ctor is expected to set this to STATUS_INITIALIZED.
  OperationalStatus operational_status_;

  // The duration of one frame of encoded audio samples. Derived from
  // |samples_per_frame_| and the sampling rate.
  const base::TimeDelta frame_duration_;

 private:
  // In the case where a call to EncodeAudio() cannot completely fill the
  // buffer, this points to the position at which to populate data in a later
  // call.
  int buffer_fill_end_;

  // A counter used to label EncodedFrames.
  uint32 frame_id_;

  // The RTP timestamp for the next frame of encoded audio. This is defined as
  // the number of audio samples encoded so far, plus the estimated number of
  // samples that were missed due to data underruns. A receiver uses this value
  // to detect gaps in the audio signal data being provided. Per the spec, RTP
  // timestamp values are allowed to overflow and roll around past zero.
  uint32 frame_rtp_timestamp_;

  // The local system time associated with the start of the next frame of
  // encoded audio. This value is passed on to a receiver as a reference clock
  // timestamp for the purposes of synchronizing audio and video. Its
  // progression is expected to drift relative to the elapsed time implied by
  // the RTP timestamps.
  base::TimeTicks frame_capture_time_;

  // Set to non-zero to indicate the next output frame skipped over audio
  // samples in order to recover from an input underrun.
  int samples_dropped_from_buffer_;

  DISALLOW_COPY_AND_ASSIGN(ImplBase);
};

#if !defined(OS_IOS)
class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 public:
  OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
           int num_channels,
           int sampling_rate,
           int bitrate,
           const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_OPUS,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 callback),
        encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
        opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
        buffer_(new float[num_channels * samples_per_frame_]) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED ||
        sampling_rate % samples_per_frame_ != 0 ||
        !IsValidFrameDuration(frame_duration_)) {
      return;
    }
    if (opus_encoder_init(opus_encoder_,
                          sampling_rate,
                          num_channels,
                          OPUS_APPLICATION_AUDIO) != OPUS_OK) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;

    if (bitrate <= 0) {
      // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
      // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
      // frame size. The opus library authors may, of course, adjust this in
      // later versions.
      bitrate = OPUS_AUTO;
    }
    CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
             OPUS_OK);
  }

 private:
  ~OpusImpl() override {}

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) override {
    // Opus requires channel-interleaved samples in a single array.
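    // (For example, for stereo input the planar AudioBus channels L and R are
    // written out as L0 R0 L1 R1 ...)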
    for (int ch = 0; ch < audio_bus->channels(); ++ch) {
      const float* src = audio_bus->channel(ch) + source_offset;
      const float* const src_end = src + num_samples;
      float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
      for (; src < src_end; ++src, dest += num_channels_)
        *dest = *src;
    }
  }

  bool EncodeFromFilledBuffer(std::string* out) override {
    out->resize(kOpusMaxPayloadSize);
    const opus_int32 result =
        opus_encode_float(opus_encoder_,
                          buffer_.get(),
                          samples_per_frame_,
                          reinterpret_cast<uint8*>(string_as_array(out)),
                          kOpusMaxPayloadSize);
    if (result > 1) {
      out->resize(result);
      return true;
    } else if (result < 0) {
      LOG(ERROR) << "Error code from opus_encode_float(): " << result;
      return false;
    } else {
      // Do nothing: The documentation says that a return value of zero or
      // one byte means the packet does not need to be transmitted.
      return false;
    }
  }

  static bool IsValidFrameDuration(base::TimeDelta duration) {
    // See https://tools.ietf.org/html/rfc6716#section-2.1.4
    return duration == base::TimeDelta::FromMicroseconds(2500) ||
           duration == base::TimeDelta::FromMilliseconds(5) ||
           duration == base::TimeDelta::FromMilliseconds(10) ||
           duration == base::TimeDelta::FromMilliseconds(20) ||
           duration == base::TimeDelta::FromMilliseconds(40) ||
           duration == base::TimeDelta::FromMilliseconds(60);
  }

  const scoped_ptr<uint8[]> encoder_memory_;
  OpusEncoder* const opus_encoder_;
  const scoped_ptr<float[]> buffer_;

  // This is the recommended value, according to documentation in
  // third_party/opus/src/include/opus.h, so that the Opus encoder does not
  // degrade the audio due to memory constraints.
  //
  // Note: Whereas other RTP implementations do not, the cast library is
  // perfectly capable of transporting larger than MTU-sized audio frames.
  static const int kOpusMaxPayloadSize = 4000;

  DISALLOW_COPY_AND_ASSIGN(OpusImpl);
};
#endif

#if defined(OS_MACOSX)
class AudioEncoder::AppleAacImpl : public AudioEncoder::ImplBase {
  // AAC-LC has two access unit sizes (960 and 1024). The Apple encoder only
  // supports the latter.
  static const int kAccessUnitSamples = 1024;

  // Size of an ADTS header (w/o checksum). See
  // http://wiki.multimedia.cx/index.php?title=ADTS
  static const int kAdtsHeaderSize = 7;

 public:
  AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
               int num_channels,
               int sampling_rate,
               int bitrate,
               const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_AAC,
                 num_channels,
                 sampling_rate,
                 kAccessUnitSamples,
                 callback),
        input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
        input_bus_(AudioBus::CreateWrapper(num_channels)),
        max_access_unit_size_(0),
        output_buffer_(nullptr),
        converter_(nullptr),
        file_(nullptr),
        num_access_units_(0),
        can_resume_(true) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED) {
      return;
    }
    if (!Initialize(sampling_rate, bitrate)) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~AppleAacImpl() override { Teardown(); }

  // Destroys the existing audio converter and file, if any.
  void Teardown() {
    if (converter_) {
      AudioConverterDispose(converter_);
      converter_ = nullptr;
    }
    if (file_) {
      AudioFileClose(file_);
      file_ = nullptr;
    }
  }

  // Initializes the audio converter and file. Calls Teardown to destroy any
  // existing state. This is so that Initialize() may be called to setup another
  // converter after a non-resumable interruption.
  bool Initialize(int sampling_rate, int bitrate) {
    // Teardown previous audio converter and file.
    Teardown();

    // Input data comes from AudioBus objects, which carry non-interleaved
    // packed native-endian float samples. Note that in Core Audio, a frame is
    // one sample across all channels at a given point in time. When describing
    // a non-interleaved samples format, the "per frame" fields mean "per
    // channel" or "per stream", with the exception of |mChannelsPerFrame|. For
    // uncompressed formats, one packet contains one frame.
    AudioStreamBasicDescription in_asbd;
    in_asbd.mSampleRate = sampling_rate;
    in_asbd.mFormatID = kAudioFormatLinearPCM;
    in_asbd.mFormatFlags =
        kAudioFormatFlagsNativeFloatPacked | kAudioFormatFlagIsNonInterleaved;
    in_asbd.mChannelsPerFrame = num_channels_;
    in_asbd.mBitsPerChannel = sizeof(float) * 8;
    in_asbd.mFramesPerPacket = 1;
    in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
    in_asbd.mReserved = 0;

    // Request AAC-LC encoding, with no downmixing or downsampling.
    AudioStreamBasicDescription out_asbd;
    memset(&out_asbd, 0, sizeof(AudioStreamBasicDescription));
    out_asbd.mSampleRate = sampling_rate;
    out_asbd.mFormatID = kAudioFormatMPEG4AAC;
    out_asbd.mChannelsPerFrame = num_channels_;
    UInt32 prop_size = sizeof(out_asbd);
    if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
                               0,
                               nullptr,
                               &prop_size,
                               &out_asbd) != noErr) {
      return false;
    }

    if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
      return false;
    }

    // The converter will fully specify the output format and update the
    // relevant fields of the structure, which we can now query.
    prop_size = sizeof(out_asbd);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCurrentOutputStreamDescription,
                                  &prop_size,
                                  &out_asbd) != noErr) {
      return false;
    }

    // If bitrate is <= 0, allow the encoder to pick a suitable value.
    // Otherwise, set the bitrate (which can fail if the value is not suitable
    // or compatible with the output sampling rate or channels).
    if (bitrate > 0) {
      prop_size = sizeof(int);
      if (AudioConverterSetProperty(
              converter_, kAudioConverterEncodeBitRate, prop_size, &bitrate) !=
          noErr) {
        return false;
      }
    }

#if defined(OS_IOS)
    // See the comment next to |can_resume_| for details on resumption. Some
    // converters can return kAudioConverterErr_PropertyNotSupported, in which
    // case resumption is implicitly supported. This is the only location where
    // the implementation modifies |can_resume_|.
    uint32_t can_resume;
    prop_size = sizeof(can_resume);
    OSStatus oserr = AudioConverterGetProperty(
        converter_,
        kAudioConverterPropertyCanResumeFromInterruption,
        &prop_size,
        &can_resume);
    if (oserr == noErr) {
      const_cast<bool&>(can_resume_) = can_resume != 0;
    }
#endif

    // Figure out the maximum size of an access unit that the encoder can
    // produce. |mBytesPerPacket| will be 0 for variable size configurations,
    // in which case we must query the value.
    uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
    if (max_access_unit_size == 0) {
      prop_size = sizeof(max_access_unit_size);
      if (AudioConverterGetProperty(
              converter_,
              kAudioConverterPropertyMaximumOutputPacketSize,
              &prop_size,
              &max_access_unit_size) != noErr) {
        return false;
      }
    }

    // This is the only location where the implementation modifies
    // |max_access_unit_size_|.
    const_cast<uint32_t&>(max_access_unit_size_) = max_access_unit_size;

    // Allocate a buffer to store one access unit. This is the only location
    // where the implementation modifies |access_unit_buffer_|.
    const_cast<scoped_ptr<uint8[]>&>(access_unit_buffer_)
        .reset(new uint8[max_access_unit_size]);

    // Initialize the converter ABL. Note that the buffer size has to be set
    // before every encode operation, since the field is modified to indicate
    // the size of the output data (on input it indicates the buffer capacity).
    converter_abl_.mNumberBuffers = 1;
    converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
    converter_abl_.mBuffers[0].mData = access_unit_buffer_.get();

    // The "magic cookie" is an encoder state vector required for decoding and
    // packetization. It is queried now from |converter_| then set on |file_|
    // after initialization.
    UInt32 cookie_size;
    if (AudioConverterGetPropertyInfo(converter_,
                                      kAudioConverterCompressionMagicCookie,
                                      &cookie_size,
                                      nullptr) != noErr) {
      return false;
    }
    scoped_ptr<uint8[]> cookie_data(new uint8[cookie_size]);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCompressionMagicCookie,
                                  &cookie_size,
                                  cookie_data.get()) != noErr) {
      return false;
    }

    if (AudioFileInitializeWithCallbacks(this,
                                         nullptr,
                                         &FileWriteCallback,
                                         nullptr,
                                         nullptr,
                                         kAudioFileAAC_ADTSType,
                                         &out_asbd,
                                         0,
                                         &file_) != noErr) {
      return false;
    }

    if (AudioFileSetProperty(file_,
                             kAudioFilePropertyMagicCookieData,
                             cookie_size,
                             cookie_data.get()) != noErr) {
      return false;
    }

    // Initially the input bus points to the input buffer. See the comment on
    // |input_bus_| for more on this optimization.
    input_bus_->set_frames(kAccessUnitSamples);
    for (int ch = 0; ch < input_buffer_->channels(); ++ch) {
      input_bus_->SetChannelData(ch, input_buffer_->channel(ch));
    }

    return true;
  }

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) override {
    DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());

    // See the comment on |input_bus_| for more on this optimization. Note that
    // we cannot elide the copy if the source offset would result in an
    // unaligned pointer.
    if (num_samples == kAccessUnitSamples &&
        source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0) {
      DCHECK_EQ(buffer_fill_offset, 0);
      for (int ch = 0; ch < audio_bus->channels(); ++ch) {
        auto samples = const_cast<float*>(audio_bus->channel(ch));
        input_bus_->SetChannelData(ch, samples + source_offset);
      }
      return;
    }

    // Copy the samples into the input buffer.
    DCHECK_EQ(input_bus_->channel(0), input_buffer_->channel(0));
    audio_bus->CopyPartialFramesTo(
        source_offset, num_samples, buffer_fill_offset, input_buffer_.get());
  }

  bool EncodeFromFilledBuffer(std::string* out) override {
    // Reset the buffer size field to the buffer capacity.
    converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;

    // Encode the current input buffer. This is a synchronous call.
    OSStatus oserr;
    UInt32 io_num_packets = 1;
    AudioStreamPacketDescription packet_description;
    oserr = AudioConverterFillComplexBuffer(converter_,
                                            &ConverterFillDataCallback,
                                            this,
                                            &io_num_packets,
                                            &converter_abl_,
                                            &packet_description);
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }

    // Reserve space in the output buffer to write the packet.
    out->reserve(packet_description.mDataByteSize + kAdtsHeaderSize);

    // Set the current output buffer and emit an ADTS-wrapped AAC access unit.
    // This is a synchronous call. After it returns, reset the output buffer.
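    // (AudioFileWritePackets() invokes FileWriteCallback(), below, which
    // appends the ADTS-framed bytes to |output_buffer_|.)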
    output_buffer_ = out;
    oserr = AudioFileWritePackets(file_,
                                  false,
                                  converter_abl_.mBuffers[0].mDataByteSize,
                                  &packet_description,
                                  num_access_units_,
                                  &io_num_packets,
                                  converter_abl_.mBuffers[0].mData);
    output_buffer_ = nullptr;
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }
    num_access_units_ += io_num_packets;
    return true;
  }

  // The |AudioConverterFillComplexBuffer| input callback function. Configures
  // the provided |AudioBufferList| to alias |input_bus_|. The implementation
  // can only supply |kAccessUnitSamples| samples as a result of not copying
  // samples or tracking read and write positions. Note that this function is
  // called synchronously by |AudioConverterFillComplexBuffer|.
  static OSStatus ConverterFillDataCallback(
      AudioConverterRef in_converter,
      UInt32* io_num_packets,
      AudioBufferList* io_data,
      AudioStreamPacketDescription** out_packet_desc,
      void* in_encoder) {
    DCHECK(in_encoder);
    auto encoder = reinterpret_cast<AppleAacImpl*>(in_encoder);
    auto input_buffer = encoder->input_buffer_.get();
    auto input_bus = encoder->input_bus_.get();

    DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
    DCHECK_EQ(io_data->mNumberBuffers,
              static_cast<unsigned>(input_bus->channels()));
    for (int i_buf = 0, end = io_data->mNumberBuffers; i_buf < end; ++i_buf) {
      io_data->mBuffers[i_buf].mNumberChannels = 1;
      io_data->mBuffers[i_buf].mDataByteSize = sizeof(float) * *io_num_packets;
      io_data->mBuffers[i_buf].mData = input_bus->channel(i_buf);

      // Reset the input bus back to the input buffer. See the comment on
      // |input_bus_| for more on this optimization.
      input_bus->SetChannelData(i_buf, input_buffer->channel(i_buf));
    }
    return noErr;
  }

  // The AudioFile write callback function. Appends the data to the encoder's
  // current |output_buffer_|.
  static OSStatus FileWriteCallback(void* in_encoder,
                                    SInt64 in_position,
                                    UInt32 in_size,
                                    const void* in_buffer,
                                    UInt32* out_size) {
    DCHECK(in_encoder);
    DCHECK(in_buffer);
    auto encoder = reinterpret_cast<const AppleAacImpl*>(in_encoder);
    auto buffer = reinterpret_cast<const std::string::value_type*>(in_buffer);

    std::string* const output_buffer = encoder->output_buffer_;
    DCHECK(output_buffer);

    output_buffer->append(buffer, in_size);
    *out_size = in_size;
    return noErr;
  }

  // Buffer that holds one AAC access unit worth of samples. The input callback
  // function provides samples from this buffer via |input_bus_| to the encoder.
  const scoped_ptr<AudioBus> input_buffer_;

  // Wrapper AudioBus used by the input callback function. Normally it wraps
  // |input_buffer_|. However, as an optimization when the client submits a
  // buffer containing exactly one access unit worth of samples, the bus is
  // redirected to the client buffer temporarily. We know that the base
  // implementation will call us right after to encode the buffer and thus we
  // can eliminate the copy into |input_buffer_|.
  const scoped_ptr<AudioBus> input_bus_;

  // A buffer that holds one AAC access unit. Initialized in |Initialize| once
  // the maximum access unit size is known.
  const scoped_ptr<uint8[]> access_unit_buffer_;

  // The maximum size of an access unit that the encoder can emit.
  const uint32_t max_access_unit_size_;

  // A temporary pointer to the current output buffer. Only non-null when
  // writing an access unit. Accessed by the AudioFile write callback function.
  std::string* output_buffer_;

  // The |AudioConverter| is responsible for AAC encoding. This is a Core Audio
  // object, not to be confused with |media::AudioConverter|.
  AudioConverterRef converter_;

  // The |AudioFile| is responsible for ADTS packetization.
  AudioFileID file_;

  // An |AudioBufferList| passed to the converter to store encoded samples.
  AudioBufferList converter_abl_;

  // The number of access units emitted so far by the encoder.
  uint64_t num_access_units_;

  // On iOS, audio codecs can be interrupted by other services (such as an
  // audio alert or phone call). Depending on the underlying hardware and
  // configuration, the codec may have to be thrown away and re-initialized
  // after such an interruption. This flag tracks if we can resume or not from
  // such an interruption. It is initialized to true, which is the only possible
  // value on OS X and on most modern iOS hardware.
  // TODO(jfroy): Implement encoder re-initialization after interruption.
  // https://crbug.com/424787
  const bool can_resume_;

  DISALLOW_COPY_AND_ASSIGN(AppleAacImpl);
};
#endif  // defined(OS_MACOSX)

class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 public:
  Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
            int num_channels,
            int sampling_rate,
            const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_PCM16,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 callback),
        buffer_(new int16[num_channels * samples_per_frame_]) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
      return;
    operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~Pcm16Impl() override {}

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) override {
    audio_bus->ToInterleavedPartial(
        source_offset,
        num_samples,
        sizeof(int16),
        buffer_.get() + buffer_fill_offset * num_channels_);
  }

  bool EncodeFromFilledBuffer(std::string* out) override {
    // Output 16-bit PCM integers in big-endian byte order.
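    // (That is, network byte order; base::HostToNet16() swaps each sample on
    // little-endian hosts.)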
    out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
    const int16* src = buffer_.get();
    const int16* const src_end = src + num_channels_ * samples_per_frame_;
    uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
    for (; src < src_end; ++src, ++dest)
      *dest = base::HostToNet16(*src);
    return true;
  }

 private:
  const scoped_ptr<int16[]> buffer_;

  DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
};

AudioEncoder::AudioEncoder(
    const scoped_refptr<CastEnvironment>& cast_environment,
    int num_channels,
    int sampling_rate,
    int bitrate,
    Codec codec,
    const FrameEncodedCallback& frame_encoded_callback)
    : cast_environment_(cast_environment) {
  // Note: It doesn't matter which thread constructs AudioEncoder, just so long
  // as all calls to InsertAudio() are by the same thread.
  insert_thread_checker_.DetachFromThread();
  switch (codec) {
#if !defined(OS_IOS)
    case CODEC_AUDIO_OPUS:
      impl_ = new OpusImpl(cast_environment,
                           num_channels,
                           sampling_rate,
                           bitrate,
                           frame_encoded_callback);
      break;
#endif
#if defined(OS_MACOSX)
    case CODEC_AUDIO_AAC:
      impl_ = new AppleAacImpl(cast_environment,
                               num_channels,
                               sampling_rate,
                               bitrate,
                               frame_encoded_callback);
      break;
#endif  // defined(OS_MACOSX)
    case CODEC_AUDIO_PCM16:
      impl_ = new Pcm16Impl(cast_environment,
                            num_channels,
                            sampling_rate,
                            frame_encoded_callback);
      break;
    default:
      NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
      break;
  }
}

AudioEncoder::~AudioEncoder() {}

OperationalStatus AudioEncoder::InitializationResult() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (impl_.get()) {
    return impl_->InitializationResult();
  }
  return STATUS_UNSUPPORTED_CODEC;
}

int AudioEncoder::GetSamplesPerFrame() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return std::numeric_limits<int>::max();
  }
  return impl_->samples_per_frame();
}

base::TimeDelta AudioEncoder::GetFrameDuration() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return base::TimeDelta();
  }
  return impl_->frame_duration();
}

void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
                               const base::TimeTicks& recorded_time) {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  DCHECK(audio_bus.get());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return;
  }
  cast_environment_->PostTask(CastEnvironment::AUDIO,
                              FROM_HERE,
                              base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
                                         impl_,
                                         base::Passed(&audio_bus),
                                         recorded_time));
}

}  // namespace cast
}  // namespace media