media/cast/sender/audio_encoder.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/cast/sender/audio_encoder.h"
   6
   7 #include <algorithm>
   8 #include <limits>
   9 #include <string>
  10
  11 #include "base/bind.h"
  12 #include "base/bind_helpers.h"
  13 #include "base/location.h"
  14 #include "base/stl_util.h"
  15 #include "base/sys_byteorder.h"
  16 #include "base/time/time.h"
  17 #include "base/trace_event/trace_event.h"
  18 #include "media/cast/cast_defines.h"
  19
  20 #if !defined(OS_IOS)
  21 #include "third_party/opus/src/include/opus.h"
  22 #endif
  23
  24 #if defined(OS_MACOSX)
  25 #include <AudioToolbox/AudioToolbox.h>
  26 #endif
  27
  28 namespace media {
  29 namespace cast {
  30
  31 namespace {
  32
  33 const int kUnderrunSkipThreshold = 3;
  34 const int kDefaultFramesPerSecond = 100;
  35
  36 }  // namespace
  37
  38 // Base class that handles the common problem of feeding one or more AudioBus'
  39 // data into a buffer and then, once the buffer is full, encoding the signal and
  40 // emitting a SenderEncodedFrame via the FrameEncodedCallback.
  41 //
  42 // Subclasses complete the implementation by handling the actual encoding
  43 // details.
  44 class AudioEncoder::ImplBase
  45     : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
  46  public:
  47   ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
  48            Codec codec,
  49            int num_channels,
  50            int sampling_rate,
  51            int samples_per_frame,
  52            const FrameEncodedCallback& callback)
  53       : cast_environment_(cast_environment),
  54         codec_(codec),
  55         num_channels_(num_channels),
  56         samples_per_frame_(samples_per_frame),
  57         callback_(callback),
  58         operational_status_(STATUS_UNINITIALIZED),
  59         frame_duration_(base::TimeDelta::FromMicroseconds(
  60             base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
  61             sampling_rate)),
  62         buffer_fill_end_(0),
  63         frame_id_(0),
  64         frame_rtp_timestamp_(0),
  65         samples_dropped_from_buffer_(0) {
  66     // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
  67     const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
  68     if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
  69         frame_duration_ == base::TimeDelta() ||
  70         samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
  71       operational_status_ = STATUS_INVALID_CONFIGURATION;
  72     }
  73   }
  74
  75   OperationalStatus InitializationResult() const {
  76     return operational_status_;
  77   }
  78
  79   int samples_per_frame() const {
  80     return samples_per_frame_;
  81   }
  82
  83   base::TimeDelta frame_duration() const { return frame_duration_; }
  84
  85   void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
  86                    const base::TimeTicks& recorded_time) {
  87     DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
  88     DCHECK(!recorded_time.is_null());
  89
  90     // Determine whether |recorded_time| is consistent with the amount of audio
  91     // data having been processed in the past.  Resolve the underrun problem by
  92     // dropping data from the internal buffer and skipping ahead the next
  93     // frame's RTP timestamp by the estimated number of frames missed.  On the
  94     // other hand, don't attempt to resolve overruns: A receiver should
  95     // gracefully deal with an excess of audio data.
  96     base::TimeDelta buffer_fill_duration =
  97         buffer_fill_end_ * frame_duration_ / samples_per_frame_;
  98     if (!frame_capture_time_.is_null()) {
  99       const base::TimeDelta amount_ahead_by =
 100           recorded_time - (frame_capture_time_ + buffer_fill_duration);
 101       const int64 num_frames_missed = amount_ahead_by / frame_duration_;
 102       if (num_frames_missed > kUnderrunSkipThreshold) {
 103         samples_dropped_from_buffer_ += buffer_fill_end_;
 104         buffer_fill_end_ = 0;
 105         buffer_fill_duration = base::TimeDelta();
 106         frame_rtp_timestamp_ +=
 107             static_cast<uint32>(num_frames_missed * samples_per_frame_);
 108         DVLOG(1) << "Skipping RTP timestamp ahead to account for "
 109                  << num_frames_missed * samples_per_frame_
 110                  << " samples' worth of underrun.";
 111         TRACE_EVENT_INSTANT2("cast.stream", "Audio Skip",
 112                              TRACE_EVENT_SCOPE_THREAD,
 113                              "frames missed", num_frames_missed,
 114                              "samples dropped", samples_dropped_from_buffer_);
 115       }
 116     }
 117     frame_capture_time_ = recorded_time - buffer_fill_duration;
 118
 119     // Encode all audio in |audio_bus| into zero or more frames.
 120     int src_pos = 0;
 121     while (src_pos < audio_bus->frames()) {
 122       // Note: This is used to compute the deadline utilization and so it uses
 123       // the real-world clock instead of the CastEnvironment clock, the latter
 124       // of which might be simulated.
 125       const base::TimeTicks start_time = base::TimeTicks::Now();
 126
 127       const int num_samples_to_xfer = std::min(
 128           samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
 129       DCHECK_EQ(audio_bus->channels(), num_channels_);
 130       TransferSamplesIntoBuffer(
 131           audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
 132       src_pos += num_samples_to_xfer;
 133       buffer_fill_end_ += num_samples_to_xfer;
 134
 135       if (buffer_fill_end_ < samples_per_frame_)
 136         break;
 137
 138       scoped_ptr<SenderEncodedFrame> audio_frame(
 139           new SenderEncodedFrame());
 140       audio_frame->dependency = EncodedFrame::KEY;
 141       audio_frame->frame_id = frame_id_;
 142       audio_frame->referenced_frame_id = frame_id_;
 143       audio_frame->rtp_timestamp = frame_rtp_timestamp_;
 144       audio_frame->reference_time = frame_capture_time_;
 145
 146       TRACE_EVENT_ASYNC_BEGIN2("cast.stream", "Audio Encode", audio_frame.get(),
 147                                "frame_id", frame_id_,
 148                                "rtp_timestamp", frame_rtp_timestamp_);
 149       if (EncodeFromFilledBuffer(&audio_frame->data)) {
 150         // Compute deadline utilization as the real-world time elapsed divided
 151         // by the signal duration.
 152         audio_frame->deadline_utilization =
 153             (base::TimeTicks::Now() - start_time).InSecondsF() /
 154                 frame_duration_.InSecondsF();
 155
 156         TRACE_EVENT_ASYNC_END1("cast.stream", "Audio Encode", audio_frame.get(),
 157                                "Deadline utilization",
 158                                audio_frame->deadline_utilization);
 159         cast_environment_->PostTask(
 160             CastEnvironment::MAIN,
 161             FROM_HERE,
 162             base::Bind(callback_,
 163                        base::Passed(&audio_frame),
 164                        samples_dropped_from_buffer_));
 165         samples_dropped_from_buffer_ = 0;
 166       }
 167
 168       // Reset the internal buffer, frame ID, and timestamps for the next frame.
 169       buffer_fill_end_ = 0;
 170       ++frame_id_;
 171       frame_rtp_timestamp_ += samples_per_frame_;
 172       frame_capture_time_ += frame_duration_;
 173     }
 174   }
 175
 176  protected:
 177   friend class base::RefCountedThreadSafe<ImplBase>;
 178   virtual ~ImplBase() {}
 179
 180   virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 181                                          int source_offset,
 182                                          int buffer_fill_offset,
 183                                          int num_samples) = 0;
 184   virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
 185
 186   const scoped_refptr<CastEnvironment> cast_environment_;
 187   const Codec codec_;
 188   const int num_channels_;
 189   const int samples_per_frame_;
 190   const FrameEncodedCallback callback_;
 191
 192   // Subclass' ctor is expected to set this to STATUS_INITIALIZED.
 193   OperationalStatus operational_status_;
 194
 195   // The duration of one frame of encoded audio samples. Derived from
 196   // |samples_per_frame_| and the sampling rate.
 197   const base::TimeDelta frame_duration_;
 198
 199  private:
 200   // In the case where a call to EncodeAudio() cannot completely fill the
 201   // buffer, this points to the position at which to populate data in a later
 202   // call.
 203   int buffer_fill_end_;
 204
 205   // A counter used to label EncodedFrames.
 206   uint32 frame_id_;
 207
 208   // The RTP timestamp for the next frame of encoded audio.  This is defined as
 209   // the number of audio samples encoded so far, plus the estimated number of
 210   // samples that were missed due to data underruns.  A receiver uses this value
 211   // to detect gaps in the audio signal data being provided.  Per the spec, RTP
 212   // timestamp values are allowed to overflow and roll around past zero.
 213   uint32 frame_rtp_timestamp_;
 214
 215   // The local system time associated with the start of the next frame of
 216   // encoded audio.  This value is passed on to a receiver as a reference clock
 217   // timestamp for the purposes of synchronizing audio and video.  Its
 218   // progression is expected to drift relative to the elapsed time implied by
 219   // the RTP timestamps.
 220   base::TimeTicks frame_capture_time_;
 221
 222   // Set to non-zero to indicate the next output frame skipped over audio
 223   // samples in order to recover from an input underrun.
 224   int samples_dropped_from_buffer_;
 225
 226   DISALLOW_COPY_AND_ASSIGN(ImplBase);
 227 };
 228
 229 #if !defined(OS_IOS)
 230 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 231  public:
 232   OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
 233            int num_channels,
 234            int sampling_rate,
 235            int bitrate,
 236            const FrameEncodedCallback& callback)
 237       : ImplBase(cast_environment,
 238                  CODEC_AUDIO_OPUS,
 239                  num_channels,
 240                  sampling_rate,
 241                  sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
 242                  callback),
 243         encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
 244         opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
 245         buffer_(new float[num_channels * samples_per_frame_]) {
 246     if (ImplBase::operational_status_ != STATUS_UNINITIALIZED ||
 247         sampling_rate % samples_per_frame_ != 0 ||
 248         !IsValidFrameDuration(frame_duration_)) {
 249       return;
 250     }
 251     if (opus_encoder_init(opus_encoder_,
 252                           sampling_rate,
 253                           num_channels,
 254                           OPUS_APPLICATION_AUDIO) != OPUS_OK) {
 255       ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
 256       return;
 257     }
 258     ImplBase::operational_status_ = STATUS_INITIALIZED;
 259
 260     if (bitrate <= 0) {
 261       // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
 262       // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
 263       // frame size.  The opus library authors may, of course, adjust this in
 264       // later versions.
 265       bitrate = OPUS_AUTO;
 266     }
 267     CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
 268              OPUS_OK);
 269   }
 270
 271  private:
 272   ~OpusImpl() final {}
 273
 274   void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 275                                  int source_offset,
 276                                  int buffer_fill_offset,
 277                                  int num_samples) final {
 278     // Opus requires channel-interleaved samples in a single array.
 279     for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 280       const float* src = audio_bus->channel(ch) + source_offset;
 281       const float* const src_end = src + num_samples;
 282       float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
 283       for (; src < src_end; ++src, dest += num_channels_)
 284         *dest = *src;
 285     }
 286   }
 287
 288   bool EncodeFromFilledBuffer(std::string* out) final {
 289     out->resize(kOpusMaxPayloadSize);
 290     const opus_int32 result =
 291         opus_encode_float(opus_encoder_,
 292                           buffer_.get(),
 293                           samples_per_frame_,
 294                           reinterpret_cast<uint8*>(string_as_array(out)),
 295                           kOpusMaxPayloadSize);
 296     if (result > 1) {
 297       out->resize(result);
 298       return true;
 299     } else if (result < 0) {
 300       LOG(ERROR) << "Error code from opus_encode_float(): " << result;
 301       return false;
 302     } else {
 303       // Do nothing: The documentation says that a return value of zero or
 304       // one byte means the packet does not need to be transmitted.
 305       return false;
 306     }
 307   }
 308
 309   static bool IsValidFrameDuration(base::TimeDelta duration) {
 310     // See https://tools.ietf.org/html/rfc6716#section-2.1.4
 311     return duration == base::TimeDelta::FromMicroseconds(2500) ||
 312            duration == base::TimeDelta::FromMilliseconds(5) ||
 313            duration == base::TimeDelta::FromMilliseconds(10) ||
 314            duration == base::TimeDelta::FromMilliseconds(20) ||
 315            duration == base::TimeDelta::FromMilliseconds(40) ||
 316            duration == base::TimeDelta::FromMilliseconds(60);
 317   }
 318
 319   const scoped_ptr<uint8[]> encoder_memory_;
 320   OpusEncoder* const opus_encoder_;
 321   const scoped_ptr<float[]> buffer_;
 322
 323   // This is the recommended value, according to documentation in
 324   // third_party/opus/src/include/opus.h, so that the Opus encoder does not
 325   // degrade the audio due to memory constraints.
 326   //
 327   // Note: Whereas other RTP implementations do not, the cast library is
 328   // perfectly capable of transporting larger than MTU-sized audio frames.
 329   static const int kOpusMaxPayloadSize = 4000;
 330
 331   DISALLOW_COPY_AND_ASSIGN(OpusImpl);
 332 };
 333 #endif
 334
 335 #if defined(OS_MACOSX)
 336 class AudioEncoder::AppleAacImpl : public AudioEncoder::ImplBase {
 337   // AAC-LC has two access unit sizes (960 and 1024). The Apple encoder only
 338   // supports the latter.
 339   static const int kAccessUnitSamples = 1024;
 340
 341   // Size of an ADTS header (w/o checksum). See
 342   // http://wiki.multimedia.cx/index.php?title=ADTS
 343   static const int kAdtsHeaderSize = 7;
 344
 345  public:
 346   AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
 347                int num_channels,
 348                int sampling_rate,
 349                int bitrate,
 350                const FrameEncodedCallback& callback)
 351       : ImplBase(cast_environment,
 352                  CODEC_AUDIO_AAC,
 353                  num_channels,
 354                  sampling_rate,
 355                  kAccessUnitSamples,
 356                  callback),
 357         input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
 358         input_bus_(AudioBus::CreateWrapper(num_channels)),
 359         max_access_unit_size_(0),
 360         output_buffer_(nullptr),
 361         converter_(nullptr),
 362         file_(nullptr),
 363         num_access_units_(0),
 364         can_resume_(true) {
 365     if (ImplBase::operational_status_ != STATUS_UNINITIALIZED) {
 366       return;
 367     }
 368     if (!Initialize(sampling_rate, bitrate)) {
 369       ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
 370       return;
 371     }
 372     ImplBase::operational_status_ = STATUS_INITIALIZED;
 373   }
 374
 375  private:
 376   ~AppleAacImpl() final { Teardown(); }
 377
 378   // Destroys the existing audio converter and file, if any.
 379   void Teardown() {
 380     if (converter_) {
 381       AudioConverterDispose(converter_);
 382       converter_ = nullptr;
 383     }
 384     if (file_) {
 385       AudioFileClose(file_);
 386       file_ = nullptr;
 387     }
 388   }
 389
 390   // Initializes the audio converter and file. Calls Teardown to destroy any
 391   // existing state. This is so that Initialize() may be called to setup another
 392   // converter after a non-resumable interruption.
 393   bool Initialize(int sampling_rate, int bitrate) {
 394     // Teardown previous audio converter and file.
 395     Teardown();
 396
 397     // Input data comes from AudioBus objects, which carry non-interleaved
 398     // packed native-endian float samples. Note that in Core Audio, a frame is
 399     // one sample across all channels at a given point in time. When describing
 400     // a non-interleaved samples format, the "per frame" fields mean "per
 401     // channel" or "per stream", with the exception of |mChannelsPerFrame|. For
 402     // uncompressed formats, one packet contains one frame.
 403     AudioStreamBasicDescription in_asbd;
 404     in_asbd.mSampleRate = sampling_rate;
 405     in_asbd.mFormatID = kAudioFormatLinearPCM;
 406     in_asbd.mFormatFlags =
 407         kAudioFormatFlagsNativeFloatPacked | kAudioFormatFlagIsNonInterleaved;
 408     in_asbd.mChannelsPerFrame = num_channels_;
 409     in_asbd.mBitsPerChannel = sizeof(float) * 8;
 410     in_asbd.mFramesPerPacket = 1;
 411     in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
 412     in_asbd.mReserved = 0;
 413
 414     // Request AAC-LC encoding, with no downmixing or downsampling.
 415     AudioStreamBasicDescription out_asbd;
 416     memset(&out_asbd, 0, sizeof(AudioStreamBasicDescription));
 417     out_asbd.mSampleRate = sampling_rate;
 418     out_asbd.mFormatID = kAudioFormatMPEG4AAC;
 419     out_asbd.mChannelsPerFrame = num_channels_;
 420     UInt32 prop_size = sizeof(out_asbd);
 421     if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
 422                                0,
 423                                nullptr,
 424                                &prop_size,
 425                                &out_asbd) != noErr) {
 426       return false;
 427     }
 428
 429     if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
 430       return false;
 431     }
 432
 433     // The converter will fully specify the output format and update the
 434     // relevant fields of the structure, which we can now query.
 435     prop_size = sizeof(out_asbd);
 436     if (AudioConverterGetProperty(converter_,
 437                                   kAudioConverterCurrentOutputStreamDescription,
 438                                   &prop_size,
 439                                   &out_asbd) != noErr) {
 440       return false;
 441     }
 442
 443     // If bitrate is <= 0, allow the encoder to pick a suitable value.
 444     // Otherwise, set the bitrate (which can fail if the value is not suitable
 445     // or compatible with the output sampling rate or channels).
 446     if (bitrate > 0) {
 447       prop_size = sizeof(int);
 448       if (AudioConverterSetProperty(
 449               converter_, kAudioConverterEncodeBitRate, prop_size, &bitrate) !=
 450           noErr) {
 451         return false;
 452       }
 453     }
 454
 455 #if defined(OS_IOS)
 456     // See the comment next to |can_resume_| for details on resumption. Some
 457     // converters can return kAudioConverterErr_PropertyNotSupported, in which
 458     // case resumption is implicitly supported. This is the only location where
 459     // the implementation modifies |can_resume_|.
 460     uint32_t can_resume;
 461     prop_size = sizeof(can_resume);
 462     OSStatus oserr = AudioConverterGetProperty(
 463         converter_,
 464         kAudioConverterPropertyCanResumeFromInterruption,
 465         &prop_size,
 466         &can_resume);
 467     if (oserr == noErr) {
 468       const_cast<bool&>(can_resume_) = can_resume != 0;
 469     }
 470 #endif
 471
 472     // Figure out the maximum size of an access unit that the encoder can
 473     // produce. |mBytesPerPacket| will be 0 for variable size configurations,
 474     // in which case we must query the value.
 475     uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
 476     if (max_access_unit_size == 0) {
 477       prop_size = sizeof(max_access_unit_size);
 478       if (AudioConverterGetProperty(
 479               converter_,
 480               kAudioConverterPropertyMaximumOutputPacketSize,
 481               &prop_size,
 482               &max_access_unit_size) != noErr) {
 483         return false;
 484       }
 485     }
 486
 487     // This is the only location where the implementation modifies
 488     // |max_access_unit_size_|.
 489     const_cast<uint32_t&>(max_access_unit_size_) = max_access_unit_size;
 490
 491     // Allocate a buffer to store one access unit. This is the only location
 492     // where the implementation modifies |access_unit_buffer_|.
 493     const_cast<scoped_ptr<uint8[]>&>(access_unit_buffer_)
 494         .reset(new uint8[max_access_unit_size]);
 495
 496     // Initialize the converter ABL. Note that the buffer size has to be set
 497     // before every encode operation, since the field is modified to indicate
 498     // the size of the output data (on input it indicates the buffer capacity).
 499     converter_abl_.mNumberBuffers = 1;
 500     converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
 501     converter_abl_.mBuffers[0].mData = access_unit_buffer_.get();
 502
 503     // The "magic cookie" is an encoder state vector required for decoding and
 504     // packetization. It is queried now from |converter_| then set on |file_|
 505     // after initialization.
 506     UInt32 cookie_size;
 507     if (AudioConverterGetPropertyInfo(converter_,
 508                                       kAudioConverterCompressionMagicCookie,
 509                                       &cookie_size,
 510                                       nullptr) != noErr) {
 511       return false;
 512     }
 513     scoped_ptr<uint8[]> cookie_data(new uint8[cookie_size]);
 514     if (AudioConverterGetProperty(converter_,
 515                                   kAudioConverterCompressionMagicCookie,
 516                                   &cookie_size,
 517                                   cookie_data.get()) != noErr) {
 518       return false;
 519     }
 520
 521     if (AudioFileInitializeWithCallbacks(this,
 522                                          nullptr,
 523                                          &FileWriteCallback,
 524                                          nullptr,
 525                                          nullptr,
 526                                          kAudioFileAAC_ADTSType,
 527                                          &out_asbd,
 528                                          0,
 529                                          &file_) != noErr) {
 530       return false;
 531     }
 532
 533     if (AudioFileSetProperty(file_,
 534                              kAudioFilePropertyMagicCookieData,
 535                              cookie_size,
 536                              cookie_data.get()) != noErr) {
 537       return false;
 538     }
 539
 540     // Initially the input bus points to the input buffer. See the comment on
 541     // |input_bus_| for more on this optimization.
 542     input_bus_->set_frames(kAccessUnitSamples);
 543     for (int ch = 0; ch < input_buffer_->channels(); ++ch) {
 544       input_bus_->SetChannelData(ch, input_buffer_->channel(ch));
 545     }
 546
 547     return true;
 548   }
 549
 550   void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 551                                  int source_offset,
 552                                  int buffer_fill_offset,
 553                                  int num_samples) final {
 554     DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());
 555
 556     // See the comment on |input_bus_| for more on this optimization. Note that
 557     // we cannot elide the copy if the source offset would result in an
 558     // unaligned pointer.
 559     if (num_samples == kAccessUnitSamples &&
 560         source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0) {
 561       DCHECK_EQ(buffer_fill_offset, 0);
 562       for (int ch = 0; ch < audio_bus->channels(); ++ch) {
 563         auto samples = const_cast<float*>(audio_bus->channel(ch));
 564         input_bus_->SetChannelData(ch, samples + source_offset);
 565       }
 566       return;
 567     }
 568
 569     // Copy the samples into the input buffer.
 570     DCHECK_EQ(input_bus_->channel(0), input_buffer_->channel(0));
 571     audio_bus->CopyPartialFramesTo(
 572         source_offset, num_samples, buffer_fill_offset, input_buffer_.get());
 573   }
 574
 575   bool EncodeFromFilledBuffer(std::string* out) final {
 576     // Reset the buffer size field to the buffer capacity.
 577     converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;
 578
 579     // Encode the current input buffer. This is a sychronous call.
 580     OSStatus oserr;
 581     UInt32 io_num_packets = 1;
 582     AudioStreamPacketDescription packet_description;
 583     oserr = AudioConverterFillComplexBuffer(converter_,
 584                                             &ConverterFillDataCallback,
 585                                             this,
 586                                             &io_num_packets,
 587                                             &converter_abl_,
 588                                             &packet_description);
 589     if (oserr != noErr || io_num_packets == 0) {
 590       return false;
 591     }
 592
 593     // Reserve space in the output buffer to write the packet.
 594     out->reserve(packet_description.mDataByteSize + kAdtsHeaderSize);
 595
 596     // Set the current output buffer and emit an ADTS-wrapped AAC access unit.
 597     // This is a synchronous call. After it returns, reset the output buffer.
 598     output_buffer_ = out;
 599     oserr = AudioFileWritePackets(file_,
 600                                   false,
 601                                   converter_abl_.mBuffers[0].mDataByteSize,
 602                                   &packet_description,
 603                                   num_access_units_,
 604                                   &io_num_packets,
 605                                   converter_abl_.mBuffers[0].mData);
 606     output_buffer_ = nullptr;
 607     if (oserr != noErr || io_num_packets == 0) {
 608       return false;
 609     }
 610     num_access_units_ += io_num_packets;
 611     return true;
 612   }
 613
 614   // The |AudioConverterFillComplexBuffer| input callback function. Configures
 615   // the provided |AudioBufferList| to alias |input_bus_|. The implementation
 616   // can only supply |kAccessUnitSamples| samples as a result of not copying
 617   // samples or tracking read and write positions. Note that this function is
 618   // called synchronously by |AudioConverterFillComplexBuffer|.
 619   static OSStatus ConverterFillDataCallback(
 620       AudioConverterRef in_converter,
 621       UInt32* io_num_packets,
 622       AudioBufferList* io_data,
 623       AudioStreamPacketDescription** out_packet_desc,
 624       void* in_encoder) {
 625     DCHECK(in_encoder);
 626     auto encoder = reinterpret_cast<AppleAacImpl*>(in_encoder);
 627     auto input_buffer = encoder->input_buffer_.get();
 628     auto input_bus = encoder->input_bus_.get();
 629
 630     DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
 631     DCHECK_EQ(io_data->mNumberBuffers,
 632               static_cast<unsigned>(input_bus->channels()));
 633     for (int i_buf = 0, end = io_data->mNumberBuffers; i_buf < end; ++i_buf) {
 634       io_data->mBuffers[i_buf].mNumberChannels = 1;
 635       io_data->mBuffers[i_buf].mDataByteSize = sizeof(float) * *io_num_packets;
 636       io_data->mBuffers[i_buf].mData = input_bus->channel(i_buf);
 637
 638       // Reset the input bus back to the input buffer. See the comment on
 639       // |input_bus_| for more on this optimization.
 640       input_bus->SetChannelData(i_buf, input_buffer->channel(i_buf));
 641     }
 642     return noErr;
 643   }
 644
 645   // The AudioFile write callback function. Appends the data to the encoder's
 646   // current |output_buffer_|.
 647   static OSStatus FileWriteCallback(void* in_encoder,
 648                                     SInt64 in_position,
 649                                     UInt32 in_size,
 650                                     const void* in_buffer,
 651                                     UInt32* out_size) {
 652     DCHECK(in_encoder);
 653     DCHECK(in_buffer);
 654     auto encoder = reinterpret_cast<const AppleAacImpl*>(in_encoder);
 655     auto buffer = reinterpret_cast<const std::string::value_type*>(in_buffer);
 656
 657     std::string* const output_buffer = encoder->output_buffer_;
 658     DCHECK(output_buffer);
 659
 660     output_buffer->append(buffer, in_size);
 661     *out_size = in_size;
 662     return noErr;
 663   }
 664
 665   // Buffer that holds one AAC access unit worth of samples. The input callback
 666   // function provides samples from this buffer via |input_bus_| to the encoder.
 667   const scoped_ptr<AudioBus> input_buffer_;
 668
 669   // Wrapper AudioBus used by the input callback function. Normally it wraps
 670   // |input_buffer_|. However, as an optimization when the client submits a
 671   // buffer containing exactly one access unit worth of samples, the bus is
 672   // redirected to the client buffer temporarily. We know that the base
 673   // implementation will call us right after to encode the buffer and thus we
 674   // can eliminate the copy into |input_buffer_|.
 675   const scoped_ptr<AudioBus> input_bus_;
 676
 677   // A buffer that holds one AAC access unit. Initialized in |Initialize| once
 678   // the maximum access unit size is known.
 679   const scoped_ptr<uint8[]> access_unit_buffer_;
 680
 681   // The maximum size of an access unit that the encoder can emit.
 682   const uint32_t max_access_unit_size_;
 683
 684   // A temporary pointer to the current output buffer. Only non-null when
 685   // writing an access unit. Accessed by the AudioFile write callback function.
 686   std::string* output_buffer_;
 687
 688   // The |AudioConverter| is responsible for AAC encoding. This is a Core Audio
 689   // object, not to be confused with |media::AudioConverter|.
 690   AudioConverterRef converter_;
 691
 692   // The |AudioFile| is responsible for ADTS packetization.
 693   AudioFileID file_;
 694
 695   // An |AudioBufferList| passed to the converter to store encoded samples.
 696   AudioBufferList converter_abl_;
 697
 698   // The number of access units emitted so far by the encoder.
 699   uint64_t num_access_units_;
 700
 701   // On iOS, audio codecs can be interrupted by other services (such as an
 702   // audio alert or phone call). Depending on the underlying hardware and
 703   // configuration, the codec may have to be thrown away and re-initialized
 704   // after such an interruption. This flag tracks if we can resume or not from
 705   // such an interruption. It is initialized to true, which is the only possible
 706   // value on OS X and on most modern iOS hardware.
 707   // TODO(jfroy): Implement encoder re-initialization after interruption.
 708   //              https://crbug.com/424787
 709   const bool can_resume_;
 710
 711   DISALLOW_COPY_AND_ASSIGN(AppleAacImpl);
 712 };
 713 #endif  // defined(OS_MACOSX)
 714
 715 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 716  public:
 717   Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
 718             int num_channels,
 719             int sampling_rate,
 720             const FrameEncodedCallback& callback)
 721       : ImplBase(cast_environment,
 722                  CODEC_AUDIO_PCM16,
 723                  num_channels,
 724                  sampling_rate,
 725                  sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
 726                  callback),
 727         buffer_(new int16[num_channels * samples_per_frame_]) {
 728     if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
 729       return;
 730     operational_status_ = STATUS_INITIALIZED;
 731   }
 732
 733  private:
 734   ~Pcm16Impl() final {}
 735
 736   void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
 737                                  int source_offset,
 738                                  int buffer_fill_offset,
 739                                  int num_samples) final {
 740     audio_bus->ToInterleavedPartial(
 741         source_offset,
 742         num_samples,
 743         sizeof(int16),
 744         buffer_.get() + buffer_fill_offset * num_channels_);
 745   }
 746
 747   bool EncodeFromFilledBuffer(std::string* out) final {
 748     // Output 16-bit PCM integers in big-endian byte order.
 749     out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
 750     const int16* src = buffer_.get();
 751     const int16* const src_end = src + num_channels_ * samples_per_frame_;
 752     uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
 753     for (; src < src_end; ++src, ++dest)
 754       *dest = base::HostToNet16(*src);
 755     return true;
 756   }
 757
 758  private:
 759   const scoped_ptr<int16[]> buffer_;
 760
 761   DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
 762 };
 763
 764 AudioEncoder::AudioEncoder(
 765     const scoped_refptr<CastEnvironment>& cast_environment,
 766     int num_channels,
 767     int sampling_rate,
 768     int bitrate,
 769     Codec codec,
 770     const FrameEncodedCallback& frame_encoded_callback)
 771     : cast_environment_(cast_environment) {
 772   // Note: It doesn't matter which thread constructs AudioEncoder, just so long
 773   // as all calls to InsertAudio() are by the same thread.
 774   insert_thread_checker_.DetachFromThread();
 775   switch (codec) {
 776 #if !defined(OS_IOS)
 777     case CODEC_AUDIO_OPUS:
 778       impl_ = new OpusImpl(cast_environment,
 779                            num_channels,
 780                            sampling_rate,
 781                            bitrate,
 782                            frame_encoded_callback);
 783       break;
 784 #endif
 785 #if defined(OS_MACOSX)
 786     case CODEC_AUDIO_AAC:
 787       impl_ = new AppleAacImpl(cast_environment,
 788                                num_channels,
 789                                sampling_rate,
 790                                bitrate,
 791                                frame_encoded_callback);
 792       break;
 793 #endif  // defined(OS_MACOSX)
 794     case CODEC_AUDIO_PCM16:
 795       impl_ = new Pcm16Impl(cast_environment,
 796                             num_channels,
 797                             sampling_rate,
 798                             frame_encoded_callback);
 799       break;
 800     default:
 801       NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
 802       break;
 803   }
 804 }
 805
 806 AudioEncoder::~AudioEncoder() {}
 807
 808 OperationalStatus AudioEncoder::InitializationResult() const {
 809   DCHECK(insert_thread_checker_.CalledOnValidThread());
 810   if (impl_.get()) {
 811     return impl_->InitializationResult();
 812   }
 813   return STATUS_UNSUPPORTED_CODEC;
 814 }
 815
 816 int AudioEncoder::GetSamplesPerFrame() const {
 817   DCHECK(insert_thread_checker_.CalledOnValidThread());
 818   if (InitializationResult() != STATUS_INITIALIZED) {
 819     NOTREACHED();
 820     return std::numeric_limits<int>::max();
 821   }
 822   return impl_->samples_per_frame();
 823 }
 824
 825 base::TimeDelta AudioEncoder::GetFrameDuration() const {
 826   DCHECK(insert_thread_checker_.CalledOnValidThread());
 827   if (InitializationResult() != STATUS_INITIALIZED) {
 828     NOTREACHED();
 829     return base::TimeDelta();
 830   }
 831   return impl_->frame_duration();
 832 }
 833
 834 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
 835                                const base::TimeTicks& recorded_time) {
 836   DCHECK(insert_thread_checker_.CalledOnValidThread());
 837   DCHECK(audio_bus.get());
 838   if (InitializationResult() != STATUS_INITIALIZED) {
 839     NOTREACHED();
 840     return;
 841   }
 842   cast_environment_->PostTask(CastEnvironment::AUDIO,
 843                               FROM_HERE,
 844                               base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
 845                                          impl_,
 846                                          base::Passed(&audio_bus),
 847                                          recorded_time));
 848 }
 849
 850 }  // namespace cast
 851 }  // namespace media