Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / media / cast / sender / audio_encoder.cc
blob879b476f5ac89ecc44d8c87f65ba17e5272ba352
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/cast/sender/audio_encoder.h"
7 #include <algorithm>
8 #include <limits>
9 #include <string>
11 #include "base/bind.h"
12 #include "base/bind_helpers.h"
13 #include "base/location.h"
14 #include "base/stl_util.h"
15 #include "base/sys_byteorder.h"
16 #include "base/time/time.h"
17 #include "base/trace_event/trace_event.h"
18 #include "media/cast/cast_defines.h"
20 #if !defined(OS_IOS)
21 #include "third_party/opus/src/include/opus.h"
22 #endif
24 #if defined(OS_MACOSX)
25 #include <AudioToolbox/AudioToolbox.h>
26 #endif
28 namespace media {
29 namespace cast {
namespace {

// EncodeAudio() only treats late-arriving audio as an underrun (and skips the
// RTP timestamp ahead) when the number of whole frames missed exceeds this.
const int kUnderrunSkipThreshold = 3;

// Divisor applied to the sampling rate by the Opus and PCM16 implementations
// to obtain the number of samples in one frame (100 frames per second, i.e.
// 10 ms frames).
const int kDefaultFramesPerSecond = 100;

}  // namespace
38 // Base class that handles the common problem of feeding one or more AudioBus'
39 // data into a buffer and then, once the buffer is full, encoding the signal and
40 // emitting a SenderEncodedFrame via the FrameEncodedCallback.
42 // Subclasses complete the implementation by handling the actual encoding
43 // details.
44 class AudioEncoder::ImplBase
45 : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
46 public:
47 ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
48 Codec codec,
49 int num_channels,
50 int sampling_rate,
51 int samples_per_frame,
52 const FrameEncodedCallback& callback)
53 : cast_environment_(cast_environment),
54 codec_(codec),
55 num_channels_(num_channels),
56 samples_per_frame_(samples_per_frame),
57 callback_(callback),
58 operational_status_(STATUS_UNINITIALIZED),
59 frame_duration_(base::TimeDelta::FromMicroseconds(
60 base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
61 sampling_rate)),
62 buffer_fill_end_(0),
63 frame_id_(0),
64 frame_rtp_timestamp_(0),
65 samples_dropped_from_buffer_(0) {
66 // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
67 const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
68 if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
69 frame_duration_ == base::TimeDelta() ||
70 samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
71 operational_status_ = STATUS_INVALID_CONFIGURATION;
75 OperationalStatus InitializationResult() const {
76 return operational_status_;
79 int samples_per_frame() const {
80 return samples_per_frame_;
83 base::TimeDelta frame_duration() const { return frame_duration_; }
85 void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
86 const base::TimeTicks& recorded_time) {
87 DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
88 DCHECK(!recorded_time.is_null());
90 // Determine whether |recorded_time| is consistent with the amount of audio
91 // data having been processed in the past. Resolve the underrun problem by
92 // dropping data from the internal buffer and skipping ahead the next
93 // frame's RTP timestamp by the estimated number of frames missed. On the
94 // other hand, don't attempt to resolve overruns: A receiver should
95 // gracefully deal with an excess of audio data.
96 base::TimeDelta buffer_fill_duration =
97 buffer_fill_end_ * frame_duration_ / samples_per_frame_;
98 if (!frame_capture_time_.is_null()) {
99 const base::TimeDelta amount_ahead_by =
100 recorded_time - (frame_capture_time_ + buffer_fill_duration);
101 const int64 num_frames_missed = amount_ahead_by / frame_duration_;
102 if (num_frames_missed > kUnderrunSkipThreshold) {
103 samples_dropped_from_buffer_ += buffer_fill_end_;
104 buffer_fill_end_ = 0;
105 buffer_fill_duration = base::TimeDelta();
106 frame_rtp_timestamp_ +=
107 static_cast<uint32>(num_frames_missed * samples_per_frame_);
108 DVLOG(1) << "Skipping RTP timestamp ahead to account for "
109 << num_frames_missed * samples_per_frame_
110 << " samples' worth of underrun.";
111 TRACE_EVENT_INSTANT2("cast.stream", "Audio Skip",
112 TRACE_EVENT_SCOPE_THREAD,
113 "frames missed", num_frames_missed,
114 "samples dropped", samples_dropped_from_buffer_);
117 frame_capture_time_ = recorded_time - buffer_fill_duration;
119 // Encode all audio in |audio_bus| into zero or more frames.
120 int src_pos = 0;
121 while (src_pos < audio_bus->frames()) {
122 // Note: This is used to compute the deadline utilization and so it uses
123 // the real-world clock instead of the CastEnvironment clock, the latter
124 // of which might be simulated.
125 const base::TimeTicks start_time = base::TimeTicks::Now();
127 const int num_samples_to_xfer = std::min(
128 samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
129 DCHECK_EQ(audio_bus->channels(), num_channels_);
130 TransferSamplesIntoBuffer(
131 audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
132 src_pos += num_samples_to_xfer;
133 buffer_fill_end_ += num_samples_to_xfer;
135 if (buffer_fill_end_ < samples_per_frame_)
136 break;
138 scoped_ptr<SenderEncodedFrame> audio_frame(
139 new SenderEncodedFrame());
140 audio_frame->dependency = EncodedFrame::KEY;
141 audio_frame->frame_id = frame_id_;
142 audio_frame->referenced_frame_id = frame_id_;
143 audio_frame->rtp_timestamp = frame_rtp_timestamp_;
144 audio_frame->reference_time = frame_capture_time_;
146 TRACE_EVENT_ASYNC_BEGIN2("cast.stream", "Audio Encode", audio_frame.get(),
147 "frame_id", frame_id_,
148 "rtp_timestamp", frame_rtp_timestamp_);
149 if (EncodeFromFilledBuffer(&audio_frame->data)) {
150 // Compute deadline utilization as the real-world time elapsed divided
151 // by the signal duration.
152 audio_frame->deadline_utilization =
153 (base::TimeTicks::Now() - start_time).InSecondsF() /
154 frame_duration_.InSecondsF();
156 TRACE_EVENT_ASYNC_END1("cast.stream", "Audio Encode", audio_frame.get(),
157 "Deadline utilization",
158 audio_frame->deadline_utilization);
159 cast_environment_->PostTask(
160 CastEnvironment::MAIN,
161 FROM_HERE,
162 base::Bind(callback_,
163 base::Passed(&audio_frame),
164 samples_dropped_from_buffer_));
165 samples_dropped_from_buffer_ = 0;
168 // Reset the internal buffer, frame ID, and timestamps for the next frame.
169 buffer_fill_end_ = 0;
170 ++frame_id_;
171 frame_rtp_timestamp_ += samples_per_frame_;
172 frame_capture_time_ += frame_duration_;
176 protected:
177 friend class base::RefCountedThreadSafe<ImplBase>;
178 virtual ~ImplBase() {}
180 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
181 int source_offset,
182 int buffer_fill_offset,
183 int num_samples) = 0;
184 virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
186 const scoped_refptr<CastEnvironment> cast_environment_;
187 const Codec codec_;
188 const int num_channels_;
189 const int samples_per_frame_;
190 const FrameEncodedCallback callback_;
192 // Subclass' ctor is expected to set this to STATUS_INITIALIZED.
193 OperationalStatus operational_status_;
195 // The duration of one frame of encoded audio samples. Derived from
196 // |samples_per_frame_| and the sampling rate.
197 const base::TimeDelta frame_duration_;
199 private:
200 // In the case where a call to EncodeAudio() cannot completely fill the
201 // buffer, this points to the position at which to populate data in a later
202 // call.
203 int buffer_fill_end_;
205 // A counter used to label EncodedFrames.
206 uint32 frame_id_;
208 // The RTP timestamp for the next frame of encoded audio. This is defined as
209 // the number of audio samples encoded so far, plus the estimated number of
210 // samples that were missed due to data underruns. A receiver uses this value
211 // to detect gaps in the audio signal data being provided. Per the spec, RTP
212 // timestamp values are allowed to overflow and roll around past zero.
213 uint32 frame_rtp_timestamp_;
215 // The local system time associated with the start of the next frame of
216 // encoded audio. This value is passed on to a receiver as a reference clock
217 // timestamp for the purposes of synchronizing audio and video. Its
218 // progression is expected to drift relative to the elapsed time implied by
219 // the RTP timestamps.
220 base::TimeTicks frame_capture_time_;
222 // Set to non-zero to indicate the next output frame skipped over audio
223 // samples in order to recover from an input underrun.
224 int samples_dropped_from_buffer_;
226 DISALLOW_COPY_AND_ASSIGN(ImplBase);
229 #if !defined(OS_IOS)
230 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
231 public:
232 OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
233 int num_channels,
234 int sampling_rate,
235 int bitrate,
236 const FrameEncodedCallback& callback)
237 : ImplBase(cast_environment,
238 CODEC_AUDIO_OPUS,
239 num_channels,
240 sampling_rate,
241 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
242 callback),
243 encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
244 opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
245 buffer_(new float[num_channels * samples_per_frame_]) {
246 if (ImplBase::operational_status_ != STATUS_UNINITIALIZED ||
247 sampling_rate % samples_per_frame_ != 0 ||
248 !IsValidFrameDuration(frame_duration_)) {
249 return;
251 if (opus_encoder_init(opus_encoder_,
252 sampling_rate,
253 num_channels,
254 OPUS_APPLICATION_AUDIO) != OPUS_OK) {
255 ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
256 return;
258 ImplBase::operational_status_ = STATUS_INITIALIZED;
260 if (bitrate <= 0) {
261 // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
262 // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
263 // frame size. The opus library authors may, of course, adjust this in
264 // later versions.
265 bitrate = OPUS_AUTO;
267 CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
268 OPUS_OK);
271 private:
272 ~OpusImpl() final {}
274 void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
275 int source_offset,
276 int buffer_fill_offset,
277 int num_samples) final {
278 // Opus requires channel-interleaved samples in a single array.
279 for (int ch = 0; ch < audio_bus->channels(); ++ch) {
280 const float* src = audio_bus->channel(ch) + source_offset;
281 const float* const src_end = src + num_samples;
282 float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
283 for (; src < src_end; ++src, dest += num_channels_)
284 *dest = *src;
288 bool EncodeFromFilledBuffer(std::string* out) final {
289 out->resize(kOpusMaxPayloadSize);
290 const opus_int32 result =
291 opus_encode_float(opus_encoder_,
292 buffer_.get(),
293 samples_per_frame_,
294 reinterpret_cast<uint8*>(string_as_array(out)),
295 kOpusMaxPayloadSize);
296 if (result > 1) {
297 out->resize(result);
298 return true;
299 } else if (result < 0) {
300 LOG(ERROR) << "Error code from opus_encode_float(): " << result;
301 return false;
302 } else {
303 // Do nothing: The documentation says that a return value of zero or
304 // one byte means the packet does not need to be transmitted.
305 return false;
309 static bool IsValidFrameDuration(base::TimeDelta duration) {
310 // See https://tools.ietf.org/html/rfc6716#section-2.1.4
311 return duration == base::TimeDelta::FromMicroseconds(2500) ||
312 duration == base::TimeDelta::FromMilliseconds(5) ||
313 duration == base::TimeDelta::FromMilliseconds(10) ||
314 duration == base::TimeDelta::FromMilliseconds(20) ||
315 duration == base::TimeDelta::FromMilliseconds(40) ||
316 duration == base::TimeDelta::FromMilliseconds(60);
319 const scoped_ptr<uint8[]> encoder_memory_;
320 OpusEncoder* const opus_encoder_;
321 const scoped_ptr<float[]> buffer_;
323 // This is the recommended value, according to documentation in
324 // third_party/opus/src/include/opus.h, so that the Opus encoder does not
325 // degrade the audio due to memory constraints.
327 // Note: Whereas other RTP implementations do not, the cast library is
328 // perfectly capable of transporting larger than MTU-sized audio frames.
329 static const int kOpusMaxPayloadSize = 4000;
331 DISALLOW_COPY_AND_ASSIGN(OpusImpl);
333 #endif
335 #if defined(OS_MACOSX)
336 class AudioEncoder::AppleAacImpl : public AudioEncoder::ImplBase {
337 // AAC-LC has two access unit sizes (960 and 1024). The Apple encoder only
338 // supports the latter.
339 static const int kAccessUnitSamples = 1024;
341 // Size of an ADTS header (w/o checksum). See
342 // http://wiki.multimedia.cx/index.php?title=ADTS
343 static const int kAdtsHeaderSize = 7;
345 public:
346 AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
347 int num_channels,
348 int sampling_rate,
349 int bitrate,
350 const FrameEncodedCallback& callback)
351 : ImplBase(cast_environment,
352 CODEC_AUDIO_AAC,
353 num_channels,
354 sampling_rate,
355 kAccessUnitSamples,
356 callback),
357 input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
358 input_bus_(AudioBus::CreateWrapper(num_channels)),
359 max_access_unit_size_(0),
360 output_buffer_(nullptr),
361 converter_(nullptr),
362 file_(nullptr),
363 num_access_units_(0),
364 can_resume_(true) {
365 if (ImplBase::operational_status_ != STATUS_UNINITIALIZED) {
366 return;
368 if (!Initialize(sampling_rate, bitrate)) {
369 ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
370 return;
372 ImplBase::operational_status_ = STATUS_INITIALIZED;
375 private:
376 ~AppleAacImpl() final { Teardown(); }
378 // Destroys the existing audio converter and file, if any.
379 void Teardown() {
380 if (converter_) {
381 AudioConverterDispose(converter_);
382 converter_ = nullptr;
384 if (file_) {
385 AudioFileClose(file_);
386 file_ = nullptr;
390 // Initializes the audio converter and file. Calls Teardown to destroy any
391 // existing state. This is so that Initialize() may be called to setup another
392 // converter after a non-resumable interruption.
393 bool Initialize(int sampling_rate, int bitrate) {
394 // Teardown previous audio converter and file.
395 Teardown();
397 // Input data comes from AudioBus objects, which carry non-interleaved
398 // packed native-endian float samples. Note that in Core Audio, a frame is
399 // one sample across all channels at a given point in time. When describing
400 // a non-interleaved samples format, the "per frame" fields mean "per
401 // channel" or "per stream", with the exception of |mChannelsPerFrame|. For
402 // uncompressed formats, one packet contains one frame.
403 AudioStreamBasicDescription in_asbd;
404 in_asbd.mSampleRate = sampling_rate;
405 in_asbd.mFormatID = kAudioFormatLinearPCM;
406 in_asbd.mFormatFlags =
407 kAudioFormatFlagsNativeFloatPacked | kAudioFormatFlagIsNonInterleaved;
408 in_asbd.mChannelsPerFrame = num_channels_;
409 in_asbd.mBitsPerChannel = sizeof(float) * 8;
410 in_asbd.mFramesPerPacket = 1;
411 in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
412 in_asbd.mReserved = 0;
414 // Request AAC-LC encoding, with no downmixing or downsampling.
415 AudioStreamBasicDescription out_asbd;
416 memset(&out_asbd, 0, sizeof(AudioStreamBasicDescription));
417 out_asbd.mSampleRate = sampling_rate;
418 out_asbd.mFormatID = kAudioFormatMPEG4AAC;
419 out_asbd.mChannelsPerFrame = num_channels_;
420 UInt32 prop_size = sizeof(out_asbd);
421 if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
423 nullptr,
424 &prop_size,
425 &out_asbd) != noErr) {
426 return false;
429 if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
430 return false;
433 // The converter will fully specify the output format and update the
434 // relevant fields of the structure, which we can now query.
435 prop_size = sizeof(out_asbd);
436 if (AudioConverterGetProperty(converter_,
437 kAudioConverterCurrentOutputStreamDescription,
438 &prop_size,
439 &out_asbd) != noErr) {
440 return false;
443 // If bitrate is <= 0, allow the encoder to pick a suitable value.
444 // Otherwise, set the bitrate (which can fail if the value is not suitable
445 // or compatible with the output sampling rate or channels).
446 if (bitrate > 0) {
447 prop_size = sizeof(int);
448 if (AudioConverterSetProperty(
449 converter_, kAudioConverterEncodeBitRate, prop_size, &bitrate) !=
450 noErr) {
451 return false;
455 #if defined(OS_IOS)
456 // See the comment next to |can_resume_| for details on resumption. Some
457 // converters can return kAudioConverterErr_PropertyNotSupported, in which
458 // case resumption is implicitly supported. This is the only location where
459 // the implementation modifies |can_resume_|.
460 uint32_t can_resume;
461 prop_size = sizeof(can_resume);
462 OSStatus oserr = AudioConverterGetProperty(
463 converter_,
464 kAudioConverterPropertyCanResumeFromInterruption,
465 &prop_size,
466 &can_resume);
467 if (oserr == noErr) {
468 const_cast<bool&>(can_resume_) = can_resume != 0;
470 #endif
472 // Figure out the maximum size of an access unit that the encoder can
473 // produce. |mBytesPerPacket| will be 0 for variable size configurations,
474 // in which case we must query the value.
475 uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
476 if (max_access_unit_size == 0) {
477 prop_size = sizeof(max_access_unit_size);
478 if (AudioConverterGetProperty(
479 converter_,
480 kAudioConverterPropertyMaximumOutputPacketSize,
481 &prop_size,
482 &max_access_unit_size) != noErr) {
483 return false;
487 // This is the only location where the implementation modifies
488 // |max_access_unit_size_|.
489 const_cast<uint32_t&>(max_access_unit_size_) = max_access_unit_size;
491 // Allocate a buffer to store one access unit. This is the only location
492 // where the implementation modifies |access_unit_buffer_|.
493 const_cast<scoped_ptr<uint8[]>&>(access_unit_buffer_)
494 .reset(new uint8[max_access_unit_size]);
496 // Initialize the converter ABL. Note that the buffer size has to be set
497 // before every encode operation, since the field is modified to indicate
498 // the size of the output data (on input it indicates the buffer capacity).
499 converter_abl_.mNumberBuffers = 1;
500 converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
501 converter_abl_.mBuffers[0].mData = access_unit_buffer_.get();
503 // The "magic cookie" is an encoder state vector required for decoding and
504 // packetization. It is queried now from |converter_| then set on |file_|
505 // after initialization.
506 UInt32 cookie_size;
507 if (AudioConverterGetPropertyInfo(converter_,
508 kAudioConverterCompressionMagicCookie,
509 &cookie_size,
510 nullptr) != noErr) {
511 return false;
513 scoped_ptr<uint8[]> cookie_data(new uint8[cookie_size]);
514 if (AudioConverterGetProperty(converter_,
515 kAudioConverterCompressionMagicCookie,
516 &cookie_size,
517 cookie_data.get()) != noErr) {
518 return false;
521 if (AudioFileInitializeWithCallbacks(this,
522 nullptr,
523 &FileWriteCallback,
524 nullptr,
525 nullptr,
526 kAudioFileAAC_ADTSType,
527 &out_asbd,
529 &file_) != noErr) {
530 return false;
533 if (AudioFileSetProperty(file_,
534 kAudioFilePropertyMagicCookieData,
535 cookie_size,
536 cookie_data.get()) != noErr) {
537 return false;
540 // Initially the input bus points to the input buffer. See the comment on
541 // |input_bus_| for more on this optimization.
542 input_bus_->set_frames(kAccessUnitSamples);
543 for (int ch = 0; ch < input_buffer_->channels(); ++ch) {
544 input_bus_->SetChannelData(ch, input_buffer_->channel(ch));
547 return true;
550 void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
551 int source_offset,
552 int buffer_fill_offset,
553 int num_samples) final {
554 DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());
556 // See the comment on |input_bus_| for more on this optimization. Note that
557 // we cannot elide the copy if the source offset would result in an
558 // unaligned pointer.
559 if (num_samples == kAccessUnitSamples &&
560 source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0) {
561 DCHECK_EQ(buffer_fill_offset, 0);
562 for (int ch = 0; ch < audio_bus->channels(); ++ch) {
563 auto samples = const_cast<float*>(audio_bus->channel(ch));
564 input_bus_->SetChannelData(ch, samples + source_offset);
566 return;
569 // Copy the samples into the input buffer.
570 DCHECK_EQ(input_bus_->channel(0), input_buffer_->channel(0));
571 audio_bus->CopyPartialFramesTo(
572 source_offset, num_samples, buffer_fill_offset, input_buffer_.get());
575 bool EncodeFromFilledBuffer(std::string* out) final {
576 // Reset the buffer size field to the buffer capacity.
577 converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;
579 // Encode the current input buffer. This is a sychronous call.
580 OSStatus oserr;
581 UInt32 io_num_packets = 1;
582 AudioStreamPacketDescription packet_description;
583 oserr = AudioConverterFillComplexBuffer(converter_,
584 &ConverterFillDataCallback,
585 this,
586 &io_num_packets,
587 &converter_abl_,
588 &packet_description);
589 if (oserr != noErr || io_num_packets == 0) {
590 return false;
593 // Reserve space in the output buffer to write the packet.
594 out->reserve(packet_description.mDataByteSize + kAdtsHeaderSize);
596 // Set the current output buffer and emit an ADTS-wrapped AAC access unit.
597 // This is a synchronous call. After it returns, reset the output buffer.
598 output_buffer_ = out;
599 oserr = AudioFileWritePackets(file_,
600 false,
601 converter_abl_.mBuffers[0].mDataByteSize,
602 &packet_description,
603 num_access_units_,
604 &io_num_packets,
605 converter_abl_.mBuffers[0].mData);
606 output_buffer_ = nullptr;
607 if (oserr != noErr || io_num_packets == 0) {
608 return false;
610 num_access_units_ += io_num_packets;
611 return true;
614 // The |AudioConverterFillComplexBuffer| input callback function. Configures
615 // the provided |AudioBufferList| to alias |input_bus_|. The implementation
616 // can only supply |kAccessUnitSamples| samples as a result of not copying
617 // samples or tracking read and write positions. Note that this function is
618 // called synchronously by |AudioConverterFillComplexBuffer|.
619 static OSStatus ConverterFillDataCallback(
620 AudioConverterRef in_converter,
621 UInt32* io_num_packets,
622 AudioBufferList* io_data,
623 AudioStreamPacketDescription** out_packet_desc,
624 void* in_encoder) {
625 DCHECK(in_encoder);
626 auto encoder = reinterpret_cast<AppleAacImpl*>(in_encoder);
627 auto input_buffer = encoder->input_buffer_.get();
628 auto input_bus = encoder->input_bus_.get();
630 DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
631 DCHECK_EQ(io_data->mNumberBuffers,
632 static_cast<unsigned>(input_bus->channels()));
633 for (int i_buf = 0, end = io_data->mNumberBuffers; i_buf < end; ++i_buf) {
634 io_data->mBuffers[i_buf].mNumberChannels = 1;
635 io_data->mBuffers[i_buf].mDataByteSize = sizeof(float) * *io_num_packets;
636 io_data->mBuffers[i_buf].mData = input_bus->channel(i_buf);
638 // Reset the input bus back to the input buffer. See the comment on
639 // |input_bus_| for more on this optimization.
640 input_bus->SetChannelData(i_buf, input_buffer->channel(i_buf));
642 return noErr;
645 // The AudioFile write callback function. Appends the data to the encoder's
646 // current |output_buffer_|.
647 static OSStatus FileWriteCallback(void* in_encoder,
648 SInt64 in_position,
649 UInt32 in_size,
650 const void* in_buffer,
651 UInt32* out_size) {
652 DCHECK(in_encoder);
653 DCHECK(in_buffer);
654 auto encoder = reinterpret_cast<const AppleAacImpl*>(in_encoder);
655 auto buffer = reinterpret_cast<const std::string::value_type*>(in_buffer);
657 std::string* const output_buffer = encoder->output_buffer_;
658 DCHECK(output_buffer);
660 output_buffer->append(buffer, in_size);
661 *out_size = in_size;
662 return noErr;
665 // Buffer that holds one AAC access unit worth of samples. The input callback
666 // function provides samples from this buffer via |input_bus_| to the encoder.
667 const scoped_ptr<AudioBus> input_buffer_;
669 // Wrapper AudioBus used by the input callback function. Normally it wraps
670 // |input_buffer_|. However, as an optimization when the client submits a
671 // buffer containing exactly one access unit worth of samples, the bus is
672 // redirected to the client buffer temporarily. We know that the base
673 // implementation will call us right after to encode the buffer and thus we
674 // can eliminate the copy into |input_buffer_|.
675 const scoped_ptr<AudioBus> input_bus_;
677 // A buffer that holds one AAC access unit. Initialized in |Initialize| once
678 // the maximum access unit size is known.
679 const scoped_ptr<uint8[]> access_unit_buffer_;
681 // The maximum size of an access unit that the encoder can emit.
682 const uint32_t max_access_unit_size_;
684 // A temporary pointer to the current output buffer. Only non-null when
685 // writing an access unit. Accessed by the AudioFile write callback function.
686 std::string* output_buffer_;
688 // The |AudioConverter| is responsible for AAC encoding. This is a Core Audio
689 // object, not to be confused with |media::AudioConverter|.
690 AudioConverterRef converter_;
692 // The |AudioFile| is responsible for ADTS packetization.
693 AudioFileID file_;
695 // An |AudioBufferList| passed to the converter to store encoded samples.
696 AudioBufferList converter_abl_;
698 // The number of access units emitted so far by the encoder.
699 uint64_t num_access_units_;
701 // On iOS, audio codecs can be interrupted by other services (such as an
702 // audio alert or phone call). Depending on the underlying hardware and
703 // configuration, the codec may have to be thrown away and re-initialized
704 // after such an interruption. This flag tracks if we can resume or not from
705 // such an interruption. It is initialized to true, which is the only possible
706 // value on OS X and on most modern iOS hardware.
707 // TODO(jfroy): Implement encoder re-initialization after interruption.
708 // https://crbug.com/424787
709 const bool can_resume_;
711 DISALLOW_COPY_AND_ASSIGN(AppleAacImpl);
713 #endif // defined(OS_MACOSX)
715 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
716 public:
717 Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
718 int num_channels,
719 int sampling_rate,
720 const FrameEncodedCallback& callback)
721 : ImplBase(cast_environment,
722 CODEC_AUDIO_PCM16,
723 num_channels,
724 sampling_rate,
725 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
726 callback),
727 buffer_(new int16[num_channels * samples_per_frame_]) {
728 if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
729 return;
730 operational_status_ = STATUS_INITIALIZED;
733 private:
734 ~Pcm16Impl() final {}
736 void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
737 int source_offset,
738 int buffer_fill_offset,
739 int num_samples) final {
740 audio_bus->ToInterleavedPartial(
741 source_offset,
742 num_samples,
743 sizeof(int16),
744 buffer_.get() + buffer_fill_offset * num_channels_);
747 bool EncodeFromFilledBuffer(std::string* out) final {
748 // Output 16-bit PCM integers in big-endian byte order.
749 out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
750 const int16* src = buffer_.get();
751 const int16* const src_end = src + num_channels_ * samples_per_frame_;
752 uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
753 for (; src < src_end; ++src, ++dest)
754 *dest = base::HostToNet16(*src);
755 return true;
758 private:
759 const scoped_ptr<int16[]> buffer_;
761 DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
764 AudioEncoder::AudioEncoder(
765 const scoped_refptr<CastEnvironment>& cast_environment,
766 int num_channels,
767 int sampling_rate,
768 int bitrate,
769 Codec codec,
770 const FrameEncodedCallback& frame_encoded_callback)
771 : cast_environment_(cast_environment) {
772 // Note: It doesn't matter which thread constructs AudioEncoder, just so long
773 // as all calls to InsertAudio() are by the same thread.
774 insert_thread_checker_.DetachFromThread();
775 switch (codec) {
776 #if !defined(OS_IOS)
777 case CODEC_AUDIO_OPUS:
778 impl_ = new OpusImpl(cast_environment,
779 num_channels,
780 sampling_rate,
781 bitrate,
782 frame_encoded_callback);
783 break;
784 #endif
785 #if defined(OS_MACOSX)
786 case CODEC_AUDIO_AAC:
787 impl_ = new AppleAacImpl(cast_environment,
788 num_channels,
789 sampling_rate,
790 bitrate,
791 frame_encoded_callback);
792 break;
793 #endif // defined(OS_MACOSX)
794 case CODEC_AUDIO_PCM16:
795 impl_ = new Pcm16Impl(cast_environment,
796 num_channels,
797 sampling_rate,
798 frame_encoded_callback);
799 break;
800 default:
801 NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
802 break;
806 AudioEncoder::~AudioEncoder() {}
808 OperationalStatus AudioEncoder::InitializationResult() const {
809 DCHECK(insert_thread_checker_.CalledOnValidThread());
810 if (impl_.get()) {
811 return impl_->InitializationResult();
813 return STATUS_UNSUPPORTED_CODEC;
816 int AudioEncoder::GetSamplesPerFrame() const {
817 DCHECK(insert_thread_checker_.CalledOnValidThread());
818 if (InitializationResult() != STATUS_INITIALIZED) {
819 NOTREACHED();
820 return std::numeric_limits<int>::max();
822 return impl_->samples_per_frame();
825 base::TimeDelta AudioEncoder::GetFrameDuration() const {
826 DCHECK(insert_thread_checker_.CalledOnValidThread());
827 if (InitializationResult() != STATUS_INITIALIZED) {
828 NOTREACHED();
829 return base::TimeDelta();
831 return impl_->frame_duration();
834 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
835 const base::TimeTicks& recorded_time) {
836 DCHECK(insert_thread_checker_.CalledOnValidThread());
837 DCHECK(audio_bus.get());
838 if (InitializationResult() != STATUS_INITIALIZED) {
839 NOTREACHED();
840 return;
842 cast_environment_->PostTask(CastEnvironment::AUDIO,
843 FROM_HERE,
844 base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
845 impl_,
846 base::Passed(&audio_bus),
847 recorded_time));
850 } // namespace cast
851 } // namespace media