media/cast/sender/audio_encoder.cc
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/cast/sender/audio_encoder.h"

#include <algorithm>
#include <limits>
#include <string>

#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/location.h"
#include "base/stl_util.h"
#include "base/sys_byteorder.h"
#include "base/time/time.h"
#include "media/cast/cast_defines.h"

#if !defined(OS_IOS)
#include "third_party/opus/src/include/opus.h"
#endif

#if defined(OS_MACOSX)
#include <AudioToolbox/AudioToolbox.h>
#endif

namespace media {
namespace cast {
namespace {

// If the capture timestamp runs ahead of the audio buffered so far by more
// than this many frame durations, assume an input underrun occurred and skip
// the RTP timestamp ahead to compensate.
const int kUnderrunSkipThreshold = 3;

// Default number of encoded frames per second; yields the 10 ms frame size
// used by the Opus and PCM16 implementations.
const int kDefaultFramesPerSecond = 100;

}  // namespace
// Base class that handles the common problem of feeding one or more AudioBus'
// data into a buffer and then, once the buffer is full, encoding the signal and
// emitting a SenderEncodedFrame via the FrameEncodedCallback.
//
// Subclasses complete the implementation by handling the actual encoding
// details.
class AudioEncoder::ImplBase
    : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
 public:
  ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
           Codec codec,
           int num_channels,
           int sampling_rate,
           int samples_per_frame,
           const FrameEncodedCallback& callback)
      : cast_environment_(cast_environment),
        codec_(codec),
        num_channels_(num_channels),
        samples_per_frame_(samples_per_frame),
        callback_(callback),
        operational_status_(STATUS_UNINITIALIZED),
        frame_duration_(base::TimeDelta::FromMicroseconds(
            base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
            sampling_rate)),
        buffer_fill_end_(0),
        frame_id_(0),
        frame_rtp_timestamp_(0),
        samples_dropped_from_buffer_(0) {
    // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
    const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
    if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
        frame_duration_ == base::TimeDelta() ||
        samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
      operational_status_ = STATUS_INVALID_CONFIGURATION;
    }
  }
  OperationalStatus InitializationResult() const {
    return operational_status_;
  }

  int samples_per_frame() const {
    return samples_per_frame_;
  }

  base::TimeDelta frame_duration() const { return frame_duration_; }

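  // Feeds the samples in |audio_bus| into the frame buffer and, for each frame
  // that becomes full, encodes it and posts the result to |callback_| on the
  // MAIN thread. |recorded_time| is the capture time of the audio and is used
  // to detect and compensate for input underruns.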
  void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
                   const base::TimeTicks& recorded_time) {
    DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
    DCHECK(!recorded_time.is_null());

    // Determine whether |recorded_time| is consistent with the amount of audio
    // data having been processed in the past. Resolve the underrun problem by
    // dropping data from the internal buffer and skipping ahead the next
    // frame's RTP timestamp by the estimated number of frames missed. On the
    // other hand, don't attempt to resolve overruns: A receiver should
    // gracefully deal with an excess of audio data.
    base::TimeDelta buffer_fill_duration =
        buffer_fill_end_ * frame_duration_ / samples_per_frame_;
    if (!frame_capture_time_.is_null()) {
      const base::TimeDelta amount_ahead_by =
          recorded_time - (frame_capture_time_ + buffer_fill_duration);
      const int64 num_frames_missed = amount_ahead_by / frame_duration_;
      if (num_frames_missed > kUnderrunSkipThreshold) {
        samples_dropped_from_buffer_ += buffer_fill_end_;
        buffer_fill_end_ = 0;
        buffer_fill_duration = base::TimeDelta();
        frame_rtp_timestamp_ +=
            static_cast<uint32>(num_frames_missed * samples_per_frame_);
        DVLOG(1) << "Skipping RTP timestamp ahead to account for "
                 << num_frames_missed * samples_per_frame_
                 << " samples' worth of underrun.";
      }
    }
    frame_capture_time_ = recorded_time - buffer_fill_duration;

    // Encode all audio in |audio_bus| into zero or more frames.
    int src_pos = 0;
    while (src_pos < audio_bus->frames()) {
      // Note: This is used to compute the deadline utilization and so it uses
      // the real-world clock instead of the CastEnvironment clock, the latter
      // of which might be simulated.
      const base::TimeTicks start_time = base::TimeTicks::Now();

      const int num_samples_to_xfer = std::min(
          samples_per_frame_ - buffer_fill_end_,
          audio_bus->frames() - src_pos);
      DCHECK_EQ(audio_bus->channels(), num_channels_);
      TransferSamplesIntoBuffer(
          audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
      src_pos += num_samples_to_xfer;
      buffer_fill_end_ += num_samples_to_xfer;

      if (buffer_fill_end_ < samples_per_frame_)
        break;

      scoped_ptr<SenderEncodedFrame> audio_frame(new SenderEncodedFrame());
      audio_frame->dependency = EncodedFrame::KEY;
      audio_frame->frame_id = frame_id_;
      audio_frame->referenced_frame_id = frame_id_;
      audio_frame->rtp_timestamp = frame_rtp_timestamp_;
      audio_frame->reference_time = frame_capture_time_;

      if (EncodeFromFilledBuffer(&audio_frame->data)) {
        // Compute deadline utilization as the real-world time elapsed divided
        // by the signal duration.
        audio_frame->deadline_utilization =
            (base::TimeTicks::Now() - start_time).InSecondsF() /
            frame_duration_.InSecondsF();

        cast_environment_->PostTask(
            CastEnvironment::MAIN,
            FROM_HERE,
            base::Bind(callback_,
                       base::Passed(&audio_frame),
                       samples_dropped_from_buffer_));
        samples_dropped_from_buffer_ = 0;
      }

      // Reset the internal buffer, frame ID, and timestamps for the next frame.
      buffer_fill_end_ = 0;
      ++frame_id_;
      frame_rtp_timestamp_ += samples_per_frame_;
      frame_capture_time_ += frame_duration_;
    }
  }

 protected:
  friend class base::RefCountedThreadSafe<ImplBase>;
  virtual ~ImplBase() {}

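  // Copies |num_samples| samples, starting at |source_offset| within
  // |audio_bus|, into the implementation's frame buffer starting at
  // |buffer_fill_offset|.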
  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) = 0;

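  // Encodes the frame buffer's contents into |out|. Returns false if the frame
  // could not be encoded or should not be transmitted.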
  virtual bool EncodeFromFilledBuffer(std::string* out) = 0;

  const scoped_refptr<CastEnvironment> cast_environment_;
  const Codec codec_;
  const int num_channels_;
  const int samples_per_frame_;
  const FrameEncodedCallback callback_;

  // Subclass' ctor is expected to set this to STATUS_INITIALIZED.
  OperationalStatus operational_status_;

  // The duration of one frame of encoded audio samples. Derived from
  // |samples_per_frame_| and the sampling rate.
  const base::TimeDelta frame_duration_;

 private:
  // In the case where a call to EncodeAudio() cannot completely fill the
  // buffer, this points to the position at which to populate data in a later
  // call.
  int buffer_fill_end_;

  // A counter used to label EncodedFrames.
  uint32 frame_id_;

  // The RTP timestamp for the next frame of encoded audio. This is defined as
  // the number of audio samples encoded so far, plus the estimated number of
  // samples that were missed due to data underruns. A receiver uses this value
  // to detect gaps in the audio signal data being provided. Per the spec, RTP
  // timestamp values are allowed to overflow and roll around past zero.
  uint32 frame_rtp_timestamp_;

  // The local system time associated with the start of the next frame of
  // encoded audio. This value is passed on to a receiver as a reference clock
  // timestamp for the purposes of synchronizing audio and video. Its
  // progression is expected to drift relative to the elapsed time implied by
  // the RTP timestamps.
  base::TimeTicks frame_capture_time_;

  // Set to non-zero to indicate the next output frame skipped over audio
  // samples in order to recover from an input underrun.
  int samples_dropped_from_buffer_;

  DISALLOW_COPY_AND_ASSIGN(ImplBase);
};

#if !defined(OS_IOS)
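// Encodes frames of audio with libopus. Frames are fixed at 10 ms
// (sampling_rate / 100 samples per channel).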
class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 public:
  OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
           int num_channels,
           int sampling_rate,
           int bitrate,
           const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_OPUS,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 callback),
        encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
        opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
        buffer_(new float[num_channels * samples_per_frame_]) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED ||
        sampling_rate % samples_per_frame_ != 0 ||
        !IsValidFrameDuration(frame_duration_)) {
      return;
    }
    if (opus_encoder_init(opus_encoder_,
                          sampling_rate,
                          num_channels,
                          OPUS_APPLICATION_AUDIO) != OPUS_OK) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;

    if (bitrate <= 0) {
      // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
      // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
      // frame size. The opus library authors may, of course, adjust this in
      // later versions.
      bitrate = OPUS_AUTO;
    }
    CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
             OPUS_OK);
  }

 private:
  ~OpusImpl() final {}

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) final {
    // Opus requires channel-interleaved samples in a single array.
    for (int ch = 0; ch < audio_bus->channels(); ++ch) {
      const float* src = audio_bus->channel(ch) + source_offset;
      const float* const src_end = src + num_samples;
      float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
      for (; src < src_end; ++src, dest += num_channels_)
        *dest = *src;
    }
  }

  bool EncodeFromFilledBuffer(std::string* out) final {
    out->resize(kOpusMaxPayloadSize);
    const opus_int32 result =
        opus_encode_float(opus_encoder_,
                          buffer_.get(),
                          samples_per_frame_,
                          reinterpret_cast<uint8*>(string_as_array(out)),
                          kOpusMaxPayloadSize);
    if (result > 1) {
      out->resize(result);
      return true;
    } else if (result < 0) {
      LOG(ERROR) << "Error code from opus_encode_float(): " << result;
      return false;
    } else {
      // Do nothing: The documentation says that a return value of zero or
      // one byte means the packet does not need to be transmitted.
      return false;
    }
  }

  static bool IsValidFrameDuration(base::TimeDelta duration) {
    // See https://tools.ietf.org/html/rfc6716#section-2.1.4
    return duration == base::TimeDelta::FromMicroseconds(2500) ||
           duration == base::TimeDelta::FromMilliseconds(5) ||
           duration == base::TimeDelta::FromMilliseconds(10) ||
           duration == base::TimeDelta::FromMilliseconds(20) ||
           duration == base::TimeDelta::FromMilliseconds(40) ||
           duration == base::TimeDelta::FromMilliseconds(60);
  }

  const scoped_ptr<uint8[]> encoder_memory_;
  OpusEncoder* const opus_encoder_;
  const scoped_ptr<float[]> buffer_;

  // This is the recommended value, according to documentation in
  // third_party/opus/src/include/opus.h, so that the Opus encoder does not
  // degrade the audio due to memory constraints.
  //
  // Note: Whereas other RTP implementations do not, the cast library is
  // perfectly capable of transporting larger than MTU-sized audio frames.
  static const int kOpusMaxPayloadSize = 4000;

  DISALLOW_COPY_AND_ASSIGN(OpusImpl);
};
#endif

#if defined(OS_MACOSX)
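// AAC-LC encoder backed by Core Audio: an AudioConverter performs the AAC
// encoding and an AudioFile wraps each access unit in an ADTS header.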
class AudioEncoder::AppleAacImpl : public AudioEncoder::ImplBase {
  // AAC-LC has two access unit sizes (960 and 1024). The Apple encoder only
  // supports the latter.
  static const int kAccessUnitSamples = 1024;

  // Size of an ADTS header (w/o checksum). See
  // http://wiki.multimedia.cx/index.php?title=ADTS
  static const int kAdtsHeaderSize = 7;

 public:
  AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
               int num_channels,
               int sampling_rate,
               int bitrate,
               const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_AAC,
                 num_channels,
                 sampling_rate,
                 kAccessUnitSamples,
                 callback),
        input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
        input_bus_(AudioBus::CreateWrapper(num_channels)),
        max_access_unit_size_(0),
        output_buffer_(nullptr),
        converter_(nullptr),
        file_(nullptr),
        num_access_units_(0),
        can_resume_(true) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED) {
      return;
    }
    if (!Initialize(sampling_rate, bitrate)) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~AppleAacImpl() final { Teardown(); }

  // Destroys the existing audio converter and file, if any.
  void Teardown() {
    if (converter_) {
      AudioConverterDispose(converter_);
      converter_ = nullptr;
    }
    if (file_) {
      AudioFileClose(file_);
      file_ = nullptr;
    }
  }

  // Initializes the audio converter and file. Calls Teardown to destroy any
  // existing state. This is so that Initialize() may be called to set up
  // another converter after a non-resumable interruption.
  bool Initialize(int sampling_rate, int bitrate) {
    // Teardown previous audio converter and file.
    Teardown();

    // Input data comes from AudioBus objects, which carry non-interleaved
    // packed native-endian float samples. Note that in Core Audio, a frame is
    // one sample across all channels at a given point in time. When describing
    // a non-interleaved samples format, the "per frame" fields mean "per
    // channel" or "per stream", with the exception of |mChannelsPerFrame|. For
    // uncompressed formats, one packet contains one frame.
    AudioStreamBasicDescription in_asbd;
    in_asbd.mSampleRate = sampling_rate;
    in_asbd.mFormatID = kAudioFormatLinearPCM;
    in_asbd.mFormatFlags =
        kAudioFormatFlagsNativeFloatPacked | kAudioFormatFlagIsNonInterleaved;
    in_asbd.mChannelsPerFrame = num_channels_;
    in_asbd.mBitsPerChannel = sizeof(float) * 8;
    in_asbd.mFramesPerPacket = 1;
    in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
    in_asbd.mReserved = 0;

    // Request AAC-LC encoding, with no downmixing or downsampling.
    AudioStreamBasicDescription out_asbd;
    memset(&out_asbd, 0, sizeof(AudioStreamBasicDescription));
    out_asbd.mSampleRate = sampling_rate;
    out_asbd.mFormatID = kAudioFormatMPEG4AAC;
    out_asbd.mChannelsPerFrame = num_channels_;
    UInt32 prop_size = sizeof(out_asbd);
    if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
                               0,
                               nullptr,
                               &prop_size,
                               &out_asbd) != noErr) {
      return false;
    }

    if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
      return false;
    }

    // The converter will fully specify the output format and update the
    // relevant fields of the structure, which we can now query.
    prop_size = sizeof(out_asbd);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCurrentOutputStreamDescription,
                                  &prop_size,
                                  &out_asbd) != noErr) {
      return false;
    }

    // If bitrate is <= 0, allow the encoder to pick a suitable value.
    // Otherwise, set the bitrate (which can fail if the value is not suitable
    // or compatible with the output sampling rate or channels).
    if (bitrate > 0) {
      prop_size = sizeof(int);
      if (AudioConverterSetProperty(
              converter_, kAudioConverterEncodeBitRate, prop_size, &bitrate) !=
          noErr) {
        return false;
      }
    }

#if defined(OS_IOS)
    // See the comment next to |can_resume_| for details on resumption. Some
    // converters can return kAudioConverterErr_PropertyNotSupported, in which
    // case resumption is implicitly supported. This is the only location where
    // the implementation modifies |can_resume_|.
    uint32_t can_resume;
    prop_size = sizeof(can_resume);
    OSStatus oserr = AudioConverterGetProperty(
        converter_,
        kAudioConverterPropertyCanResumeFromInterruption,
        &prop_size,
        &can_resume);
    if (oserr == noErr) {
      const_cast<bool&>(can_resume_) = can_resume != 0;
    }
#endif

    // Figure out the maximum size of an access unit that the encoder can
    // produce. |mBytesPerPacket| will be 0 for variable size configurations,
    // in which case we must query the value.
    uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
    if (max_access_unit_size == 0) {
      prop_size = sizeof(max_access_unit_size);
      if (AudioConverterGetProperty(
              converter_,
              kAudioConverterPropertyMaximumOutputPacketSize,
              &prop_size,
              &max_access_unit_size) != noErr) {
        return false;
      }
    }

    // This is the only location where the implementation modifies
    // |max_access_unit_size_|.
    const_cast<uint32_t&>(max_access_unit_size_) = max_access_unit_size;

    // Allocate a buffer to store one access unit. This is the only location
    // where the implementation modifies |access_unit_buffer_|.
    const_cast<scoped_ptr<uint8[]>&>(access_unit_buffer_)
        .reset(new uint8[max_access_unit_size]);

    // Initialize the converter ABL. Note that the buffer size has to be set
    // before every encode operation, since the field is modified to indicate
    // the size of the output data (on input it indicates the buffer capacity).
    converter_abl_.mNumberBuffers = 1;
    converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
    converter_abl_.mBuffers[0].mData = access_unit_buffer_.get();

    // The "magic cookie" is an encoder state vector required for decoding and
    // packetization. It is queried now from |converter_| then set on |file_|
    // after initialization.
    UInt32 cookie_size;
    if (AudioConverterGetPropertyInfo(converter_,
                                      kAudioConverterCompressionMagicCookie,
                                      &cookie_size,
                                      nullptr) != noErr) {
      return false;
    }
    scoped_ptr<uint8[]> cookie_data(new uint8[cookie_size]);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCompressionMagicCookie,
                                  &cookie_size,
                                  cookie_data.get()) != noErr) {
      return false;
    }

    if (AudioFileInitializeWithCallbacks(this,
                                         nullptr,
                                         &FileWriteCallback,
                                         nullptr,
                                         nullptr,
                                         kAudioFileAAC_ADTSType,
                                         &out_asbd,
                                         0,
                                         &file_) != noErr) {
      return false;
    }
    if (AudioFileSetProperty(file_,
                             kAudioFilePropertyMagicCookieData,
                             cookie_size,
                             cookie_data.get()) != noErr) {
      return false;
    }

    // Initially the input bus points to the input buffer. See the comment on
    // |input_bus_| for more on this optimization.
    input_bus_->set_frames(kAccessUnitSamples);
    for (int ch = 0; ch < input_buffer_->channels(); ++ch) {
      input_bus_->SetChannelData(ch, input_buffer_->channel(ch));
    }

    return true;
  }

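  // Either aliases the caller's samples directly via |input_bus_| (when a
  // whole, aligned access unit is provided) or copies them into
  // |input_buffer_| for a later encode.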
  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) final {
    DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());

    // See the comment on |input_bus_| for more on this optimization. Note that
    // we cannot elide the copy if the source offset would result in an
    // unaligned pointer.
    if (num_samples == kAccessUnitSamples &&
        source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0) {
      DCHECK_EQ(buffer_fill_offset, 0);
      for (int ch = 0; ch < audio_bus->channels(); ++ch) {
        auto samples = const_cast<float*>(audio_bus->channel(ch));
        input_bus_->SetChannelData(ch, samples + source_offset);
      }
      return;
    }

    // Copy the samples into the input buffer.
    DCHECK_EQ(input_bus_->channel(0), input_buffer_->channel(0));
    audio_bus->CopyPartialFramesTo(
        source_offset, num_samples, buffer_fill_offset, input_buffer_.get());
  }

  bool EncodeFromFilledBuffer(std::string* out) final {
    // Reset the buffer size field to the buffer capacity.
    converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;

    // Encode the current input buffer. This is a synchronous call.
    OSStatus oserr;
    UInt32 io_num_packets = 1;
    AudioStreamPacketDescription packet_description;
    oserr = AudioConverterFillComplexBuffer(converter_,
                                            &ConverterFillDataCallback,
                                            this,
                                            &io_num_packets,
                                            &converter_abl_,
                                            &packet_description);
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }

    // Reserve space in the output buffer to write the packet.
    out->reserve(packet_description.mDataByteSize + kAdtsHeaderSize);

    // Set the current output buffer and emit an ADTS-wrapped AAC access unit.
    // This is a synchronous call. After it returns, reset the output buffer.
    output_buffer_ = out;
    oserr = AudioFileWritePackets(file_,
                                  false,
                                  converter_abl_.mBuffers[0].mDataByteSize,
                                  &packet_description,
                                  num_access_units_,
                                  &io_num_packets,
                                  converter_abl_.mBuffers[0].mData);
    output_buffer_ = nullptr;
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }
    num_access_units_ += io_num_packets;
    return true;
  }

  // The |AudioConverterFillComplexBuffer| input callback function. Configures
  // the provided |AudioBufferList| to alias |input_bus_|. The implementation
  // can only supply |kAccessUnitSamples| samples as a result of not copying
  // samples or tracking read and write positions. Note that this function is
  // called synchronously by |AudioConverterFillComplexBuffer|.
  static OSStatus ConverterFillDataCallback(
      AudioConverterRef in_converter,
      UInt32* io_num_packets,
      AudioBufferList* io_data,
      AudioStreamPacketDescription** out_packet_desc,
      void* in_encoder) {
    DCHECK(in_encoder);
    auto encoder = reinterpret_cast<AppleAacImpl*>(in_encoder);
    auto input_buffer = encoder->input_buffer_.get();
    auto input_bus = encoder->input_bus_.get();

    DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
    DCHECK_EQ(io_data->mNumberBuffers,
              static_cast<unsigned>(input_bus->channels()));
    for (int i_buf = 0, end = io_data->mNumberBuffers; i_buf < end; ++i_buf) {
      io_data->mBuffers[i_buf].mNumberChannels = 1;
      io_data->mBuffers[i_buf].mDataByteSize = sizeof(float) * *io_num_packets;
      io_data->mBuffers[i_buf].mData = input_bus->channel(i_buf);

      // Reset the input bus back to the input buffer. See the comment on
      // |input_bus_| for more on this optimization.
      input_bus->SetChannelData(i_buf, input_buffer->channel(i_buf));
    }
    return noErr;
  }

  // The AudioFile write callback function. Appends the data to the encoder's
  // current |output_buffer_|.
  static OSStatus FileWriteCallback(void* in_encoder,
                                    SInt64 in_position,
                                    UInt32 in_size,
                                    const void* in_buffer,
                                    UInt32* out_size) {
    DCHECK(in_encoder);
    DCHECK(in_buffer);
    auto encoder = reinterpret_cast<const AppleAacImpl*>(in_encoder);
    auto buffer = reinterpret_cast<const std::string::value_type*>(in_buffer);

    std::string* const output_buffer = encoder->output_buffer_;
    DCHECK(output_buffer);

    output_buffer->append(buffer, in_size);
    *out_size = in_size;
    return noErr;
  }

  // Buffer that holds one AAC access unit worth of samples. The input callback
  // function provides samples from this buffer via |input_bus_| to the encoder.
  const scoped_ptr<AudioBus> input_buffer_;

  // Wrapper AudioBus used by the input callback function. Normally it wraps
  // |input_buffer_|. However, as an optimization when the client submits a
  // buffer containing exactly one access unit worth of samples, the bus is
  // redirected to the client buffer temporarily. We know that the base
  // implementation will call us right after to encode the buffer and thus we
  // can eliminate the copy into |input_buffer_|.
  const scoped_ptr<AudioBus> input_bus_;

  // A buffer that holds one AAC access unit. Initialized in |Initialize| once
  // the maximum access unit size is known.
  const scoped_ptr<uint8[]> access_unit_buffer_;

  // The maximum size of an access unit that the encoder can emit.
  const uint32_t max_access_unit_size_;

  // A temporary pointer to the current output buffer. Only non-null when
  // writing an access unit. Accessed by the AudioFile write callback function.
  std::string* output_buffer_;

  // The |AudioConverter| is responsible for AAC encoding. This is a Core Audio
  // object, not to be confused with |media::AudioConverter|.
  AudioConverterRef converter_;

  // The |AudioFile| is responsible for ADTS packetization.
  AudioFileID file_;

  // An |AudioBufferList| passed to the converter to store encoded samples.
  AudioBufferList converter_abl_;

  // The number of access units emitted so far by the encoder.
  uint64_t num_access_units_;

  // On iOS, audio codecs can be interrupted by other services (such as an
  // audio alert or phone call). Depending on the underlying hardware and
  // configuration, the codec may have to be thrown away and re-initialized
  // after such an interruption. This flag tracks if we can resume or not from
  // such an interruption. It is initialized to true, which is the only possible
  // value on OS X and on most modern iOS hardware.
  // TODO(jfroy): Implement encoder re-initialization after interruption.
  // https://crbug.com/424787
  const bool can_resume_;

  DISALLOW_COPY_AND_ASSIGN(AppleAacImpl);
};
#endif  // defined(OS_MACOSX)
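
// Produces uncompressed PCM: interleaves the float samples from the AudioBus
// and converts them to 16-bit big-endian integers; no actual compression is
// performed.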
class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 public:
  Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
            int num_channels,
            int sampling_rate,
            const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_PCM16,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 callback),
        buffer_(new int16[num_channels * samples_per_frame_]) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
      return;
    operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~Pcm16Impl() final {}

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) final {
    audio_bus->ToInterleavedPartial(
        source_offset,
        num_samples,
        sizeof(int16),
        buffer_.get() + buffer_fill_offset * num_channels_);
  }

  bool EncodeFromFilledBuffer(std::string* out) final {
    // Output 16-bit PCM integers in big-endian byte order.
    out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
    const int16* src = buffer_.get();
    const int16* const src_end = src + num_channels_ * samples_per_frame_;
    uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
    for (; src < src_end; ++src, ++dest)
      *dest = base::HostToNet16(*src);
    return true;
  }

 private:
  const scoped_ptr<int16[]> buffer_;

  DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
};

AudioEncoder::AudioEncoder(
    const scoped_refptr<CastEnvironment>& cast_environment,
    int num_channels,
    int sampling_rate,
    int bitrate,
    Codec codec,
    const FrameEncodedCallback& frame_encoded_callback)
    : cast_environment_(cast_environment) {
  // Note: It doesn't matter which thread constructs AudioEncoder, just so long
  // as all calls to InsertAudio() are by the same thread.
  insert_thread_checker_.DetachFromThread();
  switch (codec) {
#if !defined(OS_IOS)
    case CODEC_AUDIO_OPUS:
      impl_ = new OpusImpl(cast_environment,
                           num_channels,
                           sampling_rate,
                           bitrate,
                           frame_encoded_callback);
      break;
#endif
#if defined(OS_MACOSX)
    case CODEC_AUDIO_AAC:
      impl_ = new AppleAacImpl(cast_environment,
                               num_channels,
                               sampling_rate,
                               bitrate,
                               frame_encoded_callback);
      break;
#endif  // defined(OS_MACOSX)
    case CODEC_AUDIO_PCM16:
      impl_ = new Pcm16Impl(cast_environment,
                            num_channels,
                            sampling_rate,
                            frame_encoded_callback);
      break;
    default:
      NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
      break;
  }
}

AudioEncoder::~AudioEncoder() {}

OperationalStatus AudioEncoder::InitializationResult() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (impl_.get()) {
    return impl_->InitializationResult();
  }
  return STATUS_UNSUPPORTED_CODEC;
}

int AudioEncoder::GetSamplesPerFrame() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return std::numeric_limits<int>::max();
  }
  return impl_->samples_per_frame();
}

base::TimeDelta AudioEncoder::GetFrameDuration() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return base::TimeDelta();
  }
  return impl_->frame_duration();
}

void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
                               const base::TimeTicks& recorded_time) {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  DCHECK(audio_bus.get());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return;
  }
  cast_environment_->PostTask(CastEnvironment::AUDIO,
                              FROM_HERE,
                              base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
                                         impl_,
                                         base::Passed(&audio_bus),
                                         recorded_time));
}

}  // namespace cast
}  // namespace media