media/cast/sender/audio_encoder.cc
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/cast/sender/audio_encoder.h"

#include <algorithm>
#include <limits>
#include <string>

#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/location.h"
#include "base/stl_util.h"
#include "base/sys_byteorder.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/cast/cast_defines.h"
#include "media/cast/cast_environment.h"

#if !defined(OS_IOS)
#include "third_party/opus/src/include/opus.h"
#endif

#if defined(OS_MACOSX)
#include <AudioToolbox/AudioToolbox.h>
#endif

namespace media {
namespace cast {

namespace {
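
// kUnderrunSkipThreshold is the number of missed frames tolerated before
// EncodeAudio() drops buffered samples and skips the RTP timestamp ahead.
// kDefaultFramesPerSecond (100 frames/s) yields 10 ms frames for the Opus and
// PCM16 encoders.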
const int kUnderrunSkipThreshold = 3;
const int kDefaultFramesPerSecond = 100;

}  // namespace

// Base class that handles the common problem of feeding one or more AudioBus'
// data into a buffer and then, once the buffer is full, encoding the signal and
// emitting an EncodedFrame via the FrameEncodedCallback.
//
// Subclasses complete the implementation by handling the actual encoding
// details.
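//
// Note: InsertAudio() posts EncodeAudio() to the CastEnvironment AUDIO thread
// (see AudioEncoder::InsertAudio() below), and the encoded-frame callback is
// posted back to the MAIN thread.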
class AudioEncoder::ImplBase
    : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
 public:
  ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
           Codec codec,
           int num_channels,
           int sampling_rate,
           int samples_per_frame,
           const FrameEncodedCallback& callback)
      : cast_environment_(cast_environment),
        codec_(codec),
        num_channels_(num_channels),
        samples_per_frame_(samples_per_frame),
        callback_(callback),
        operational_status_(STATUS_UNINITIALIZED),
        frame_duration_(base::TimeDelta::FromMicroseconds(
            base::Time::kMicrosecondsPerSecond * samples_per_frame_ /
            sampling_rate)),
        buffer_fill_end_(0),
        frame_id_(0),
        frame_rtp_timestamp_(0),
        samples_dropped_from_buffer_(0) {
    // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
    const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
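    // (That is, 48 samples per millisecond * 2 channels * 100 ms = 9600.)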
    if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
        frame_duration_ == base::TimeDelta() ||
        samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) {
      operational_status_ = STATUS_INVALID_CONFIGURATION;
    }
  }

  OperationalStatus InitializationResult() const {
    return operational_status_;
  }

  int samples_per_frame() const {
    return samples_per_frame_;
  }

  base::TimeDelta frame_duration() const { return frame_duration_; }

  void EncodeAudio(scoped_ptr<AudioBus> audio_bus,
                   const base::TimeTicks& recorded_time) {
    DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
    DCHECK(!recorded_time.is_null());

    // Determine whether |recorded_time| is consistent with the amount of audio
    // data having been processed in the past. Resolve the underrun problem by
    // dropping data from the internal buffer and skipping ahead the next
    // frame's RTP timestamp by the estimated number of frames missed. On the
    // other hand, don't attempt to resolve overruns: A receiver should
    // gracefully deal with an excess of audio data.
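    //
    // For example, with 10 ms frames, if |recorded_time| is more than
    // kUnderrunSkipThreshold frames (30 ms) ahead of where the buffered audio
    // leaves off, the buffered samples are discarded and the RTP timestamp
    // jumps ahead by the missed frames' worth of samples.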
    base::TimeDelta buffer_fill_duration =
        buffer_fill_end_ * frame_duration_ / samples_per_frame_;
    if (!frame_capture_time_.is_null()) {
      const base::TimeDelta amount_ahead_by =
          recorded_time - (frame_capture_time_ + buffer_fill_duration);
      const int64 num_frames_missed = amount_ahead_by / frame_duration_;
      if (num_frames_missed > kUnderrunSkipThreshold) {
        samples_dropped_from_buffer_ += buffer_fill_end_;
        buffer_fill_end_ = 0;
        buffer_fill_duration = base::TimeDelta();
        frame_rtp_timestamp_ +=
            static_cast<uint32>(num_frames_missed * samples_per_frame_);
        DVLOG(1) << "Skipping RTP timestamp ahead to account for "
                 << num_frames_missed * samples_per_frame_
                 << " samples' worth of underrun.";
      }
    }
    frame_capture_time_ = recorded_time - buffer_fill_duration;

    // Encode all audio in |audio_bus| into zero or more frames.
    int src_pos = 0;
    while (src_pos < audio_bus->frames()) {
      const int num_samples_to_xfer = std::min(
          samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
      DCHECK_EQ(audio_bus->channels(), num_channels_);
      TransferSamplesIntoBuffer(
          audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
      src_pos += num_samples_to_xfer;
      buffer_fill_end_ += num_samples_to_xfer;

      if (buffer_fill_end_ < samples_per_frame_)
        break;

      scoped_ptr<EncodedFrame> audio_frame(
          new EncodedFrame());
      audio_frame->dependency = EncodedFrame::KEY;
      audio_frame->frame_id = frame_id_;
      audio_frame->referenced_frame_id = frame_id_;
      audio_frame->rtp_timestamp = frame_rtp_timestamp_;
      audio_frame->reference_time = frame_capture_time_;

      if (EncodeFromFilledBuffer(&audio_frame->data)) {
        cast_environment_->PostTask(
            CastEnvironment::MAIN,
            FROM_HERE,
            base::Bind(callback_,
                       base::Passed(&audio_frame),
                       samples_dropped_from_buffer_));
        samples_dropped_from_buffer_ = 0;
      }

      // Reset the internal buffer, frame ID, and timestamps for the next frame.
      buffer_fill_end_ = 0;
      ++frame_id_;
      frame_rtp_timestamp_ += samples_per_frame_;
      frame_capture_time_ += frame_duration_;
    }
  }

 protected:
  friend class base::RefCountedThreadSafe<ImplBase>;
  virtual ~ImplBase() {}

  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) = 0;
  virtual bool EncodeFromFilledBuffer(std::string* out) = 0;

  const scoped_refptr<CastEnvironment> cast_environment_;
  const Codec codec_;
  const int num_channels_;
  const int samples_per_frame_;
  const FrameEncodedCallback callback_;

  // Subclass' ctor is expected to set this to STATUS_INITIALIZED.
  OperationalStatus operational_status_;

  // The duration of one frame of encoded audio samples. Derived from
  // |samples_per_frame_| and the sampling rate.
  const base::TimeDelta frame_duration_;

 private:
  // In the case where a call to EncodeAudio() cannot completely fill the
  // buffer, this points to the position at which to populate data in a later
  // call.
  int buffer_fill_end_;

  // A counter used to label EncodedFrames.
  uint32 frame_id_;

  // The RTP timestamp for the next frame of encoded audio. This is defined as
  // the number of audio samples encoded so far, plus the estimated number of
  // samples that were missed due to data underruns. A receiver uses this value
  // to detect gaps in the audio signal data being provided. Per the spec, RTP
  // timestamp values are allowed to overflow and roll around past zero.
  uint32 frame_rtp_timestamp_;

  // The local system time associated with the start of the next frame of
  // encoded audio. This value is passed on to a receiver as a reference clock
  // timestamp for the purposes of synchronizing audio and video. Its
  // progression is expected to drift relative to the elapsed time implied by
  // the RTP timestamps.
  base::TimeTicks frame_capture_time_;

  // Set to non-zero to indicate the next output frame skipped over audio
  // samples in order to recover from an input underrun.
  int samples_dropped_from_buffer_;

  DISALLOW_COPY_AND_ASSIGN(ImplBase);
};

#if !defined(OS_IOS)
class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
 public:
  OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
           int num_channels,
           int sampling_rate,
           int bitrate,
           const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_OPUS,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 callback),
        encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
        opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
        buffer_(new float[num_channels * samples_per_frame_]) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED ||
        sampling_rate % samples_per_frame_ != 0 ||
        !IsValidFrameDuration(frame_duration_)) {
      return;
    }
    if (opus_encoder_init(opus_encoder_,
                          sampling_rate,
                          num_channels,
                          OPUS_APPLICATION_AUDIO) != OPUS_OK) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;

    if (bitrate <= 0) {
      // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
      // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
      // frame size. The opus library authors may, of course, adjust this in
      // later versions.
      bitrate = OPUS_AUTO;
    }
    CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
             OPUS_OK);
  }

 private:
  ~OpusImpl() override {}

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) override {
    // Opus requires channel-interleaved samples in a single array.
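    // (For example, for stereo input the planar AudioBus channels L and R are
    // written out as L0 R0 L1 R1 ...)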
    for (int ch = 0; ch < audio_bus->channels(); ++ch) {
      const float* src = audio_bus->channel(ch) + source_offset;
      const float* const src_end = src + num_samples;
      float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
      for (; src < src_end; ++src, dest += num_channels_)
        *dest = *src;
    }
  }

  bool EncodeFromFilledBuffer(std::string* out) override {
    out->resize(kOpusMaxPayloadSize);
    const opus_int32 result =
        opus_encode_float(opus_encoder_,
                          buffer_.get(),
                          samples_per_frame_,
                          reinterpret_cast<uint8*>(string_as_array(out)),
                          kOpusMaxPayloadSize);
    if (result > 1) {
      out->resize(result);
      return true;
    } else if (result < 0) {
      LOG(ERROR) << "Error code from opus_encode_float(): " << result;
      return false;
    } else {
      // Do nothing: The documentation says that a return value of zero or
      // one byte means the packet does not need to be transmitted.
      return false;
    }
  }

  static bool IsValidFrameDuration(base::TimeDelta duration) {
    // See https://tools.ietf.org/html/rfc6716#section-2.1.4
    return duration == base::TimeDelta::FromMicroseconds(2500) ||
           duration == base::TimeDelta::FromMilliseconds(5) ||
           duration == base::TimeDelta::FromMilliseconds(10) ||
           duration == base::TimeDelta::FromMilliseconds(20) ||
           duration == base::TimeDelta::FromMilliseconds(40) ||
           duration == base::TimeDelta::FromMilliseconds(60);
  }

  const scoped_ptr<uint8[]> encoder_memory_;
  OpusEncoder* const opus_encoder_;
  const scoped_ptr<float[]> buffer_;

  // This is the recommended value, according to documentation in
  // third_party/opus/src/include/opus.h, so that the Opus encoder does not
  // degrade the audio due to memory constraints.
  //
  // Note: Whereas other RTP implementations do not, the cast library is
  // perfectly capable of transporting larger than MTU-sized audio frames.
  static const int kOpusMaxPayloadSize = 4000;

  DISALLOW_COPY_AND_ASSIGN(OpusImpl);
};
#endif

#if defined(OS_MACOSX)
class AudioEncoder::AppleAacImpl : public AudioEncoder::ImplBase {
  // AAC-LC has two access unit sizes (960 and 1024). The Apple encoder only
  // supports the latter.
  static const int kAccessUnitSamples = 1024;

  // Size of an ADTS header (w/o checksum). See
  // http://wiki.multimedia.cx/index.php?title=ADTS
  static const int kAdtsHeaderSize = 7;

 public:
  AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
               int num_channels,
               int sampling_rate,
               int bitrate,
               const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_AAC,
                 num_channels,
                 sampling_rate,
                 kAccessUnitSamples,
                 callback),
        input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
        input_bus_(AudioBus::CreateWrapper(num_channels)),
        max_access_unit_size_(0),
        output_buffer_(nullptr),
        converter_(nullptr),
        file_(nullptr),
        num_access_units_(0),
        can_resume_(true) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED) {
      return;
    }
    if (!Initialize(sampling_rate, bitrate)) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~AppleAacImpl() override { Teardown(); }

  // Destroys the existing audio converter and file, if any.
  void Teardown() {
    if (converter_) {
      AudioConverterDispose(converter_);
      converter_ = nullptr;
    }
    if (file_) {
      AudioFileClose(file_);
      file_ = nullptr;
    }
  }

  // Initializes the audio converter and file. Calls Teardown to destroy any
  // existing state. This is so that Initialize() may be called to setup another
  // converter after a non-resumable interruption.
  bool Initialize(int sampling_rate, int bitrate) {
    // Teardown previous audio converter and file.
    Teardown();

    // Input data comes from AudioBus objects, which carry non-interleaved
    // packed native-endian float samples. Note that in Core Audio, a frame is
    // one sample across all channels at a given point in time. When describing
    // a non-interleaved samples format, the "per frame" fields mean "per
    // channel" or "per stream", with the exception of |mChannelsPerFrame|. For
    // uncompressed formats, one packet contains one frame.
    AudioStreamBasicDescription in_asbd;
    in_asbd.mSampleRate = sampling_rate;
    in_asbd.mFormatID = kAudioFormatLinearPCM;
    in_asbd.mFormatFlags =
        kAudioFormatFlagsNativeFloatPacked | kAudioFormatFlagIsNonInterleaved;
    in_asbd.mChannelsPerFrame = num_channels_;
    in_asbd.mBitsPerChannel = sizeof(float) * 8;
    in_asbd.mFramesPerPacket = 1;
    in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
    in_asbd.mReserved = 0;

    // Request AAC-LC encoding, with no downmixing or downsampling.
    AudioStreamBasicDescription out_asbd;
    memset(&out_asbd, 0, sizeof(AudioStreamBasicDescription));
    out_asbd.mSampleRate = sampling_rate;
    out_asbd.mFormatID = kAudioFormatMPEG4AAC;
    out_asbd.mChannelsPerFrame = num_channels_;
    UInt32 prop_size = sizeof(out_asbd);
    if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
                               0,
                               nullptr,
                               &prop_size,
                               &out_asbd) != noErr) {
      return false;
    }

    if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
      return false;
    }

    // The converter will fully specify the output format and update the
    // relevant fields of the structure, which we can now query.
    prop_size = sizeof(out_asbd);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCurrentOutputStreamDescription,
                                  &prop_size,
                                  &out_asbd) != noErr) {
      return false;
    }

    // If bitrate is <= 0, allow the encoder to pick a suitable value.
    // Otherwise, set the bitrate (which can fail if the value is not suitable
    // or compatible with the output sampling rate or channels).
    if (bitrate > 0) {
      prop_size = sizeof(int);
      if (AudioConverterSetProperty(
              converter_, kAudioConverterEncodeBitRate, prop_size, &bitrate) !=
          noErr) {
        return false;
      }
    }

#if defined(OS_IOS)
    // See the comment next to |can_resume_| for details on resumption. Some
    // converters can return kAudioConverterErr_PropertyNotSupported, in which
    // case resumption is implicitly supported. This is the only location where
    // the implementation modifies |can_resume_|.
    uint32_t can_resume;
    prop_size = sizeof(can_resume);
    OSStatus oserr = AudioConverterGetProperty(
        converter_,
        kAudioConverterPropertyCanResumeFromInterruption,
        &prop_size,
        &can_resume);
    if (oserr == noErr) {
      const_cast<bool&>(can_resume_) = can_resume != 0;
    }
#endif

    // Figure out the maximum size of an access unit that the encoder can
    // produce. |mBytesPerPacket| will be 0 for variable size configurations,
    // in which case we must query the value.
    uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
    if (max_access_unit_size == 0) {
      prop_size = sizeof(max_access_unit_size);
      if (AudioConverterGetProperty(
              converter_,
              kAudioConverterPropertyMaximumOutputPacketSize,
              &prop_size,
              &max_access_unit_size) != noErr) {
        return false;
      }
    }

    // This is the only location where the implementation modifies
    // |max_access_unit_size_|.
    const_cast<uint32_t&>(max_access_unit_size_) = max_access_unit_size;

    // Allocate a buffer to store one access unit. This is the only location
    // where the implementation modifies |access_unit_buffer_|.
    const_cast<scoped_ptr<uint8[]>&>(access_unit_buffer_)
        .reset(new uint8[max_access_unit_size]);

    // Initialize the converter ABL. Note that the buffer size has to be set
    // before every encode operation, since the field is modified to indicate
    // the size of the output data (on input it indicates the buffer capacity).
    converter_abl_.mNumberBuffers = 1;
    converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
    converter_abl_.mBuffers[0].mData = access_unit_buffer_.get();

    // The "magic cookie" is an encoder state vector required for decoding and
    // packetization. It is queried now from |converter_| then set on |file_|
    // after initialization.
    UInt32 cookie_size;
    if (AudioConverterGetPropertyInfo(converter_,
                                      kAudioConverterCompressionMagicCookie,
                                      &cookie_size,
                                      nullptr) != noErr) {
      return false;
    }
    scoped_ptr<uint8[]> cookie_data(new uint8[cookie_size]);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCompressionMagicCookie,
                                  &cookie_size,
                                  cookie_data.get()) != noErr) {
      return false;
    }

    if (AudioFileInitializeWithCallbacks(this,
                                         nullptr,
                                         &FileWriteCallback,
                                         nullptr,
                                         nullptr,
                                         kAudioFileAAC_ADTSType,
                                         &out_asbd,
                                         0,
                                         &file_) != noErr) {
      return false;
    }

    if (AudioFileSetProperty(file_,
                             kAudioFilePropertyMagicCookieData,
                             cookie_size,
                             cookie_data.get()) != noErr) {
      return false;
    }

    // Initially the input bus points to the input buffer. See the comment on
    // |input_bus_| for more on this optimization.
    input_bus_->set_frames(kAccessUnitSamples);
    for (int ch = 0; ch < input_buffer_->channels(); ++ch) {
      input_bus_->SetChannelData(ch, input_buffer_->channel(ch));
    }

    return true;
  }

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) override {
    DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());

    // See the comment on |input_bus_| for more on this optimization. Note that
    // we cannot elide the copy if the source offset would result in an
    // unaligned pointer.
    if (num_samples == kAccessUnitSamples &&
        source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0) {
      DCHECK_EQ(buffer_fill_offset, 0);
      for (int ch = 0; ch < audio_bus->channels(); ++ch) {
        auto samples = const_cast<float*>(audio_bus->channel(ch));
        input_bus_->SetChannelData(ch, samples + source_offset);
      }
      return;
    }

    // Copy the samples into the input buffer.
    DCHECK_EQ(input_bus_->channel(0), input_buffer_->channel(0));
    audio_bus->CopyPartialFramesTo(
        source_offset, num_samples, buffer_fill_offset, input_buffer_.get());
  }

  bool EncodeFromFilledBuffer(std::string* out) override {
    // Reset the buffer size field to the buffer capacity.
    converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;

    // Encode the current input buffer. This is a synchronous call.
    OSStatus oserr;
    UInt32 io_num_packets = 1;
    AudioStreamPacketDescription packet_description;
    oserr = AudioConverterFillComplexBuffer(converter_,
                                            &ConverterFillDataCallback,
                                            this,
                                            &io_num_packets,
                                            &converter_abl_,
                                            &packet_description);
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }

    // Reserve space in the output buffer to write the packet.
    out->reserve(packet_description.mDataByteSize + kAdtsHeaderSize);

    // Set the current output buffer and emit an ADTS-wrapped AAC access unit.
    // This is a synchronous call. After it returns, reset the output buffer.
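    // (AudioFileWritePackets() invokes FileWriteCallback(), below, which
    // appends the ADTS-framed bytes to |output_buffer_|.)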
    output_buffer_ = out;
    oserr = AudioFileWritePackets(file_,
                                  false,
                                  converter_abl_.mBuffers[0].mDataByteSize,
                                  &packet_description,
                                  num_access_units_,
                                  &io_num_packets,
                                  converter_abl_.mBuffers[0].mData);
    output_buffer_ = nullptr;
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }
    num_access_units_ += io_num_packets;
    return true;
  }

  // The |AudioConverterFillComplexBuffer| input callback function. Configures
  // the provided |AudioBufferList| to alias |input_bus_|. The implementation
  // can only supply |kAccessUnitSamples| samples as a result of not copying
  // samples or tracking read and write positions. Note that this function is
  // called synchronously by |AudioConverterFillComplexBuffer|.
  static OSStatus ConverterFillDataCallback(
      AudioConverterRef in_converter,
      UInt32* io_num_packets,
      AudioBufferList* io_data,
      AudioStreamPacketDescription** out_packet_desc,
      void* in_encoder) {
    DCHECK(in_encoder);
    auto encoder = reinterpret_cast<AppleAacImpl*>(in_encoder);
    auto input_buffer = encoder->input_buffer_.get();
    auto input_bus = encoder->input_bus_.get();

    DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
    DCHECK_EQ(io_data->mNumberBuffers,
              static_cast<unsigned>(input_bus->channels()));
    for (int i_buf = 0, end = io_data->mNumberBuffers; i_buf < end; ++i_buf) {
      io_data->mBuffers[i_buf].mNumberChannels = 1;
      io_data->mBuffers[i_buf].mDataByteSize = sizeof(float) * *io_num_packets;
      io_data->mBuffers[i_buf].mData = input_bus->channel(i_buf);

      // Reset the input bus back to the input buffer. See the comment on
      // |input_bus_| for more on this optimization.
      input_bus->SetChannelData(i_buf, input_buffer->channel(i_buf));
    }
    return noErr;
  }

  // The AudioFile write callback function. Appends the data to the encoder's
  // current |output_buffer_|.
  static OSStatus FileWriteCallback(void* in_encoder,
                                    SInt64 in_position,
                                    UInt32 in_size,
                                    const void* in_buffer,
                                    UInt32* out_size) {
    DCHECK(in_encoder);
    DCHECK(in_buffer);
    auto encoder = reinterpret_cast<const AppleAacImpl*>(in_encoder);
    auto buffer = reinterpret_cast<const std::string::value_type*>(in_buffer);

    std::string* const output_buffer = encoder->output_buffer_;
    DCHECK(output_buffer);

    output_buffer->append(buffer, in_size);
    *out_size = in_size;
    return noErr;
  }

  // Buffer that holds one AAC access unit worth of samples. The input callback
  // function provides samples from this buffer via |input_bus_| to the encoder.
  const scoped_ptr<AudioBus> input_buffer_;

  // Wrapper AudioBus used by the input callback function. Normally it wraps
  // |input_buffer_|. However, as an optimization when the client submits a
  // buffer containing exactly one access unit worth of samples, the bus is
  // redirected to the client buffer temporarily. We know that the base
  // implementation will call us right after to encode the buffer and thus we
  // can eliminate the copy into |input_buffer_|.
  const scoped_ptr<AudioBus> input_bus_;

  // A buffer that holds one AAC access unit. Initialized in |Initialize| once
  // the maximum access unit size is known.
  const scoped_ptr<uint8[]> access_unit_buffer_;

  // The maximum size of an access unit that the encoder can emit.
  const uint32_t max_access_unit_size_;

  // A temporary pointer to the current output buffer. Only non-null when
  // writing an access unit. Accessed by the AudioFile write callback function.
  std::string* output_buffer_;

  // The |AudioConverter| is responsible for AAC encoding. This is a Core Audio
  // object, not to be confused with |media::AudioConverter|.
  AudioConverterRef converter_;

  // The |AudioFile| is responsible for ADTS packetization.
  AudioFileID file_;

  // An |AudioBufferList| passed to the converter to store encoded samples.
  AudioBufferList converter_abl_;

  // The number of access units emitted so far by the encoder.
  uint64_t num_access_units_;

  // On iOS, audio codecs can be interrupted by other services (such as an
  // audio alert or phone call). Depending on the underlying hardware and
  // configuration, the codec may have to be thrown away and re-initialized
  // after such an interruption. This flag tracks if we can resume or not from
  // such an interruption. It is initialized to true, which is the only possible
  // value on OS X and on most modern iOS hardware.
  // TODO(jfroy): Implement encoder re-initialization after interruption.
  // https://crbug.com/424787
  const bool can_resume_;

  DISALLOW_COPY_AND_ASSIGN(AppleAacImpl);
};
#endif  // defined(OS_MACOSX)

class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
 public:
  Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
            int num_channels,
            int sampling_rate,
            const FrameEncodedCallback& callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_PCM16,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 callback),
        buffer_(new int16[num_channels * samples_per_frame_]) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
      return;
    operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~Pcm16Impl() override {}

  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) override {
    audio_bus->ToInterleavedPartial(
        source_offset,
        num_samples,
        sizeof(int16),
        buffer_.get() + buffer_fill_offset * num_channels_);
  }

  bool EncodeFromFilledBuffer(std::string* out) override {
    // Output 16-bit PCM integers in big-endian byte order.
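    // (That is, network byte order; base::HostToNet16() swaps each sample on
    // little-endian hosts.)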
    out->resize(num_channels_ * samples_per_frame_ * sizeof(int16));
    const int16* src = buffer_.get();
    const int16* const src_end = src + num_channels_ * samples_per_frame_;
    uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
    for (; src < src_end; ++src, ++dest)
      *dest = base::HostToNet16(*src);
    return true;
  }

 private:
  const scoped_ptr<int16[]> buffer_;

  DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
};

AudioEncoder::AudioEncoder(
    const scoped_refptr<CastEnvironment>& cast_environment,
    int num_channels,
    int sampling_rate,
    int bitrate,
    Codec codec,
    const FrameEncodedCallback& frame_encoded_callback)
    : cast_environment_(cast_environment) {
  // Note: It doesn't matter which thread constructs AudioEncoder, just so long
  // as all calls to InsertAudio() are by the same thread.
  insert_thread_checker_.DetachFromThread();
  switch (codec) {
#if !defined(OS_IOS)
    case CODEC_AUDIO_OPUS:
      impl_ = new OpusImpl(cast_environment,
                           num_channels,
                           sampling_rate,
                           bitrate,
                           frame_encoded_callback);
      break;
#endif
#if defined(OS_MACOSX)
    case CODEC_AUDIO_AAC:
      impl_ = new AppleAacImpl(cast_environment,
                               num_channels,
                               sampling_rate,
                               bitrate,
                               frame_encoded_callback);
      break;
#endif  // defined(OS_MACOSX)
    case CODEC_AUDIO_PCM16:
      impl_ = new Pcm16Impl(cast_environment,
                            num_channels,
                            sampling_rate,
                            frame_encoded_callback);
      break;
    default:
      NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
      break;
  }
}

AudioEncoder::~AudioEncoder() {}

OperationalStatus AudioEncoder::InitializationResult() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (impl_.get()) {
    return impl_->InitializationResult();
  }
  return STATUS_UNSUPPORTED_CODEC;
}

int AudioEncoder::GetSamplesPerFrame() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return std::numeric_limits<int>::max();
  }
  return impl_->samples_per_frame();
}

base::TimeDelta AudioEncoder::GetFrameDuration() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return base::TimeDelta();
  }
  return impl_->frame_duration();
}

void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus,
                               const base::TimeTicks& recorded_time) {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  DCHECK(audio_bus.get());
  if (InitializationResult() != STATUS_INITIALIZED) {
    NOTREACHED();
    return;
  }
  cast_environment_->PostTask(CastEnvironment::AUDIO,
                              FROM_HERE,
                              base::Bind(&AudioEncoder::ImplBase::EncodeAudio,
                                         impl_,
                                         base::Passed(&audio_bus),
                                         recorded_time));
}

}  // namespace cast
}  // namespace media