1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/cast/sender/audio_encoder.h"
10 #include "base/bind_helpers.h"
11 #include "base/location.h"
12 #include "base/stl_util.h"
13 #include "base/sys_byteorder.h"
14 #include "base/time/time.h"
15 #include "media/base/audio_bus.h"
16 #include "media/cast/cast_defines.h"
17 #include "media/cast/cast_environment.h"
18 #include "third_party/opus/src/include/opus.h"
25 // The fixed number of audio frames per second and, inversely, the duration of
26 // one frame's worth of samples.
// With the values below, one frame spans 1000 / 100 = 10 milliseconds.
27 const int kFramesPerSecond
= 100;
// Integer division; exact because 1000 is a multiple of kFramesPerSecond.
28 const int kFrameDurationMillis
= 1000 / kFramesPerSecond
; // No remainder!
30 // Threshold used to decide whether audio being delivered to the encoder is
31 // coming in too slow with respect to the capture timestamps.
// Expressed as a multiple of the frame duration: three frames' worth of time.
32 const int kUnderrunThresholdMillis
= 3 * kFrameDurationMillis
;
37 // Base class that handles the common problem of feeding one or more AudioBus'
38 // data into a buffer and then, once the buffer is full, encoding the signal and
39 // emitting an EncodedFrame via the FrameEncodedCallback.
41 // Subclasses complete the implementation by handling the actual encoding
// through the pure virtual hooks TransferSamplesIntoBuffer() and
// EncodeFromFilledBuffer() declared further down in this class.
43 class AudioEncoder::ImplBase
44 : public base::RefCountedThreadSafe
<AudioEncoder::ImplBase
> {
// Constructor. Stores the environment and audio parameters, derives the number
// of samples per frame from the sampling rate, and starts out with
// STATUS_AUDIO_UNINITIALIZED. If the parameter check in the body fails, the
// status becomes STATUS_INVALID_AUDIO_CONFIGURATION instead.
46 ImplBase(const scoped_refptr
<CastEnvironment
>& cast_environment
,
50 const FrameEncodedCallback
& callback
)
51 : cast_environment_(cast_environment
),
53 num_channels_(num_channels
),
// Integer division; the body rejects sampling rates that are not an exact
// multiple of kFramesPerSecond.
54 samples_per_frame_(sampling_rate
/ kFramesPerSecond
),
56 cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED
),
59 frame_rtp_timestamp_(0) {
60 // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
// That is: 48 samples per millisecond * 2 channels * 100 ms = 9600 values.
61 const int kMaxSamplesTimesChannelsPerFrame
= 48 * 2 * 100;
// Reject non-positive channel or per-frame sample counts, sampling rates that
// do not divide evenly into frames, and frames larger than the maximum above.
62 if (num_channels_
<= 0 || samples_per_frame_
<= 0 ||
63 sampling_rate
% kFramesPerSecond
!= 0 ||
64 samples_per_frame_
* num_channels_
> kMaxSamplesTimesChannelsPerFrame
) {
65 cast_initialization_status_
= STATUS_INVALID_AUDIO_CONFIGURATION
;
// Returns the current value of cast_initialization_status_.
69 CastInitializationStatus
InitializationResult() const {
70 return cast_initialization_status_
;
// Feeds the samples in |audio_bus| (captured at |recorded_time|) into the
// internal buffer via TransferSamplesIntoBuffer(). For each buffer that gets
// filled, an EncodedFrame is populated, EncodeFromFilledBuffer() is invoked on
// its data, and on success |callback_| is posted to the MAIN thread with the
// frame. Requires a status of STATUS_AUDIO_INITIALIZED and a non-null
// |recorded_time| (both enforced by DCHECKs).
73 void EncodeAudio(scoped_ptr
<AudioBus
> audio_bus
,
74 const base::TimeTicks
& recorded_time
) {
75 DCHECK_EQ(cast_initialization_status_
, STATUS_AUDIO_INITIALIZED
);
76 DCHECK(!recorded_time
.is_null());
78 // Determine whether |recorded_time| is consistent with the amount of audio
79 // data having been processed in the past. Resolve the underrun problem by
80 // dropping data from the internal buffer and skipping ahead the next
81 // frame's RTP timestamp by the estimated number of frames missed. On the
82 // other hand, don't attempt to resolve overruns: A receiver should
83 // gracefully deal with an excess of audio data.
84 const base::TimeDelta frame_duration
=
85 base::TimeDelta::FromMilliseconds(kFrameDurationMillis
);
// Duration represented by the samples already buffered: one frame duration
// scaled by the filled fraction (buffer_fill_end_ / samples_per_frame_).
86 base::TimeDelta buffer_fill_duration
=
87 buffer_fill_end_
* frame_duration
/ samples_per_frame_
;
// The consistency check only applies once frame_capture_time_ has been set.
88 if (!frame_capture_time_
.is_null()) {
// How far |recorded_time| lies beyond the end of the audio buffered so far.
89 const base::TimeDelta amount_ahead_by
=
90 recorded_time
- (frame_capture_time_
+ buffer_fill_duration
);
92 base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis
)) {
// Underrun handling: the buffered samples no longer contribute any duration,
// and the RTP timestamp skips ahead by the whole frames that were missed.
94 buffer_fill_duration
= base::TimeDelta();
// Number of whole frame durations contained in |amount_ahead_by|.
95 const int64 num_frames_missed
= amount_ahead_by
/
96 base::TimeDelta::FromMilliseconds(kFrameDurationMillis
);
// The increment is narrowed to uint32; RTP timestamps are allowed to wrap.
97 frame_rtp_timestamp_
+=
98 static_cast<uint32
>(num_frames_missed
* samples_per_frame_
);
99 DVLOG(1) << "Skipping RTP timestamp ahead to account for "
100 << num_frames_missed
* samples_per_frame_
101 << " samples' worth of underrun.";
// Anchor the capture time of the frame being assembled: |recorded_time| minus
// the duration of the samples that were already sitting in the buffer.
104 frame_capture_time_
= recorded_time
- buffer_fill_duration
;
106 // Encode all audio in |audio_bus| into zero or more frames.
108 while (src_pos
< audio_bus
->frames()) {
// Transfer whichever is smaller: the room left in the buffer or the samples
// left in |audio_bus|.
109 const int num_samples_to_xfer
= std::min(
110 samples_per_frame_
- buffer_fill_end_
, audio_bus
->frames() - src_pos
);
111 DCHECK_EQ(audio_bus
->channels(), num_channels_
);
112 TransferSamplesIntoBuffer(
113 audio_bus
.get(), src_pos
, buffer_fill_end_
, num_samples_to_xfer
);
114 src_pos
+= num_samples_to_xfer
;
115 buffer_fill_end_
+= num_samples_to_xfer
;
// Tests whether the buffer is still only partially filled.
117 if (buffer_fill_end_
< samples_per_frame_
)
120 scoped_ptr
<EncodedFrame
> audio_frame(
// Every audio frame is flagged as a key frame and references its own ID.
122 audio_frame
->dependency
= EncodedFrame::KEY
;
123 audio_frame
->frame_id
= frame_id_
;
124 audio_frame
->referenced_frame_id
= frame_id_
;
125 audio_frame
->rtp_timestamp
= frame_rtp_timestamp_
;
126 audio_frame
->reference_time
= frame_capture_time_
;
// The frame is only handed to |callback_| when the subclass reports success.
128 if (EncodeFromFilledBuffer(&audio_frame
->data
)) {
129 cast_environment_
->PostTask(
130 CastEnvironment::MAIN
,
132 base::Bind(callback_
, base::Passed(&audio_frame
)));
135 // Reset the internal buffer, frame ID, and timestamps for the next frame.
136 buffer_fill_end_
= 0;
// Both clocks advance by exactly one frame: samples for RTP, time for capture.
138 frame_rtp_timestamp_
+= samples_per_frame_
;
139 frame_capture_time_
+= frame_duration
;
144 friend class base::RefCountedThreadSafe
<ImplBase
>;
145 virtual ~ImplBase() {}
// Subclass hook: copies |num_samples| samples of |audio_bus| into the
// subclass' buffer, starting at sample position |buffer_fill_offset|.
147 virtual void TransferSamplesIntoBuffer(const AudioBus
* audio_bus
,
149 int buffer_fill_offset
,
150 int num_samples
) = 0;
// Subclass hook: encodes the filled buffer into |out|. EncodeAudio() emits the
// frame only when this returns true.
151 virtual bool EncodeFromFilledBuffer(std::string
* out
) = 0;
153 const scoped_refptr
<CastEnvironment
> cast_environment_
;
155 const int num_channels_
;
// Samples per channel in one frame: sampling_rate / kFramesPerSecond.
156 const int samples_per_frame_
;
// Posted to the MAIN thread by EncodeAudio() with each encoded frame.
157 const FrameEncodedCallback callback_
;
159 // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
160 CastInitializationStatus cast_initialization_status_
;
163 // In the case where a call to EncodeAudio() cannot completely fill the
164 // buffer, this points to the position at which to populate data in a later
// call to EncodeAudio(). It counts samples per channel: it is compared against
// samples_per_frame_ and reset to 0 after each frame.
166 int buffer_fill_end_
;
168 // A counter used to label EncodedFrames.
171 // The RTP timestamp for the next frame of encoded audio. This is defined as
172 // the number of audio samples encoded so far, plus the estimated number of
173 // samples that were missed due to data underruns. A receiver uses this value
174 // to detect gaps in the audio signal data being provided. Per the spec, RTP
175 // timestamp values are allowed to overflow and roll around past zero.
176 uint32 frame_rtp_timestamp_
;
178 // The local system time associated with the start of the next frame of
179 // encoded audio. This value is passed on to a receiver as a reference clock
180 // timestamp for the purposes of synchronizing audio and video. Its
181 // progression is expected to drift relative to the elapsed time implied by
182 // the RTP timestamps.
183 base::TimeTicks frame_capture_time_
;
185 DISALLOW_COPY_AND_ASSIGN(ImplBase
);
// ImplBase subclass that encodes audio with the Opus library: samples are
// gathered as interleaved floats and compressed with opus_encode_float().
188 class AudioEncoder::OpusImpl
: public AudioEncoder::ImplBase
{
// Constructor. Allocates the encoder state and the sample buffer, then
// initializes the Opus encoder. The status ends up as STATUS_AUDIO_INITIALIZED
// on success or STATUS_INVALID_AUDIO_CONFIGURATION if opus_encoder_init() does
// not return OPUS_OK.
190 OpusImpl(const scoped_refptr
<CastEnvironment
>& cast_environment
,
194 const FrameEncodedCallback
& callback
)
195 : ImplBase(cast_environment
,
// Raw storage for the encoder state, sized by opus_encoder_get_size().
200 encoder_memory_(new uint8
[opus_encoder_get_size(num_channels
)]),
// Typed view of the storage above. encoder_memory_ is declared before
// opus_encoder_, so it has already been initialized at this point.
201 opus_encoder_(reinterpret_cast<OpusEncoder
*>(encoder_memory_
.get())),
// Room for one frame of interleaved float samples.
202 buffer_(new float[num_channels
* samples_per_frame_
]) {
// Tests whether the base class constructor already moved the status away from
// STATUS_AUDIO_UNINITIALIZED, which it does when it rejects the configuration.
203 if (ImplBase::cast_initialization_status_
!= STATUS_AUDIO_UNINITIALIZED
)
205 if (opus_encoder_init(opus_encoder_
,
208 OPUS_APPLICATION_AUDIO
) != OPUS_OK
) {
209 ImplBase::cast_initialization_status_
=
210 STATUS_INVALID_AUDIO_CONFIGURATION
;
213 ImplBase::cast_initialization_status_
= STATUS_AUDIO_INITIALIZED
;
216 // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
217 // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
218 // frame size. The opus library authors may, of course, adjust this in
// A failure to apply the bitrate setting is treated as fatal (CHECK_EQ).
222 CHECK_EQ(opus_encoder_ctl(opus_encoder_
, OPUS_SET_BITRATE(bitrate
)),
227 virtual ~OpusImpl() {}
// Copies |num_samples| samples from every channel of |audio_bus| into buffer_,
// interleaving the channels, starting at sample |buffer_fill_offset|.
229 virtual void TransferSamplesIntoBuffer(const AudioBus
* audio_bus
,
231 int buffer_fill_offset
,
232 int num_samples
) OVERRIDE
{
233 // Opus requires channel-interleaved samples in a single array.
234 for (int ch
= 0; ch
< audio_bus
->channels(); ++ch
) {
235 const float* src
= audio_bus
->channel(ch
) + source_offset
;
236 const float* const src_end
= src
+ num_samples
;
// Channel |ch| starts at its own slot within the first interleaved sample...
237 float* dest
= buffer_
.get() + buffer_fill_offset
* num_channels_
+ ch
;
// ...and advances by one full interleaved sample (num_channels_ values).
238 for (; src
< src_end
; ++src
, dest
+= num_channels_
)
// Encodes the contents of buffer_ into |out| using opus_encode_float().
243 virtual bool EncodeFromFilledBuffer(std::string
* out
) OVERRIDE
{
// Give the encoder the maximum payload size to write into.
244 out
->resize(kOpusMaxPayloadSize
);
245 const opus_int32 result
=
246 opus_encode_float(opus_encoder_
,
249 reinterpret_cast<uint8
*>(string_as_array(out
)),
250 kOpusMaxPayloadSize
);
// Negative results are error codes and are logged.
254 } else if (result
< 0) {
255 LOG(ERROR
) << "Error code from opus_encode_float(): " << result
;
258 // Do nothing: The documentation says that a return value of zero or
259 // one byte means the packet does not need to be transmitted.
// Backing storage for the Opus encoder state.
264 const scoped_ptr
<uint8
[]> encoder_memory_
;
// Points into encoder_memory_; not separately owned.
265 OpusEncoder
* const opus_encoder_
;
// One frame of channel-interleaved float samples awaiting encoding.
266 const scoped_ptr
<float[]> buffer_
;
268 // This is the recommended value, according to documentation in
269 // third_party/opus/src/include/opus.h, so that the Opus encoder does not
270 // degrade the audio due to memory constraints.
272 // Note: Whereas other RTP implementations do not, the cast library is
273 // perfectly capable of transporting larger than MTU-sized audio frames.
274 static const int kOpusMaxPayloadSize
= 4000;
276 DISALLOW_COPY_AND_ASSIGN(OpusImpl
);
// ImplBase subclass that performs no compression: samples are gathered as
// interleaved 16-bit integers and emitted in big-endian byte order.
279 class AudioEncoder::Pcm16Impl
: public AudioEncoder::ImplBase
{
// Constructor. Allocates the sample buffer and sets the status to
// STATUS_AUDIO_INITIALIZED.
281 Pcm16Impl(const scoped_refptr
<CastEnvironment
>& cast_environment
,
284 const FrameEncodedCallback
& callback
)
285 : ImplBase(cast_environment
,
// Room for one frame of interleaved int16 samples.
290 buffer_(new int16
[num_channels
* samples_per_frame_
]) {
// Tests whether the base class constructor already moved the status away from
// STATUS_AUDIO_UNINITIALIZED, which it does when it rejects the configuration.
291 if (ImplBase::cast_initialization_status_
!= STATUS_AUDIO_UNINITIALIZED
)
293 cast_initialization_status_
= STATUS_AUDIO_INITIALIZED
;
297 virtual ~Pcm16Impl() {}
// Has |audio_bus| write its samples, interleaved, into buffer_ beginning at
// element buffer_fill_offset * num_channels_.
299 virtual void TransferSamplesIntoBuffer(const AudioBus
* audio_bus
,
301 int buffer_fill_offset
,
302 int num_samples
) OVERRIDE
{
303 audio_bus
->ToInterleavedPartial(
307 buffer_
.get() + buffer_fill_offset
* num_channels_
);
// Copies the whole buffer into |out|, converting each sample to network byte
// order on the way.
310 virtual bool EncodeFromFilledBuffer(std::string
* out
) OVERRIDE
{
311 // Output 16-bit PCM integers in big-endian byte order.
// Size in bytes: one int16 per channel per sample of the frame.
312 out
->resize(num_channels_
* samples_per_frame_
* sizeof(int16
));
313 const int16
* src
= buffer_
.get();
314 const int16
* const src_end
= src
+ num_channels_
* samples_per_frame_
;
// The string's storage is written through a 16-bit pointer.
315 uint16
* dest
= reinterpret_cast<uint16
*>(&out
->at(0));
316 for (; src
< src_end
; ++src
, ++dest
)
317 *dest
= base::HostToNet16(*src
);
// One frame of channel-interleaved int16 samples awaiting output.
322 const scoped_ptr
<int16
[]> buffer_
;
324 DISALLOW_COPY_AND_ASSIGN(Pcm16Impl
);
// Constructs the encoder front end. Depending on the codec, impl_ is set to a
// new OpusImpl (CODEC_AUDIO_OPUS) or a new Pcm16Impl (CODEC_AUDIO_PCM16); both
// receive |cast_environment| and |frame_encoded_callback|.
327 AudioEncoder::AudioEncoder(
328 const scoped_refptr
<CastEnvironment
>& cast_environment
,
333 const FrameEncodedCallback
& frame_encoded_callback
)
334 : cast_environment_(cast_environment
) {
335 // Note: It doesn't matter which thread constructs AudioEncoder, just so long
336 // as all calls to InsertAudio() are by the same thread.
337 insert_thread_checker_
.DetachFromThread();
339 case CODEC_AUDIO_OPUS
:
340 impl_
= new OpusImpl(cast_environment
,
344 frame_encoded_callback
);
346 case CODEC_AUDIO_PCM16
:
347 impl_
= new Pcm16Impl(cast_environment
,
350 frame_encoded_callback
);
// NOTE(review): presumably the branch for any other codec value - confirm.
353 NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
// Destructor with an empty body; no explicit cleanup is performed here.
358 AudioEncoder::~AudioEncoder() {}
// Reports the initialization status by forwarding to impl_. Expected to be
// called on the thread tracked by insert_thread_checker_ (DCHECK).
360 CastInitializationStatus
AudioEncoder::InitializationResult() const {
361 DCHECK(insert_thread_checker_
.CalledOnValidThread());
363 return impl_
->InitializationResult();
// NOTE(review): fallback result when the return above is not taken; presumably
// the case where no implementation was created - confirm the guarding check.
365 return STATUS_UNSUPPORTED_AUDIO_CODEC
;
368 void AudioEncoder::InsertAudio(scoped_ptr
<AudioBus
> audio_bus
,
369 const base::TimeTicks
& recorded_time
) {
370 DCHECK(insert_thread_checker_
.CalledOnValidThread());
371 DCHECK(audio_bus
.get());
376 cast_environment_
->PostTask(CastEnvironment::AUDIO
,
378 base::Bind(&AudioEncoder::ImplBase::EncodeAudio
,
380 base::Passed(&audio_bus
),