1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/cast/audio_sender/audio_encoder.h"
10 #include "base/bind_helpers.h"
11 #include "base/location.h"
12 #include "base/logging.h"
13 #include "base/stl_util.h"
14 #include "base/sys_byteorder.h"
15 #include "base/time/time.h"
16 #include "media/base/audio_bus.h"
17 #include "media/cast/cast_defines.h"
18 #include "media/cast/cast_environment.h"
19 #include "media/cast/logging/logging_defines.h"
20 #include "third_party/opus/src/include/opus.h"
27 // The fixed number of audio frames per second and, inversely, the duration of
28 // one frame's worth of samples.
// At 100 frames per second each frame covers 10 ms of audio.
29 const int kFramesPerSecond
= 100;
// Integer division; exact because 1000 is a multiple of kFramesPerSecond.
30 const int kFrameDurationMillis
= 1000 / kFramesPerSecond
; // No remainder!
32 // Threshold used to decide whether audio being delivered to the encoder is
33 // coming in too slow with respect to the capture timestamps.
// Evaluates to three frame durations (30 ms with the values above).
34 const int kUnderrunThresholdMillis
= 3 * kFrameDurationMillis
;
// Records one encoded audio frame in the cast logging subsystem by calling
// cast_environment->Logging()->InsertEncodedFrameEvent() with the
// FRAME_ENCODED / AUDIO_EVENT tags.
//
// When the caller is not on CastEnvironment::MAIN, a task bound to this same
// function with the same arguments is posted to MAIN.
//
// NOTE(review): frame_id and frame_size are used below, but their parameter
// declarations are not visible in this view. The statement that ends the
// re-post branch is also not visible - confirm both against the full file.
36 void LogAudioFrameEncodedEvent(
37 const scoped_refptr
<media::cast::CastEnvironment
>& cast_environment
,
38 base::TimeTicks event_time
,
39 media::cast::RtpTimestamp rtp_timestamp
,
// Hop to the MAIN thread if this call arrived on any other thread.
42 if (!cast_environment
->CurrentlyOn(CastEnvironment::MAIN
)) {
43 cast_environment
->PostTask(
44 CastEnvironment::MAIN
,
46 base::Bind(&LogAudioFrameEncodedEvent
,
47 cast_environment
, event_time
,
48 rtp_timestamp
, frame_id
, frame_size
));
// frame_size is narrowed to int for the logging call. The key-frame flag and
// target bitrate are passed as false and 0; the inline comments mark both as
// unused.
51 cast_environment
->Logging()->InsertEncodedFrameEvent(
52 event_time
, media::cast::FRAME_ENCODED
, media::cast::AUDIO_EVENT
,
53 rtp_timestamp
, frame_id
,
54 static_cast<int>(frame_size
), /* key_frame - unused */ false,
55 /*target_bitrate - unused*/ 0);
61 // Base class that handles the common problem of feeding one or more AudioBus'
62 // data into a buffer and then, once the buffer is full, encoding the signal and
63 // emitting an EncodedAudioFrame via the FrameEncodedCallback.
65 // Subclasses complete the implementation by handling the actual encoding
// (see the pure virtual TransferSamplesIntoBuffer() and
// EncodeFromFilledBuffer() hooks declared in this class).
67 class AudioEncoder::ImplBase
68 : public base::RefCountedThreadSafe
<AudioEncoder::ImplBase
> {
// Constructor. Derives samples_per_frame_ as sampling_rate / kFramesPerSecond,
// starts in STATUS_AUDIO_UNINITIALIZED and starts the RTP timestamp at 0.
//
// NOTE(review): num_channels and sampling_rate are used below, but their
// parameter declarations are not visible in this view. Some member
// initializers are not visible either.
70 ImplBase(const scoped_refptr
<CastEnvironment
>& cast_environment
,
71 transport::AudioCodec codec
,
74 const FrameEncodedCallback
& callback
)
75 : cast_environment_(cast_environment
),
77 num_channels_(num_channels
),
78 samples_per_frame_(sampling_rate
/ kFramesPerSecond
),
80 cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED
),
83 frame_rtp_timestamp_(0) {
// Configuration check. The status becomes STATUS_INVALID_AUDIO_CONFIGURATION
// when any of these hold:
//   - the channel count or the per-frame sample count is not positive;
//   - sampling_rate is not an exact multiple of kFramesPerSecond;
//   - one frame across all channels exceeds
//     EncodedAudioFrame::kMaxNumberOfSamples.
84 if (num_channels_
<= 0 || samples_per_frame_
<= 0 ||
85 sampling_rate
% kFramesPerSecond
!= 0 ||
86 samples_per_frame_
* num_channels_
>
87 transport::EncodedAudioFrame::kMaxNumberOfSamples
) {
88 cast_initialization_status_
= STATUS_INVALID_AUDIO_CONFIGURATION
;
// Returns the stored initialization status. The base constructor starts it at
// STATUS_AUDIO_UNINITIALIZED or flags an invalid configuration; subclass
// constructors may then update it.
92 CastInitializationStatus
InitializationResult() const {
93 return cast_initialization_status_
;
// Feeds the samples of |audio_bus| into the frame buffer. Each time the buffer
// holds samples_per_frame_ samples, one frame is encoded and, on success,
// callback_ is posted to CastEnvironment::MAIN with the frame.
//
// |audio_bus|      audio to consume; its channel count is DCHECKed against
//                  num_channels_.
// |recorded_time|  capture time associated with |audio_bus|; must not be null.
//                  It is used to detect underruns and to derive the capture
//                  time reported with each frame.
//
// Precondition (DCHECK): the status is STATUS_AUDIO_INITIALIZED.
//
// NOTE(review): several lines of this method are not visible in this view,
// including the declaration of src_pos, the statement controlled by the
// buffer-not-yet-full check, and the frame id update. Confirm against the
// full file before changing the control flow.
96 void EncodeAudio(scoped_ptr
<AudioBus
> audio_bus
,
97 const base::TimeTicks
& recorded_time
) {
98 DCHECK_EQ(cast_initialization_status_
, STATUS_AUDIO_INITIALIZED
);
99 DCHECK(!recorded_time
.is_null());
101 // Determine whether |recorded_time| is consistent with the amount of audio
102 // data having been processed in the past. Resolve the underrun problem by
103 // dropping data from the internal buffer and skipping ahead the next
104 // frame's RTP timestamp by the estimated number of frames missed. On the
105 // other hand, don't attempt to resolve overruns: A receiver should
106 // gracefully deal with an excess of audio data.
107 const base::TimeDelta frame_duration
=
108 base::TimeDelta::FromMilliseconds(kFrameDurationMillis
);
// Duration of the audio already sitting in the partially filled buffer: the
// buffered sample count scaled by the per-frame duration.
109 base::TimeDelta buffer_fill_duration
=
110 buffer_fill_end_
* frame_duration
/ samples_per_frame_
;
// A null frame_capture_time_ skips the underrun check entirely.
111 if (!frame_capture_time_
.is_null()) {
// How far the new data starts beyond the point where the buffered data ends.
112 const base::TimeDelta amount_ahead_by
=
113 recorded_time
- (frame_capture_time_
+ buffer_fill_duration
);
114 if (amount_ahead_by
>
115 base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis
)) {
// Underrun: discard the partial frame and skip the RTP timestamp forward by
// the number of whole frame durations in the gap (integer division).
116 buffer_fill_end_
= 0;
117 buffer_fill_duration
= base::TimeDelta();
118 const int64 num_frames_missed
= amount_ahead_by
/
119 base::TimeDelta::FromMilliseconds(kFrameDurationMillis
);
120 frame_rtp_timestamp_
+=
121 static_cast<uint32
>(num_frames_missed
* samples_per_frame_
);
122 DVLOG(1) << "Skipping RTP timestamp ahead to account for "
123 << num_frames_missed
* samples_per_frame_
124 << " samples' worth of underrun.";
// Capture time of the frame being filled: the time of the new data minus the
// duration of the samples already buffered for that frame.
127 frame_capture_time_
= recorded_time
- buffer_fill_duration
;
129 // Encode all audio in |audio_bus| into zero or more frames.
131 while (src_pos
< audio_bus
->frames()) {
// Transfer the lesser of the free space left in the frame buffer and the
// samples still unread in |audio_bus|.
132 const int num_samples_to_xfer
= std::min(
133 samples_per_frame_
- buffer_fill_end_
, audio_bus
->frames() - src_pos
);
134 DCHECK_EQ(audio_bus
->channels(), num_channels_
);
135 TransferSamplesIntoBuffer(
136 audio_bus
.get(), src_pos
, buffer_fill_end_
, num_samples_to_xfer
);
137 src_pos
+= num_samples_to_xfer
;
138 buffer_fill_end_
+= num_samples_to_xfer
;
// NOTE(review): the statement controlled by this check is not visible in this
// view; presumably it leaves the loop until a later call supplies more audio.
140 if (buffer_fill_end_
< samples_per_frame_
)
// The buffer holds a full frame: build the frame record and encode into it.
143 scoped_ptr
<transport::EncodedAudioFrame
> audio_frame(
144 new transport::EncodedAudioFrame());
145 audio_frame
->codec
= codec_
;
146 audio_frame
->frame_id
= frame_id_
;
147 audio_frame
->rtp_timestamp
= frame_rtp_timestamp_
;
// Log and deliver only when the subclass reports that it produced a frame.
149 if (EncodeFromFilledBuffer(&audio_frame
->data
)) {
150 LogAudioFrameEncodedEvent(cast_environment_
,
151 cast_environment_
->Clock()->NowTicks(),
152 audio_frame
->rtp_timestamp
,
153 audio_frame
->frame_id
,
154 audio_frame
->data
.size());
// Ownership of audio_frame moves into the posted callback via base::Passed.
155 cast_environment_
->PostTask(
156 CastEnvironment::MAIN
,
158 base::Bind(callback_
,
159 base::Passed(&audio_frame
),
160 frame_capture_time_
));
163 // Reset the internal buffer, frame ID, and timestamps for the next frame.
// The RTP timestamp advances by one frame of samples and the capture time by
// one frame duration.
164 buffer_fill_end_
= 0;
166 frame_rtp_timestamp_
+= samples_per_frame_
;
167 frame_capture_time_
+= frame_duration
;
// The ref-counting base class is made a friend.
// NOTE(review): presumably so it can reach the destructor declared next; the
// access specifier for this section is not visible in this view.
172 friend class base::RefCountedThreadSafe
<ImplBase
>;
173 virtual ~ImplBase() {}
// Subclass hook called by EncodeAudio() to copy |num_samples| samples per
// channel from |audio_bus| into the subclass buffer, starting at sample
// position |buffer_fill_offset| in that buffer.
// NOTE(review): EncodeAudio() passes four arguments; the declaration of the
// second one (the read offset into |audio_bus|) is not visible in this view.
175 virtual void TransferSamplesIntoBuffer(const AudioBus
* audio_bus
,
177 int buffer_fill_offset
,
178 int num_samples
) = 0;
// Subclass hook called by EncodeAudio() once the buffer holds a full frame.
// The encoded bytes are written to |out|. EncodeAudio() logs and forwards the
// frame only when this returns true.
179 virtual bool EncodeFromFilledBuffer(std::string
* out
) = 0;
// Used by EncodeAudio() for the clock and for posting tasks to MAIN.
181 const scoped_refptr
<CastEnvironment
> cast_environment_
;
// Codec tag copied into each EncodedAudioFrame.
182 const transport::AudioCodec codec_
;
183 const int num_channels_
;
// Samples per channel in one frame: sampling_rate / kFramesPerSecond.
184 const int samples_per_frame_
;
// Posted to MAIN with each successfully encoded frame and its capture time.
185 const FrameEncodedCallback callback_
;
187 // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED.
188 CastInitializationStatus cast_initialization_status_
;
191 // In the case where a call to EncodeAudio() cannot completely fill the
192 // buffer, this points to the position at which to populate data in a later
// call. It counts samples per channel and is reset to 0 after each frame.
194 int buffer_fill_end_
;
196 // A counter used to label EncodedAudioFrames.
// NOTE(review): the member this comment describes is not visible in this view;
// EncodeAudio() reads frame_id_ when labelling a frame.
199 // The RTP timestamp for the next frame of encoded audio. This is defined as
200 // the number of audio samples encoded so far, plus the estimated number of
201 // samples that were missed due to data underruns. A receiver uses this value
202 // to detect gaps in the audio signal data being provided. Per the spec, RTP
203 // timestamp values are allowed to overflow and roll around past zero.
204 uint32 frame_rtp_timestamp_
;
206 // The local system time associated with the start of the next frame of
207 // encoded audio. This value is passed on to a receiver as a reference clock
208 // timestamp for the purposes of synchronizing audio and video. Its
209 // progression is expected to drift relative to the elapsed time implied by
210 // the RTP timestamps.
211 base::TimeTicks frame_capture_time_
;
213 DISALLOW_COPY_AND_ASSIGN(ImplBase
);
// ImplBase subclass that encodes frames with the Opus library, using a float
// sample buffer.
216 class AudioEncoder::OpusImpl
: public AudioEncoder::ImplBase
{
// Constructor. Allocates the encoder state and the sample buffer, initializes
// the Opus encoder and applies the requested bitrate.
//
// NOTE(review): the num_channels, sampling_rate and bitrate parameters, most
// ImplBase constructor arguments, and several statements of the body are not
// visible in this view.
218 OpusImpl(const scoped_refptr
<CastEnvironment
>& cast_environment
,
222 const FrameEncodedCallback
& callback
)
223 : ImplBase(cast_environment
,
// Encoder state lives in a byte array sized by opus_encoder_get_size();
// opus_encoder_ is a typed pointer into that same storage.
228 encoder_memory_(new uint8
[opus_encoder_get_size(num_channels
)]),
229 opus_encoder_(reinterpret_cast<OpusEncoder
*>(encoder_memory_
.get())),
// One frame of samples for all channels.
230 buffer_(new float[num_channels
* samples_per_frame_
]) {
// The base constructor may already have flagged an invalid configuration.
// NOTE(review): the statement controlled by this check is not visible in this
// view; presumably it stops further initialization.
231 if (ImplBase::cast_initialization_status_
!= STATUS_AUDIO_UNINITIALIZED
)
// A failed opus_encoder_init() is reported as an invalid configuration.
233 if (opus_encoder_init(opus_encoder_
,
236 OPUS_APPLICATION_AUDIO
) != OPUS_OK
) {
237 ImplBase::cast_initialization_status_
=
238 STATUS_INVALID_AUDIO_CONFIGURATION
;
241 ImplBase::cast_initialization_status_
= STATUS_AUDIO_INITIALIZED
;
244 // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
245 // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
246 // frame size. The opus library authors may, of course, adjust this in
// Setting the bitrate is CHECKed rather than reported through the status.
// NOTE(review): the expected value compared by CHECK_EQ is not visible here.
250 CHECK_EQ(opus_encoder_ctl(opus_encoder_
, OPUS_SET_BITRATE(bitrate
)),
// Nothing to release explicitly: both buffers are held by scoped_ptr members.
255 virtual ~OpusImpl() {}
// Copies |num_samples| samples of every channel of |audio_bus| into buffer_ in
// channel-interleaved order, starting |buffer_fill_offset| samples into the
// frame.
// NOTE(review): source_offset is used below, but its parameter declaration is
// not visible in this view. The body of the inner loop is not visible either.
257 virtual void TransferSamplesIntoBuffer(const AudioBus
* audio_bus
,
259 int buffer_fill_offset
,
260 int num_samples
) OVERRIDE
{
261 // Opus requires channel-interleaved samples in a single array.
262 for (int ch
= 0; ch
< audio_bus
->channels(); ++ch
) {
// Read range within this channel: num_samples values from source_offset.
263 const float* src
= audio_bus
->channel(ch
) + source_offset
;
264 const float* const src_end
= src
+ num_samples
;
// First destination slot for this channel: skip the samples already buffered
// (num_channels_ values each), then offset by the channel index.
265 float* dest
= buffer_
.get() + buffer_fill_offset
* num_channels_
+ ch
;
// dest advances by num_channels_ per source sample, so each channel lands in
// every num_channels_-th slot.
266 for (; src
< src_end
; ++src
, dest
+= num_channels_
)
// Encodes the buffered frame with opus_encode_float(), writing the payload
// into |out|.
// NOTE(review): some encoder arguments, the first branch on |result|, and the
// return statements are not visible in this view.
271 virtual bool EncodeFromFilledBuffer(std::string
* out
) OVERRIDE
{
// Grow |out| to the maximum payload size so the encoder can write into its
// storage directly.
272 out
->resize(kOpusMaxPayloadSize
);
273 const opus_int32 result
=
274 opus_encode_float(opus_encoder_
,
277 reinterpret_cast<uint8
*>(string_as_array(out
)),
278 kOpusMaxPayloadSize
);
// A negative result is logged as an encoder error code.
282 } else if (result
< 0) {
283 LOG(ERROR
) << "Error code from opus_encode_float(): " << result
;
286 // Do nothing: The documentation says that a return value of zero or
287 // one byte means the packet does not need to be transmitted.
// Raw storage for the Opus encoder state, sized in the constructor by
// opus_encoder_get_size().
292 const scoped_ptr
<uint8
[]> encoder_memory_
;
// Typed pointer into encoder_memory_; it aliases that storage.
293 OpusEncoder
* const opus_encoder_
;
// One frame of channel-interleaved float samples.
294 const scoped_ptr
<float[]> buffer_
;
296 // This is the recommended value, according to documentation in
297 // third_party/opus/src/include/opus.h, so that the Opus encoder does not
298 // degrade the audio due to memory constraints.
300 // Note: Whereas other RTP implementations do not, the cast library is
301 // perfectly capable of transporting larger than MTU-sized audio frames.
// Used both as the size |out| is resized to and as the maximum size passed to
// opus_encode_float().
302 static const int kOpusMaxPayloadSize
= 4000;
304 DISALLOW_COPY_AND_ASSIGN(OpusImpl
);
// ImplBase subclass that emits raw 16-bit PCM, using an int16 sample buffer.
307 class AudioEncoder::Pcm16Impl
: public AudioEncoder::ImplBase
{
// Constructor. Allocates one frame of int16 samples for all channels and marks
// the encoder as initialized.
// NOTE(review): the num_channels and sampling_rate parameters and most
// ImplBase constructor arguments are not visible in this view.
309 Pcm16Impl(const scoped_refptr
<CastEnvironment
>& cast_environment
,
312 const FrameEncodedCallback
& callback
)
313 : ImplBase(cast_environment
,
318 buffer_(new int16
[num_channels
* samples_per_frame_
]) {
// The base constructor may already have flagged an invalid configuration.
// NOTE(review): the statement controlled by this check is not visible in this
// view; presumably it skips the assignment below.
319 if (ImplBase::cast_initialization_status_
!= STATUS_AUDIO_UNINITIALIZED
)
321 cast_initialization_status_
= STATUS_AUDIO_INITIALIZED
;
// Nothing to release explicitly: buffer_ is held by a scoped_ptr member.
325 virtual ~Pcm16Impl() {}
// Copies samples from |audio_bus| into buffer_ through
// AudioBus::ToInterleavedPartial().
// NOTE(review): one parameter declaration and the leading arguments of the
// call are not visible in this view.
327 virtual void TransferSamplesIntoBuffer(const AudioBus
* audio_bus
,
329 int buffer_fill_offset
,
330 int num_samples
) OVERRIDE
{
// The destination starts buffer_fill_offset samples into the frame, with
// num_channels_ values stored per sample position.
331 audio_bus
->ToInterleavedPartial(
335 buffer_
.get() + buffer_fill_offset
* num_channels_
);
// Serializes the buffered frame into |out| as 16-bit values converted with
// base::HostToNet16().
// NOTE(review): the return statement is not visible in this view.
338 virtual bool EncodeFromFilledBuffer(std::string
* out
) OVERRIDE
{
339 // Output 16-bit PCM integers in big-endian byte order.
// Size |out| to exactly one frame: one int16 per channel per sample.
340 out
->resize(num_channels_
* samples_per_frame_
* sizeof(int16
));
341 const int16
* src
= buffer_
.get();
342 const int16
* const src_end
= src
+ num_channels_
* samples_per_frame_
;
// The string storage is written through a uint16 pointer; at(0) is
// bounds-checked, so it relies on the resize above producing a non-empty
// string.
343 uint16
* dest
= reinterpret_cast<uint16
*>(&out
->at(0));
// Convert each sample from host to network byte order while copying.
344 for (; src
< src_end
; ++src
, ++dest
)
345 *dest
= base::HostToNet16(*src
);
// One frame of channel-interleaved int16 samples; allocated in the constructor
// with num_channels * samples_per_frame_ elements.
350 const scoped_ptr
<int16
[]> buffer_
;
352 DISALLOW_COPY_AND_ASSIGN(Pcm16Impl
);
// Creates the codec-specific implementation selected by |audio_config|.codec:
// OpusImpl for transport::kOpus (channels, frequency, bitrate) or Pcm16Impl for
// transport::kPcm16 (channels, frequency). Each implementation also receives
// |cast_environment| and |frame_encoded_callback|.
// NOTE(review): the statements that end each case and the label guarding the
// NOTREACHED() line are not visible in this view; presumably it is the default
// branch, leaving impl_ unset for other codecs.
355 AudioEncoder::AudioEncoder(
356 const scoped_refptr
<CastEnvironment
>& cast_environment
,
357 const AudioSenderConfig
& audio_config
,
358 const FrameEncodedCallback
& frame_encoded_callback
)
359 : cast_environment_(cast_environment
) {
360 // Note: It doesn't matter which thread constructs AudioEncoder, just so long
361 // as all calls to InsertAudio() are by the same thread.
362 insert_thread_checker_
.DetachFromThread();
363 switch (audio_config
.codec
) {
364 case transport::kOpus
:
365 impl_
= new OpusImpl(cast_environment
,
366 audio_config
.channels
,
367 audio_config
.frequency
,
368 audio_config
.bitrate
,
369 frame_encoded_callback
);
371 case transport::kPcm16
:
372 impl_
= new Pcm16Impl(cast_environment
,
373 audio_config
.channels
,
374 audio_config
.frequency
,
375 frame_encoded_callback
);
378 NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
// Empty destructor body.
383 AudioEncoder::~AudioEncoder() {}
// Reports the initialization status of the encoder. Two return paths are
// visible: the status reported by impl_, and STATUS_UNSUPPORTED_AUDIO_CODEC.
// NOTE(review): the condition selecting between them is not visible in this
// view; presumably it is whether impl_ was created by the constructor.
385 CastInitializationStatus
AudioEncoder::InitializationResult() const {
// Uses the same thread checker as InsertAudio().
386 DCHECK(insert_thread_checker_
.CalledOnValidThread());
388 return impl_
->InitializationResult();
390 return STATUS_UNSUPPORTED_AUDIO_CODEC
;
393 void AudioEncoder::InsertAudio(scoped_ptr
<AudioBus
> audio_bus
,
394 const base::TimeTicks
& recorded_time
) {
395 DCHECK(insert_thread_checker_
.CalledOnValidThread());
396 DCHECK(audio_bus
.get());
401 cast_environment_
->PostTask(CastEnvironment::AUDIO
,
403 base::Bind(&AudioEncoder::ImplBase::EncodeAudio
,
405 base::Passed(&audio_bus
),