Roll ANGLE bc75f36:ef9d63e
[chromium-blink-merge.git] / content / renderer / media / media_stream_audio_processor.cc
blob915b4d30753f88b8040c0fbf49ee551bf6a7772c
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/media/media_stream_audio_processor.h"
7 #include "base/command_line.h"
8 #include "base/debug/trace_event.h"
9 #if defined(OS_MACOSX)
10 #include "base/metrics/field_trial.h"
11 #endif
12 #include "base/metrics/histogram.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/renderer/media/media_stream_audio_processor_options.h"
15 #include "content/renderer/media/rtc_media_constraints.h"
16 #include "content/renderer/media/webrtc_audio_device_impl.h"
17 #include "media/audio/audio_parameters.h"
18 #include "media/base/audio_converter.h"
19 #include "media/base/audio_fifo.h"
20 #include "media/base/channel_layout.h"
21 #include "third_party/WebKit/public/platform/WebMediaConstraints.h"
22 #include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
23 #include "third_party/webrtc/modules/audio_processing/typing_detection.h"
25 namespace content {
27 namespace {
29 using webrtc::AudioProcessing;
// Sample rate, in Hz, used for the output format when an audio processing
// module is active. Android uses a lower rate than every other platform.
#if !defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 32000;
#else
const int kAudioProcessingSampleRate = 16000;
#endif

// Channel count used to pick the output channel layout when an audio
// processing module is active.
const int kAudioProcessingNumberOfChannels = 1;
38 AudioProcessing::ChannelLayout MapLayout(media::ChannelLayout media_layout) {
39 switch (media_layout) {
40 case media::CHANNEL_LAYOUT_MONO:
41 return AudioProcessing::kMono;
42 case media::CHANNEL_LAYOUT_STEREO:
43 return AudioProcessing::kStereo;
44 case media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
45 return AudioProcessing::kStereoAndKeyboard;
46 default:
47 NOTREACHED() << "Layout not supported: " << media_layout;
48 return AudioProcessing::kMono;
52 AudioProcessing::ChannelLayout ChannelsToLayout(int num_channels) {
53 switch (num_channels) {
54 case 1:
55 return AudioProcessing::kMono;
56 case 2:
57 return AudioProcessing::kStereo;
58 default:
59 NOTREACHED() << "Channels not supported: " << num_channels;
60 return AudioProcessing::kMono;
// Buckets for the "Media.AudioTrackProcessingStates" UMA histogram. Because
// the values are used by UMA, entries must not be re-ordered or removed; the
// numeric values are spelled out to make that explicit.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED = 1,
  AUDIO_PROCESSING_IN_WEBRTC = 2,
  AUDIO_PROCESSING_MAX = 3
};
72 void RecordProcessingState(AudioTrackProcessingStates state) {
73 UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
74 state, AUDIO_PROCESSING_MAX);
77 } // namespace
79 // Wraps AudioBus to provide access to the array of channel pointers, since this
80 // is the type webrtc::AudioProcessing deals in. The array is refreshed on every
81 // channel_ptrs() call, and will be valid until the underlying AudioBus pointers
82 // are changed, e.g. through calls to SetChannelData() or SwapChannels().
84 // All methods are called on one of the capture or render audio threads
85 // exclusively.
86 class MediaStreamAudioBus {
87 public:
88 MediaStreamAudioBus(int channels, int frames)
89 : bus_(media::AudioBus::Create(channels, frames)),
90 channel_ptrs_(new float*[channels]) {
91 // May be created in the main render thread and used in the audio threads.
92 thread_checker_.DetachFromThread();
95 media::AudioBus* bus() {
96 DCHECK(thread_checker_.CalledOnValidThread());
97 return bus_.get();
100 float* const* channel_ptrs() {
101 DCHECK(thread_checker_.CalledOnValidThread());
102 for (int i = 0; i < bus_->channels(); ++i) {
103 channel_ptrs_[i] = bus_->channel(i);
105 return channel_ptrs_.get();
108 private:
109 base::ThreadChecker thread_checker_;
110 scoped_ptr<media::AudioBus> bus_;
111 scoped_ptr<float*[]> channel_ptrs_;
114 // Wraps AudioFifo to provide a cleaner interface to MediaStreamAudioProcessor.
115 // It avoids the FIFO when the source and destination frames match. All methods
116 // are called on one of the capture or render audio threads exclusively.
117 class MediaStreamAudioFifo {
118 public:
119 MediaStreamAudioFifo(int channels, int source_frames,
120 int destination_frames)
121 : source_frames_(source_frames),
122 destination_(new MediaStreamAudioBus(channels, destination_frames)),
123 data_available_(false) {
124 if (source_frames != destination_frames) {
125 // Since we require every Push to be followed by as many Consumes as
126 // possible, twice the larger of the two is a (probably) loose upper bound
127 // on the FIFO size.
128 const int fifo_frames = 2 * std::max(source_frames, destination_frames);
129 fifo_.reset(new media::AudioFifo(channels, fifo_frames));
132 // May be created in the main render thread and used in the audio threads.
133 thread_checker_.DetachFromThread();
136 void Push(const media::AudioBus* source) {
137 DCHECK(thread_checker_.CalledOnValidThread());
138 DCHECK_EQ(source->channels(), destination_->bus()->channels());
139 DCHECK_EQ(source->frames(), source_frames_);
141 if (fifo_) {
142 fifo_->Push(source);
143 } else {
144 source->CopyTo(destination_->bus());
145 data_available_ = true;
149 // Returns true if there are destination_frames() of data available to be
150 // consumed, and otherwise false.
151 bool Consume(MediaStreamAudioBus** destination) {
152 DCHECK(thread_checker_.CalledOnValidThread());
154 if (fifo_) {
155 if (fifo_->frames() < destination_->bus()->frames())
156 return false;
158 fifo_->Consume(destination_->bus(), 0, destination_->bus()->frames());
159 } else {
160 if (!data_available_)
161 return false;
163 // The data was already copied to |destination_| in this case.
164 data_available_ = false;
167 *destination = destination_.get();
168 return true;
171 private:
172 base::ThreadChecker thread_checker_;
173 const int source_frames_; // For a DCHECK.
174 scoped_ptr<MediaStreamAudioBus> destination_;
175 scoped_ptr<media::AudioFifo> fifo_;
176 // Only used when the FIFO is disabled;
177 bool data_available_;
180 bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() {
181 return !CommandLine::ForCurrentProcess()->HasSwitch(
182 switches::kDisableAudioTrackProcessing);
185 MediaStreamAudioProcessor::MediaStreamAudioProcessor(
186 const blink::WebMediaConstraints& constraints,
187 int effects,
188 WebRtcPlayoutDataSource* playout_data_source)
189 : render_delay_ms_(0),
190 playout_data_source_(playout_data_source),
191 audio_mirroring_(false),
192 typing_detected_(false),
193 stopped_(false) {
194 capture_thread_checker_.DetachFromThread();
195 render_thread_checker_.DetachFromThread();
196 InitializeAudioProcessingModule(constraints, effects);
197 if (IsAudioTrackProcessingEnabled()) {
198 aec_dump_message_filter_ = AecDumpMessageFilter::Get();
199 // In unit tests not creating a message filter, |aec_dump_message_filter_|
200 // will be NULL. We can just ignore that. Other unit tests and browser tests
201 // ensure that we do get the filter when we should.
202 if (aec_dump_message_filter_)
203 aec_dump_message_filter_->AddDelegate(this);
207 MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
208 DCHECK(main_thread_checker_.CalledOnValidThread());
209 Stop();
212 void MediaStreamAudioProcessor::OnCaptureFormatChanged(
213 const media::AudioParameters& input_format) {
214 DCHECK(main_thread_checker_.CalledOnValidThread());
215 // There is no need to hold a lock here since the caller guarantees that
216 // there is no more PushCaptureData() and ProcessAndConsumeData() callbacks
217 // on the capture thread.
218 InitializeCaptureFifo(input_format);
220 // Reset the |capture_thread_checker_| since the capture data will come from
221 // a new capture thread.
222 capture_thread_checker_.DetachFromThread();
225 void MediaStreamAudioProcessor::PushCaptureData(
226 const media::AudioBus* audio_source) {
227 DCHECK(capture_thread_checker_.CalledOnValidThread());
229 capture_fifo_->Push(audio_source);
232 bool MediaStreamAudioProcessor::ProcessAndConsumeData(
233 base::TimeDelta capture_delay, int volume, bool key_pressed,
234 int* new_volume, int16** out) {
235 DCHECK(capture_thread_checker_.CalledOnValidThread());
236 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");
238 MediaStreamAudioBus* process_bus;
239 if (!capture_fifo_->Consume(&process_bus))
240 return false;
242 // Use the process bus directly if audio processing is disabled.
243 MediaStreamAudioBus* output_bus = process_bus;
244 *new_volume = 0;
245 if (audio_processing_) {
246 output_bus = output_bus_.get();
247 *new_volume = ProcessData(process_bus->channel_ptrs(),
248 process_bus->bus()->frames(), capture_delay,
249 volume, key_pressed, output_bus->channel_ptrs());
252 // Swap channels before interleaving the data.
253 if (audio_mirroring_ &&
254 output_format_.channel_layout() == media::CHANNEL_LAYOUT_STEREO) {
255 // Swap the first and second channels.
256 output_bus->bus()->SwapChannels(0, 1);
259 output_bus->bus()->ToInterleaved(output_bus->bus()->frames(),
260 sizeof(int16),
261 output_data_.get());
262 *out = output_data_.get();
264 return true;
267 void MediaStreamAudioProcessor::Stop() {
268 DCHECK(main_thread_checker_.CalledOnValidThread());
269 if (stopped_)
270 return;
272 stopped_ = true;
274 if (aec_dump_message_filter_) {
275 aec_dump_message_filter_->RemoveDelegate(this);
276 aec_dump_message_filter_ = NULL;
279 if (!audio_processing_.get())
280 return;
282 StopEchoCancellationDump(audio_processing_.get());
284 if (playout_data_source_) {
285 playout_data_source_->RemovePlayoutSink(this);
286 playout_data_source_ = NULL;
290 const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
291 return input_format_;
294 const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
295 return output_format_;
298 void MediaStreamAudioProcessor::OnAecDumpFile(
299 const IPC::PlatformFileForTransit& file_handle) {
300 DCHECK(main_thread_checker_.CalledOnValidThread());
302 base::File file = IPC::PlatformFileForTransitToFile(file_handle);
303 DCHECK(file.IsValid());
305 if (audio_processing_)
306 StartEchoCancellationDump(audio_processing_.get(), file.Pass());
307 else
308 file.Close();
311 void MediaStreamAudioProcessor::OnDisableAecDump() {
312 DCHECK(main_thread_checker_.CalledOnValidThread());
313 if (audio_processing_)
314 StopEchoCancellationDump(audio_processing_.get());
317 void MediaStreamAudioProcessor::OnIpcClosing() {
318 DCHECK(main_thread_checker_.CalledOnValidThread());
319 aec_dump_message_filter_ = NULL;
322 void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
323 int sample_rate,
324 int audio_delay_milliseconds) {
325 DCHECK(render_thread_checker_.CalledOnValidThread());
326 DCHECK(audio_processing_->echo_control_mobile()->is_enabled() ^
327 audio_processing_->echo_cancellation()->is_enabled());
329 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
330 DCHECK_LT(audio_delay_milliseconds,
331 std::numeric_limits<base::subtle::Atomic32>::max());
332 base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);
334 InitializeRenderFifoIfNeeded(sample_rate, audio_bus->channels(),
335 audio_bus->frames());
337 render_fifo_->Push(audio_bus);
338 MediaStreamAudioBus* analysis_bus;
339 while (render_fifo_->Consume(&analysis_bus)) {
340 audio_processing_->AnalyzeReverseStream(
341 analysis_bus->channel_ptrs(),
342 analysis_bus->bus()->frames(),
343 sample_rate,
344 ChannelsToLayout(audio_bus->channels()));
348 void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
349 DCHECK(main_thread_checker_.CalledOnValidThread());
350 // There is no need to hold a lock here since the caller guarantees that
351 // there is no more OnPlayoutData() callback on the render thread.
352 render_thread_checker_.DetachFromThread();
353 render_fifo_.reset();
356 void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
357 stats->typing_noise_detected =
358 (base::subtle::Acquire_Load(&typing_detected_) != false);
359 GetAecStats(audio_processing_.get(), stats);
362 void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
363 const blink::WebMediaConstraints& constraints, int effects) {
364 DCHECK(!audio_processing_);
366 MediaAudioConstraints audio_constraints(constraints, effects);
368 // Audio mirroring can be enabled even though audio processing is otherwise
369 // disabled.
370 audio_mirroring_ = audio_constraints.GetProperty(
371 MediaAudioConstraints::kGoogAudioMirroring);
373 if (!IsAudioTrackProcessingEnabled()) {
374 RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
375 return;
378 #if defined(OS_IOS)
379 // On iOS, VPIO provides built-in AGC and AEC.
380 const bool echo_cancellation = false;
381 const bool goog_agc = false;
382 #else
383 const bool echo_cancellation =
384 audio_constraints.GetEchoCancellationProperty();
385 const bool goog_agc = audio_constraints.GetProperty(
386 MediaAudioConstraints::kGoogAutoGainControl);
387 #endif
389 #if defined(OS_IOS) || defined(OS_ANDROID)
390 const bool goog_experimental_aec = false;
391 const bool goog_typing_detection = false;
392 #else
393 const bool goog_experimental_aec = audio_constraints.GetProperty(
394 MediaAudioConstraints::kGoogExperimentalEchoCancellation);
395 const bool goog_typing_detection = audio_constraints.GetProperty(
396 MediaAudioConstraints::kGoogTypingNoiseDetection);
397 #endif
399 const bool goog_ns = audio_constraints.GetProperty(
400 MediaAudioConstraints::kGoogNoiseSuppression);
401 const bool goog_experimental_ns = audio_constraints.GetProperty(
402 MediaAudioConstraints::kGoogExperimentalNoiseSuppression);
403 const bool goog_high_pass_filter = audio_constraints.GetProperty(
404 MediaAudioConstraints::kGoogHighpassFilter);
406 // Return immediately if no goog constraint is enabled.
407 if (!echo_cancellation && !goog_experimental_aec && !goog_ns &&
408 !goog_high_pass_filter && !goog_typing_detection &&
409 !goog_agc && !goog_experimental_ns) {
410 RecordProcessingState(AUDIO_PROCESSING_DISABLED);
411 return;
414 // Experimental options provided at creation.
415 webrtc::Config config;
416 if (goog_experimental_aec)
417 config.Set<webrtc::DelayCorrection>(new webrtc::DelayCorrection(true));
418 if (goog_experimental_ns)
419 config.Set<webrtc::ExperimentalNs>(new webrtc::ExperimentalNs(true));
420 #if defined(OS_MACOSX)
421 if (base::FieldTrialList::FindFullName("NoReportedDelayOnMac") == "Enabled")
422 config.Set<webrtc::ReportedDelay>(new webrtc::ReportedDelay(false));
423 #endif
425 // Create and configure the webrtc::AudioProcessing.
426 audio_processing_.reset(webrtc::AudioProcessing::Create(config));
428 // Enable the audio processing components.
429 if (echo_cancellation) {
430 EnableEchoCancellation(audio_processing_.get());
432 if (playout_data_source_)
433 playout_data_source_->AddPlayoutSink(this);
436 if (goog_ns)
437 EnableNoiseSuppression(audio_processing_.get());
439 if (goog_high_pass_filter)
440 EnableHighPassFilter(audio_processing_.get());
442 if (goog_typing_detection) {
443 // TODO(xians): Remove this |typing_detector_| after the typing suppression
444 // is enabled by default.
445 typing_detector_.reset(new webrtc::TypingDetection());
446 EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
449 if (goog_agc)
450 EnableAutomaticGainControl(audio_processing_.get());
452 RecordProcessingState(AUDIO_PROCESSING_ENABLED);
// Stores |input_format| and rebuilds everything derived from it: the output
// format, the capture FIFO, the processed-output bus (only when an audio
// processing module exists) and the interleaved int16 output buffer.
//
// With a processing module the output uses kAudioProcessingSampleRate and the
// layout guessed from kAudioProcessingNumberOfChannels; without one it takes
// the sample rate and channel layout of |input_format|.
455 void MediaStreamAudioProcessor::InitializeCaptureFifo(
456 const media::AudioParameters& input_format) {
457 DCHECK(main_thread_checker_.CalledOnValidThread());
458 DCHECK(input_format.IsValid());
459 input_format_ = input_format;
461 // TODO(ajm): For now, we assume fixed parameters for the output when audio
462 // processing is enabled, to match the previous behavior. We should either
463 // use the input parameters (in which case, audio processing will convert
464 // at output) or ideally, have a backchannel from the sink to know what
465 // format it would prefer.
466 const int output_sample_rate = audio_processing_ ?
467 kAudioProcessingSampleRate : input_format.sample_rate();
468 const media::ChannelLayout output_channel_layout = audio_processing_ ?
469 media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
470 input_format.channel_layout();
472 // webrtc::AudioProcessing requires a 10 ms chunk size. We use this native
473 // size when processing is enabled. When disabled we use the same size as
474 // the source if less than 10 ms.
476 // TODO(ajm): This conditional buffer size appears to be assuming knowledge of
477 // the sink based on the source parameters. PeerConnection sinks seem to want
478 // 10 ms chunks regardless, while WebAudio sinks want less, and we're assuming
479 // we can identify WebAudio sinks by the input chunk size. Less fragile would
480 // be to have the sink actually tell us how much it wants (as in the above
481 // TODO).
// Both sizes start at 10 ms worth of frames (rate / 100); the processing
// chunk is sized from the input rate, the output chunk from the output rate.
482 int processing_frames = input_format.sample_rate() / 100;
483 int output_frames = output_sample_rate / 100;
484 if (!audio_processing_ && input_format.frames_per_buffer() < output_frames) {
485 processing_frames = input_format.frames_per_buffer();
486 output_frames = processing_frames;
// NOTE(review): the embedded line numbering jumps from 492 to 494 inside the
// argument list below, so one argument line appears to be missing from this
// copy of the file - presumably the bits-per-sample value, given the
// sizeof(int16) arithmetic further down. Confirm against upstream.
489 output_format_ = media::AudioParameters(
490 media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
491 output_channel_layout,
492 output_sample_rate,
494 output_frames);
// The FIFO accepts buffers of the input size and hands out chunks of
// |processing_frames|.
496 capture_fifo_.reset(
497 new MediaStreamAudioFifo(input_format.channels(),
498 input_format.frames_per_buffer(),
499 processing_frames));
// A separate bus for the processed audio is only needed with a processing
// module; otherwise ProcessAndConsumeData() uses the consumed bus directly.
501 if (audio_processing_) {
502 output_bus_.reset(new MediaStreamAudioBus(output_format_.channels(),
503 output_frames));
// Interleaved output buffer, sized in int16 samples from the byte size of one
// output buffer.
505 output_data_.reset(new int16[output_format_.GetBytesPerBuffer() /
506 sizeof(int16)]);
// Render-thread helper that makes sure |render_fifo_| exists and matches the
// given playout parameters. If a FIFO already exists and |render_format_|
// agrees on sample rate, channel count and buffer size, nothing happens;
// otherwise |render_format_| is updated and a new FIFO is created that turns
// |frames_per_buffer|-sized pushes into 10 ms chunks.
509 void MediaStreamAudioProcessor::InitializeRenderFifoIfNeeded(
510 int sample_rate, int number_of_channels, int frames_per_buffer) {
511 DCHECK(render_thread_checker_.CalledOnValidThread());
512 if (render_fifo_.get() &&
513 render_format_.sample_rate() == sample_rate &&
514 render_format_.channels() == number_of_channels &&
515 render_format_.frames_per_buffer() == frames_per_buffer) {
516 // Do nothing if the |render_fifo_| has been setup properly.
517 return;
// NOTE(review): the embedded line numbering jumps from 523 to 525 inside the
// argument list below, so one argument line appears to be missing from this
// copy of the file - presumably the bits-per-sample value. Confirm against
// upstream.
520 render_format_ = media::AudioParameters(
521 media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
522 media::GuessChannelLayout(number_of_channels),
523 sample_rate,
525 frames_per_buffer);
527 const int analysis_frames = sample_rate / 100; // 10 ms chunks.
528 render_fifo_.reset(
529 new MediaStreamAudioFifo(number_of_channels,
530 frames_per_buffer,
531 analysis_frames));
534 int MediaStreamAudioProcessor::ProcessData(const float* const* process_ptrs,
535 int process_frames,
536 base::TimeDelta capture_delay,
537 int volume,
538 bool key_pressed,
539 float* const* output_ptrs) {
540 DCHECK(audio_processing_);
541 DCHECK(capture_thread_checker_.CalledOnValidThread());
543 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
545 base::subtle::Atomic32 render_delay_ms =
546 base::subtle::Acquire_Load(&render_delay_ms_);
547 int64 capture_delay_ms = capture_delay.InMilliseconds();
548 DCHECK_LT(capture_delay_ms,
549 std::numeric_limits<base::subtle::Atomic32>::max());
550 int total_delay_ms = capture_delay_ms + render_delay_ms;
551 if (total_delay_ms > 300) {
552 LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
553 << "ms; render delay: " << render_delay_ms << "ms";
556 webrtc::AudioProcessing* ap = audio_processing_.get();
557 ap->set_stream_delay_ms(total_delay_ms);
559 DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel);
560 webrtc::GainControl* agc = ap->gain_control();
561 int err = agc->set_stream_analog_level(volume);
562 DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;
564 ap->set_stream_key_pressed(key_pressed);
566 err = ap->ProcessStream(process_ptrs,
567 process_frames,
568 input_format_.sample_rate(),
569 MapLayout(input_format_.channel_layout()),
570 output_format_.sample_rate(),
571 MapLayout(output_format_.channel_layout()),
572 output_ptrs);
573 DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;
575 if (typing_detector_) {
576 webrtc::VoiceDetection* vad = ap->voice_detection();
577 DCHECK(vad->is_enabled());
578 bool detected = typing_detector_->Process(key_pressed,
579 vad->stream_has_voice());
580 base::subtle::Release_Store(&typing_detected_, detected);
583 // Return 0 if the volume hasn't been changed, and otherwise the new volume.
584 return (agc->stream_analog_level() == volume) ?
585 0 : agc->stream_analog_level();
588 } // namespace content