// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_unified_win.h"

#include <Functiondiscoverykeys_devpkey.h>

#include "base/debug/trace_event.h"
#ifndef NDEBUG
#include "base/file_util.h"
#include "base/path_service.h"
#endif
#include "base/time/time.h"
#include "base/win/scoped_com_initializer.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"

using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
using base::win::ScopedCoMem;

// Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
// Larger values of alpha reduce the level of smoothing.
// See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
static const double kAlpha = 0.1;
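
// Illustrative example (added numbers, not part of the original tuning
// rationale): with kAlpha = 0.1, the update |avg += kAlpha * (sample - avg)|
// keeps 90% of the previous average on every step, so a sudden jump in the
// input reaches roughly 65% of its final value after 10 updates
// (1 - 0.9^10 ~= 0.65). This is the smoothing applied to the FIFO delta in
// ProcessOutputAudio().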

// Compute a rate compensation which always attracts us back to a specified
// target level over a period of |kCorrectionTimeSeconds|.
static const double kCorrectionTimeSeconds = 0.1;

#ifndef NDEBUG
// Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
// See LogElementNames enumerator for details on what each column represents.
static const size_t kMaxNumSampleTypes = 4;

static const size_t kMaxNumParams = 2;

// Max number of rows in the output file |kUnifiedAudioDebugFileName|.
// Each row corresponds to one set of sample values for (approximately) the
// same time instant (stored in the first column).
static const size_t kMaxFileSamples = 10000;

// Name of output debug file used for off-line analysis of measurements which
// can be utilized for performance tuning of this class.
static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";

// Name of output debug file used for off-line analysis of measurements.
// This file will contain a list of audio parameters.
static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";
#endif

typedef uint32 ChannelConfig;

// Retrieves an integer mask which corresponds to the channel layout the
// audio engine uses for its internal processing/mixing of shared-mode
// streams. This mask indicates which channels are present in the multi-
// channel stream. The least significant bit corresponds with the Front Left
// speaker, the next least significant bit corresponds to the Front Right
// speaker, and so on, continuing in the order defined in KsMedia.h.
// See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
// for more details.
static ChannelConfig GetChannelConfig(EDataFlow data_flow) {
  WAVEFORMATPCMEX format;
  return SUCCEEDED(media::CoreAudioUtil::GetDefaultSharedModeMixFormat(
      data_flow, eConsole, &format)) ?
      static_cast<int>(format.dwChannelMask) : 0;
}
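
// Illustrative examples of GetChannelConfig() return values (constants from
// ksmedia.h): a stereo mix format typically yields 0x3
// (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT) and a 5.1 layout typically
// yields 0x3F. A return value of 0 means the mix format could not be
// retrieved.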

// Use the acquired IAudioClock interface to derive a time stamp of the audio
// sample which is currently playing through the speakers.
static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
  UINT64 device_frequency = 0, position = 0;
  if (FAILED(clock->GetFrequency(&device_frequency)) ||
      FAILED(clock->GetPosition(&position, NULL))) {
    return 0.0;
  }
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(position) / device_frequency);
}
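
// Example (illustrative numbers): if GetPosition() reports 96000 units and
// GetFrequency() reports 48000 units per second, the function above returns
// 2000 [ms].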

// Get a time stamp in milliseconds given number of audio frames in |num_frames|
// using the current sample rate |fs| as scale factor.
// Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(num_frames) / fs);
}

// Convert a timestamp in milliseconds to byte units given the audio format
// in |format|.
// Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
// is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
static int MillisecondsToBytes(double ts_milliseconds,
                               const WAVEFORMATPCMEX& format) {
  double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
  return static_cast<int>(seconds * format.Format.nSamplesPerSec *
      format.Format.nBlockAlign + 0.5);
}

// Convert frame count to milliseconds given the audio format in |format|.
static double FrameCountToMilliseconds(int num_frames,
                                       const WAVEFORMATPCMEX& format) {
  return (base::Time::kMillisecondsPerSecond * num_frames) /
      static_cast<double>(format.Format.nSamplesPerSec);
}
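
// Example (added for symmetry with the helpers above): |num_frames| = 480 and
// a 48000 Hz format => 10 [ms].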

namespace media {

WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
                                         const AudioParameters& params,
                                         const std::string& input_device_id)
    : creating_thread_id_(base::PlatformThread::CurrentId()),
      manager_(manager),
      params_(params),
      input_channels_(params.input_channels()),
      output_channels_(params.channels()),
      input_device_id_(input_device_id),
      share_mode_(CoreAudioUtil::GetShareMode()),
      opened_(false),
      volume_(1.0),
      output_buffer_size_frames_(0),
      input_buffer_size_frames_(0),
      endpoint_render_buffer_size_frames_(0),
      endpoint_capture_buffer_size_frames_(0),
      num_written_frames_(0),
      total_delay_ms_(0.0),
      total_delay_bytes_(0),
      source_(NULL),
      input_callback_received_(false),
      io_sample_rate_ratio_(1),
      target_fifo_frames_(0),
      average_delta_(0),
      fifo_rate_compensation_(1),
      update_output_delay_(false),
      capture_delay_ms_(0) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
  VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";
  DCHECK(manager_);

  VLOG(1) << "Input channels : " << input_channels_;
  VLOG(1) << "Output channels: " << output_channels_;
  VLOG(1) << "Sample rate    : " << params_.sample_rate();
  VLOG(1) << "Buffer size    : " << params.frames_per_buffer();

#ifndef NDEBUG
  input_time_stamps_.reset(new int64[kMaxFileSamples]);
  num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
  resampler_margin_.reset(new int[kMaxFileSamples]);
  fifo_rate_comps_.reset(new double[kMaxFileSamples]);
  num_elements_.reset(new int[kMaxNumSampleTypes]);
  std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
  input_params_.reset(new int[kMaxNumParams]);
  output_params_.reset(new int[kMaxNumParams]);
#endif

  DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
      << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load the avrt.dll";

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time a buffer
  // has been recorded.
  capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Create the event which will be set in Stop() when streaming shall stop.
  stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
}

WASAPIUnifiedStream::~WASAPIUnifiedStream() {
  VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";
#ifndef NDEBUG
  base::FilePath data_file_name;
  PathService::Get(base::DIR_EXE, &data_file_name);
  data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
  data_file_ = file_util::OpenFile(data_file_name, "wt");
  DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";

  size_t n = 0;
  size_t elements_to_write = *std::min_element(
      num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
  while (n < elements_to_write) {
    fprintf(data_file_, "%I64d %d %d %10.9f\n",
            input_time_stamps_[n],
            num_frames_in_fifo_[n],
            resampler_margin_[n],
            fifo_rate_comps_[n]);
    ++n;
  }
  file_util::CloseFile(data_file_);

  base::FilePath param_file_name;
  PathService::Get(base::DIR_EXE, &param_file_name);
  param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
  param_file_ = file_util::OpenFile(param_file_name, "wt");
  DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
  fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
  fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
  file_util::CloseFile(param_file_);
#endif
}

bool WASAPIUnifiedStream::Open() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
  DVLOG(1) << "WASAPIUnifiedStream::Open()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (opened_)
    return true;

  AudioParameters hw_output_params;
  HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
      eRender, eConsole, &hw_output_params);
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred output audio parameters.";
    return false;
  }

  AudioParameters hw_input_params;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    // Query native parameters for the default capture device.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        eCapture, eConsole, &hw_input_params);
  } else {
    // Query native parameters for the capture device given by
    // |input_device_id_|.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        input_device_id_, &hw_input_params);
  }
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred input audio parameters.";
    return false;
  }

  // It is currently only possible to open up the output audio device using
  // the native number of channels.
  if (output_channels_ != hw_output_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested output channels.";
    return false;
  }

  // It is currently only possible to open up the input audio device using
  // the native number of channels. If the client asks for a higher channel
  // count, we will do channel upmixing in this class. The most typical
  // example is that the client provides stereo but the hardware can only be
  // opened in mono mode. We will do mono to stereo conversion in this case.
  if (input_channels_ < hw_input_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested input channels.";
    return false;
  } else if (input_channels_ > hw_input_params.channels()) {
    ChannelLayout input_layout =
        GuessChannelLayout(hw_input_params.channels());
    ChannelLayout output_layout = GuessChannelLayout(input_channels_);
    channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
    DVLOG(1) << "Remixing input channel layout from " << input_layout
             << " to " << output_layout << "; from "
             << hw_input_params.channels() << " channels to "
             << input_channels_;
  }

  if (hw_output_params.sample_rate() != params_.sample_rate()) {
    LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
               << " must match the hardware sample-rate: "
               << hw_output_params.sample_rate();
    return false;
  }

  if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
    LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
               << " must match the hardware buffer size: "
               << hw_output_params.frames_per_buffer();
    return false;
  }

  // Set up WAVEFORMATPCMEX structures for input and output given the specified
  // audio parameters.
  SetIOFormats(hw_input_params, params_);

  // Create the input and output busses.
  input_bus_ = AudioBus::Create(
      hw_input_params.channels(), input_buffer_size_frames_);
  output_bus_ = AudioBus::Create(params_);

  // One extra bus is needed for the input channel mixing case.
  if (channel_mixer_) {
    DCHECK_LT(hw_input_params.channels(), input_channels_);
    // The size of the |channel_bus_| must be the same as the size of the
    // output bus to ensure that the channel manager can deal with both
    // resampled and non-resampled data as input.
    channel_bus_ = AudioBus::Create(
        input_channels_, params_.frames_per_buffer());
  }

  // Check if FIFO and resampling are required to match the input rate to the
  // output rate. If so, a special thread loop, optimized for this case, will
  // be used. This mode is also called varispeed mode.
  // Note that we can also use this mode when input and output rates are the
  // same but native buffer sizes differ (can happen if two different audio
  // devices are used). For this case, the resampler uses a target ratio of
  // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
  // required to compensate for the difference in buffer sizes.
  // TODO(henrika): we could perhaps improve the performance for the second
  // case here by only using the FIFO and avoiding resampling. Not sure how
  // much that would give, and we risk not compensating for clock drift.
  if (hw_input_params.sample_rate() != params_.sample_rate() ||
      hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
    DoVarispeedInitialization(hw_input_params, params_);
  }

  // Render side (event driven only in varispeed mode):

  ScopedComPtr<IAudioClient> audio_output_client =
      CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
  if (!audio_output_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
                                        share_mode_,
                                        &output_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // The |render_event_| will be NULL unless varispeed mode is utilized.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_output_client, &output_format_, render_event_.Get(),
        &endpoint_render_buffer_size_frames_);
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
  }
  if (FAILED(hr))
    return false;

  ScopedComPtr<IAudioRenderClient> audio_render_client =
      CoreAudioUtil::CreateRenderClient(audio_output_client);
  if (!audio_render_client)
    return false;

  // Capture side (always event driven but the format depends on whether
  // varispeed is used or not):

  ScopedComPtr<IAudioClient> audio_input_client;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
  } else {
    ScopedComPtr<IMMDevice> audio_input_device(
        CoreAudioUtil::CreateDevice(input_device_id_));
    audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
  }
  if (!audio_input_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
                                        share_mode_,
                                        &input_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // Include a valid event handle for event-driven initialization.
    // The input side is always event driven independent of whether varispeed
    // is used or not.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_input_client, &input_format_, capture_event_.Get(),
        &endpoint_capture_buffer_size_frames_);
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
  }
  if (FAILED(hr))
    return false;

  ScopedComPtr<IAudioCaptureClient> audio_capture_client =
      CoreAudioUtil::CreateCaptureClient(audio_input_client);
  if (!audio_capture_client)
    return false;

  // Varispeed mode requires additional preparations.
  if (VarispeedMode())
    ResetVarispeed();

  // Store all valid COM interfaces.
  audio_output_client_ = audio_output_client;
  audio_render_client_ = audio_render_client;
  audio_input_client_ = audio_input_client;
  audio_capture_client_ = audio_capture_client;

  opened_ = true;
  return SUCCEEDED(hr);
}

void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
  DVLOG(1) << "WASAPIUnifiedStream::Start()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  CHECK(callback);
  CHECK(opened_);

  if (audio_io_thread_) {
    CHECK_EQ(callback, source_);
    return;
  }

  source_ = callback;

  if (VarispeedMode()) {
    ResetVarispeed();
    fifo_rate_compensation_ = 1.0;
    average_delta_ = 0.0;
    input_callback_received_ = false;
    update_output_delay_ = false;
  }

  // Create and start the thread that will listen for capture events.
  // We will also listen on render events on the same thread if varispeed
  // mode is utilized.
  audio_io_thread_.reset(
      new base::DelegateSimpleThread(this, "wasapi_io_thread"));
  audio_io_thread_->Start();
  if (!audio_io_thread_->HasBeenStarted()) {
    DLOG(ERROR) << "Failed to start WASAPI IO thread.";
    return;
  }

  // Start input streaming data between the endpoint buffer and the audio
  // engine.
  HRESULT hr = audio_input_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }

  // Ensure that the endpoint buffer is prepared with silence.
  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
            audio_output_client_, audio_render_client_)) {
      DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
      return;
    }
  }
  num_written_frames_ = endpoint_render_buffer_size_frames_;

  // Start output streaming data between the endpoint buffer and the audio
  // engine.
  hr = audio_output_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }
}

void WASAPIUnifiedStream::Stop() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
  DVLOG(1) << "WASAPIUnifiedStream::Stop()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (!audio_io_thread_)
    return;

  // Stop input audio streaming.
  HRESULT hr = audio_input_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop input streaming: " << std::hex << hr;
  }

  // Stop output audio streaming.
  hr = audio_output_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop output streaming: " << std::hex << hr;
  }

  // Wait until the thread completes and perform cleanup.
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();

  // Ensure that we don't quit the main thread loop immediately the next
  // time Start() is called.
  ResetEvent(stop_streaming_event_.Get());

  // Clear the source callback; it'll be set again on the next Start() call.
  source_ = NULL;

  // Flush all pending data and reset the audio clock stream position to 0.
  hr = audio_output_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset output streaming: " << std::hex << hr;
  }

  hr = audio_input_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset input streaming: " << std::hex << hr;
  }

  // Extra safety check to ensure that the buffers are cleared.
  // If the buffers are not cleared correctly, the next call to Start()
  // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
  // TODO(henrika): this check is only needed for shared-mode streams.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  DCHECK_EQ(0u, num_queued_frames);
}

void WASAPIUnifiedStream::Close() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
  DVLOG(1) << "WASAPIUnifiedStream::Close()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
  Stop();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseOutputStream(this);
}

void WASAPIUnifiedStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  if (volume < 0 || volume > 1)
    return;
  volume_ = volume;
}

void WASAPIUnifiedStream::GetVolume(double* volume) {
  DVLOG(1) << "GetVolume()";
  *volume = static_cast<double>(volume_);
}

void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
  // TODO(henrika): utilize frame_delay?
  // A non-zero frame delay means multiple callbacks were necessary to
  // fulfill the requested number of frames.
  if (frame_delay > 0)
    DVLOG(3) << "frame_delay: " << frame_delay;

#ifndef NDEBUG
  resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
      fifo_->frames() - audio_bus->frames();
  num_elements_[RESAMPLER_MARGIN]++;
#endif

  if (fifo_->frames() < audio_bus->frames()) {
    DLOG(ERROR) << "Not enough data in the FIFO ("
                << fifo_->frames() << " < " << audio_bus->frames() << ")";
    audio_bus->Zero();
    return;
  }

  fifo_->Consume(audio_bus, 0, audio_bus->frames());
}

void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
                                       const AudioParameters& output_params) {
  for (int n = 0; n < 2; ++n) {
    const AudioParameters& params = (n == 0) ? input_params : output_params;
    WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
    WAVEFORMATEX* format = &xformat->Format;

    // Begin with the WAVEFORMATEX structure that specifies the basic format.
    format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    format->nChannels = params.channels();
    format->nSamplesPerSec = params.sample_rate();
    format->wBitsPerSample = params.bits_per_sample();
    format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
    format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
    format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);

    // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
    // Note that we always open up using the native channel layout.
    (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
    (*xformat).dwChannelMask = (n == 0) ?
        GetChannelConfig(eCapture) : GetChannelConfig(eRender);
    (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  }

  input_buffer_size_frames_ = input_params.frames_per_buffer();
  output_buffer_size_frames_ = output_params.frames_per_buffer();
  VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
  VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;

#ifndef NDEBUG
  input_params_[0] = input_format_.Format.nSamplesPerSec;
  input_params_[1] = input_buffer_size_frames_;
  output_params_[0] = output_format_.Format.nSamplesPerSec;
  output_params_[1] = output_buffer_size_frames_;
#endif
}

void WASAPIUnifiedStream::DoVarispeedInitialization(
    const AudioParameters& input_params, const AudioParameters& output_params) {
  DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";

  // A FIFO is required in this mode for input to output buffering.
  // Note that it will add some latency.
  fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
  VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
          << " (#channels=" << input_params.channels() << ")";

  // Create the multi channel resampler using the initial sample rate ratio.
  // We will call MultiChannelResampler::SetRatio() during runtime to
  // allow arbitrary combinations of input and output devices running off
  // different clocks and using different drivers, with potentially
  // differing sample-rates. Note that the requested block size is given by
  // the native input buffer size |input_buffer_size_frames_|.
  io_sample_rate_ratio_ = input_params.sample_rate() /
      static_cast<double>(output_params.sample_rate());
  DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
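  // Example (illustrative device combination, not taken from the code above):
  // a 44100 Hz capture device paired with a 48000 Hz render device gives an
  // initial ratio of 44100 / 48000 = 0.91875; identical rates give 1.0 and
  // only the clock-drift compensation applied via SetRatio() remains.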
  resampler_.reset(new MultiChannelResampler(
      input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_,
      base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
  VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
          << output_params.sample_rate();

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  // The actual size will vary but the goal is to ensure that the average size
  // is given by this value.
  target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
  VLOG(1) << "Target FIFO size: " << target_fifo_frames_;

  // Create the event which the audio engine will signal each time it
  // wants an audio buffer to render.
  render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Allocate memory for temporary audio bus used to store resampled input
  // audio.
  resampled_bus_ = AudioBus::Create(
      input_params.channels(), output_buffer_size_frames_);

  // Buffer initial silence corresponding to target I/O buffering.
  ResetVarispeed();
}

void WASAPIUnifiedStream::ResetVarispeed() {
  DCHECK(VarispeedMode());

  // Buffer initial silence corresponding to target I/O buffering.
  fifo_->Clear();
  scoped_ptr<AudioBus> silence =
      AudioBus::Create(input_format_.Format.nChannels,
                       target_fifo_frames_);
  silence->Zero();
  fifo_->Push(silence.get());
  resampler_->Flush();
}

void WASAPIUnifiedStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  // TODO(henrika): investigate if it is possible to include these additional
  // settings in SetThreadPriority() as well.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  // The IAudioClock interface enables us to monitor a stream's data
  // rate and the current position in the stream. Allocate it before we
  // start spinning.
  ScopedComPtr<IAudioClock> audio_output_clock;
  HRESULT hr = audio_output_client_->GetService(
      __uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
  LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
                              << std::hex << hr;

  bool streaming = true;
  bool error = false;

  HANDLE wait_array[3];
  size_t num_handles = 0;
  wait_array[num_handles++] = stop_streaming_event_;
  wait_array[num_handles++] = capture_event_;
  if (render_event_) {
    // One extra event handle is needed in varispeed mode.
    wait_array[num_handles++] = render_event_;
  }

  // Keep streaming audio until the stop event is signaled.
  // Capture events are always used but render events are only active in
  // varispeed mode.
  while (streaming && !error) {
    // Wait for a close-down event, or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(num_handles,
                                               wait_array,
                                               FALSE,
                                               INFINITE);
    switch (wait_result) {
      case WAIT_OBJECT_0 + 0:
        // |stop_streaming_event_| has been set.
        streaming = false;
        break;
      case WAIT_OBJECT_0 + 1:
        // |capture_event_| has been set.
        if (VarispeedMode()) {
          ProcessInputAudio();
        } else {
          ProcessInputAudio();
          ProcessOutputAudio(audio_output_clock);
        }
        break;
      case WAIT_OBJECT_0 + 2:
        DCHECK(VarispeedMode());
        // |render_event_| has been set.
        ProcessOutputAudio(audio_output_clock);
        break;
      default:
        error = true;
        break;
    }
  }

  if (streaming && error) {
    // Stop audio streaming since something has gone wrong in our main thread
    // loop. Note that we are still in a "started" state, hence a Stop() call
    // is required to join the thread properly.
    audio_input_client_->Stop();
    audio_output_client_->Stop();
    PLOG(ERROR) << "WASAPI streaming failed.";
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIUnifiedStream::ProcessInputAudio() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");

  BYTE* data_ptr = NULL;
  UINT32 num_captured_frames = 0;
  DWORD flags = 0;
  UINT64 device_position = 0;
  UINT64 capture_time_stamp = 0;

  const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;

  base::TimeTicks now_tick = base::TimeTicks::HighResNow();

#ifndef NDEBUG
  if (VarispeedMode()) {
    input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
        now_tick.ToInternalValue();
    num_elements_[INPUT_TIME_STAMP]++;
  }
#endif

  // Retrieve the amount of data in the capture endpoint buffer.
  // |capture_time_stamp| is the value of the performance
  // counter at the time that the audio endpoint device recorded
  // the device position of the first audio frame in the data packet.
  HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
                                                &num_captured_frames,
                                                &flags,
                                                &device_position,
                                                &capture_time_stamp);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to get data from the capture buffer";
    return;
  }

  if (hr == AUDCLNT_S_BUFFER_EMPTY) {
    // The return code is a success code but a new packet is *not* available
    // and none of the output parameters in the GetBuffer() call contains valid
    // values. The best we can do is to deliver silence and avoid setting
    // |input_callback_received_| since this only seems to happen for the
    // initial event(s) on some devices.
    input_bus_->Zero();
  } else {
    // Valid data has been recorded and it is now OK to set the flag which
    // informs the render side that capturing has started.
    input_callback_received_ = true;
  }

  if (num_captured_frames != 0) {
    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
      // Clear out the capture buffer since silence is reported.
      input_bus_->Zero();
    } else {
      // Store captured data in an audio bus after de-interleaving
      // the data to match the audio bus structure.
      input_bus_->FromInterleaved(
          data_ptr, num_captured_frames, bytes_per_sample);
    }
  }

  hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

  // Buffer input into the FIFO if varispeed mode is used. The render event
  // will drive resampling of this data to match the output side.
  if (VarispeedMode()) {
    int available_frames = fifo_->max_frames() - fifo_->frames();
    if (input_bus_->frames() <= available_frames) {
      fifo_->Push(input_bus_.get());
    }
#ifndef NDEBUG
    num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
        fifo_->frames();
    num_elements_[NUM_FRAMES_IN_FIFO]++;
#endif
  }

  // Save resources by not asking for new delay estimates each time.
  // These estimates are fairly stable and it is perfectly safe to only
  // sample at a rate of ~1 Hz.
  // TODO(henrika): we might have to increase the update rate in varispeed
  // mode since the delay variations are higher in this mode.
  if ((now_tick - last_delay_sample_time_).InMilliseconds() >
      kTimeDiffInMillisecondsBetweenDelayMeasurements &&
      input_callback_received_) {
    // Calculate the estimated capture delay, i.e., the latency between
    // the recording time and the time when we are notified about
    // the recorded data. Note that the capture time stamp is given in
    // 100-nanosecond (0.1 microsecond) units.
    base::TimeDelta diff =
        now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
    capture_delay_ms_ = diff.InMillisecondsF();
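    // The factor 0.1 above converts the 100-nanosecond units to microseconds,
    // the internal unit expected by base::TimeTicks::FromInternalValue().
    // Illustrative example: a stamp of 10000000 (one second) becomes
    // 1000000 microseconds.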

    last_delay_sample_time_ = now_tick;
    update_output_delay_ = true;
  }
}

void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");

  if (!input_callback_received_) {
    if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
      if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
              audio_output_client_, audio_render_client_))
        DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
    return;
  }

  // Rate adjusted resampling is required in varispeed mode. It means that
  // recorded audio samples will be read from the FIFO, resampled to match the
  // output sample-rate and then stored in |resampled_bus_|.
  if (VarispeedMode()) {
    // Calculate a varispeed rate scalar factor to compensate for drift between
    // input and output. We use the actual number of frames still in the FIFO
    // compared with the ideal value of |target_fifo_frames_|.
    int delta = fifo_->frames() - target_fifo_frames_;

    // Average |delta| because it can jitter back/forth quite frequently
    // by +/- the hardware buffer-size *if* the input and output callbacks are
    // happening at almost exactly the same time. Also, if the input and output
    // sample-rates are different then |delta| will jitter quite a bit due to
    // the rate conversion happening in the varispeed, plus the jittering of
    // the callbacks. The average value is what's important here.
    // We use an exponential smoothing filter to reduce the variations.
    average_delta_ += kAlpha * (delta - average_delta_);

    // Compute a rate compensation which always attracts us back to the
    // |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
    double correction_time_frames =
        kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
    fifo_rate_compensation_ =
        (correction_time_frames + average_delta_) / correction_time_frames;
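
    // Worked example (illustrative numbers only): at 48000 Hz and
    // kCorrectionTimeSeconds = 0.1, |correction_time_frames| is 4800. If the
    // smoothed FIFO level is 48 frames above target (|average_delta_| = 48),
    // the compensation becomes (4800 + 48) / 4800 = 1.01, i.e. the resampler
    // consumes input about 1% faster until the excess drains; a deficit gives
    // a factor below 1.0 and slows consumption down.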

#ifndef NDEBUG
    fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
        fifo_rate_compensation_;
    num_elements_[RATE_COMPENSATION]++;
#endif

    // Adjust for FIFO drift.
    const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
    resampler_->SetRatio(new_ratio);
    // Get resampled input audio from the FIFO where the size is given by the
    // output side.
    resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
  }

  // Derive a new total delay estimate if the capture side has set the
  // |update_output_delay_| flag.
  if (update_output_delay_) {
    // Calculate the estimated render delay, i.e., the time difference
    // between the time when data is added to the endpoint buffer and
    // when the data is played out on the actual speaker.
    const double stream_pos = CurrentStreamPosInMilliseconds(
        num_written_frames_ + output_buffer_size_frames_,
        output_format_.Format.nSamplesPerSec);
    const double speaker_pos =
        SpeakerStreamPosInMilliseconds(audio_output_clock);
    const double render_delay_ms = stream_pos - speaker_pos;
    const double fifo_delay_ms = VarispeedMode() ?
        FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;

    // Derive the total delay, i.e., the sum of the input and output
    // delays. Also convert the value into byte units. An extra FIFO delay
    // is added for varispeed usage cases.
    total_delay_ms_ = VarispeedMode() ?
        capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
        capture_delay_ms_ + render_delay_ms;
    DVLOG(2) << "total_delay_ms   : " << total_delay_ms_;
    DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
    DVLOG(3) << " render_delay_ms : " << render_delay_ms;
    DVLOG(3) << " fifo_delay_ms   : " << fifo_delay_ms;
    total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);
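
    // Worked example (illustrative numbers only): with a 10 ms capture delay,
    // a 20 ms render delay and a 10 ms FIFO delay in varispeed mode, the total
    // is 40 ms; for a 48000 Hz stereo 16-bit output format (4 bytes per frame)
    // MillisecondsToBytes() then yields 0.04 * 48000 * 4 = 7680 [bytes].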

    // Wait for a new signal from the capture side.
    update_output_delay_ = false;
  }

  // Select the source depending on whether varispeed is utilized or not.
  // Also, the source might be the output of a channel mixer if channel mixing
  // is required to match the native input channels to the number of input
  // channels used by the client (given by |input_channels_| in this case).
  AudioBus* input_bus = VarispeedMode() ?
      resampled_bus_.get() : input_bus_.get();
  if (channel_mixer_) {
    DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
    // The most common case is 1->2 channel upmixing.
    channel_mixer_->Transform(input_bus, channel_bus_.get());
    // Use the output from the channel mixer as the new input bus.
    input_bus = channel_bus_.get();
  }

  // Prepare for rendering by calling OnMoreIOData().
  int frames_filled = source_->OnMoreIOData(
      input_bus,
      output_bus_.get(),
      AudioBuffersState(0, total_delay_bytes_));
  DCHECK_EQ(frames_filled, output_bus_->frames());

  // Keep track of the number of rendered frames since we need it for
  // our delay calculations.
  num_written_frames_ += frames_filled;

  // Derive the amount of available space in the endpoint buffer.
  // Avoid a render attempt if there is no room for a captured packet.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  if (endpoint_render_buffer_size_frames_ - num_queued_frames <
      output_buffer_size_frames_)
    return;

  // Grab all available space in the rendering endpoint buffer
  // into which the client can write a data packet.
  uint8* audio_data = NULL;
  HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
                                               &audio_data);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to access render buffer";
    return;
  }

  const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;

  // Convert the audio bus content to interleaved integer data using
  // |audio_data| as destination.
  output_bus_->Scale(volume_);
  output_bus_->ToInterleaved(
      output_buffer_size_frames_, bytes_per_sample, audio_data);

  // Release the buffer space acquired in the GetBuffer() call.
  hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";

  return;
}

void WASAPIUnifiedStream::HandleError(HRESULT err) {
  CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
        (!started() && GetCurrentThreadId() == creating_thread_id_));
  NOTREACHED() << "Error code: " << std::hex << err;
  if (source_)
    source_->OnError(this);
}

void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
  CHECK(GetCurrentThreadId() == creating_thread_id_);
  DCHECK(audio_io_thread_.get());
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();
  HandleError(err);
}

}  // namespace media