// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_unified_win.h"

#include <Functiondiscoverykeys_devpkey.h>

#include "base/debug/trace_event.h"
#ifndef NDEBUG
#include "base/file_util.h"
#include "base/path_service.h"
#endif
#include "base/time/time.h"
#include "base/win/scoped_com_initializer.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"

using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
using base::win::ScopedCoMem;

// Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
// Larger values of alpha reduce the level of smoothing.
// See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
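// Example (illustrative): with alpha = 0.1, each new sample moves the
// smoothed value 10% of the way toward it, i.e.
//   smoothed += 0.1 * (sample - smoothed);
// so a step change reaches about 65% of its final value after ten samples.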
static const double kAlpha = 0.1;

// Compute a rate compensation which always attracts us back to a specified
// target level over a period of |kCorrectionTimeSeconds|.
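// Example (illustrative): at a 48 kHz output rate the correction period
// corresponds to 0.1 * 48000 = 4800 frames; an average FIFO surplus of
// 48 frames then yields a rate compensation of (4800 + 48) / 4800 = 1.01.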
static const double kCorrectionTimeSeconds = 0.1;

#ifndef NDEBUG
// Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
// See LogElementNames enumerator for details on what each column represents.
static const size_t kMaxNumSampleTypes = 4;

static const size_t kMaxNumParams = 2;

// Max number of rows in the output file |kUnifiedAudioDebugFileName|.
// Each row corresponds to one set of sample values for (approximately) the
// same time instant (stored in the first column).
static const size_t kMaxFileSamples = 10000;

// Name of output debug file used for off-line analysis of measurements which
// can be utilized for performance tuning of this class.
static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";

// Name of output debug file used for off-line analysis of measurements.
// This file will contain a list of audio parameters.
static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";
#endif

typedef uint32 ChannelConfig;

// Retrieves an integer mask which corresponds to the channel layout the
// audio engine uses for its internal processing/mixing of shared-mode
// streams. This mask indicates which channels are present in the multi-
// channel stream. The least significant bit corresponds with the Front Left
// speaker, the next least significant bit corresponds to the Front Right
// speaker, and so on, continuing in the order defined in KsMedia.h.
// See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
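// Example (illustrative): a plain stereo mix reports
// SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT = 0x1 | 0x2 = 0x3
// (KSAUDIO_SPEAKER_STEREO).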
static ChannelConfig GetChannelConfig(EDataFlow data_flow) {
  WAVEFORMATPCMEX format;
  return SUCCEEDED(media::CoreAudioUtil::GetDefaultSharedModeMixFormat(
      data_flow, eConsole, &format)) ?
      static_cast<int>(format.dwChannelMask) : 0;
}

// Use the acquired IAudioClock interface to derive a time stamp of the audio
// sample which is currently playing through the speakers.
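// Example (illustrative): a device position of 24000 ticks at a device
// frequency of 48000 Hz gives 1000 * (24000 / 48000) = 500 [ms].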
static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
  UINT64 device_frequency = 0, position = 0;
  if (FAILED(clock->GetFrequency(&device_frequency)) ||
      FAILED(clock->GetPosition(&position, NULL))) {
    return 0.0;
  }
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(position) / device_frequency);
}

// Get a time stamp in milliseconds given the number of audio frames in
// |num_frames| using the current sample rate |fs| as scale factor.
// Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(num_frames) / fs);
}

// Convert a timestamp in milliseconds to byte units given the audio format
// in |format|.
// Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
// is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
static int MillisecondsToBytes(double ts_milliseconds,
                               const WAVEFORMATPCMEX& format) {
  double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
  return static_cast<int>(seconds * format.Format.nSamplesPerSec *
      format.Format.nBlockAlign + 0.5);
}

// Convert frame count to milliseconds given the audio format in |format|.
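// Example (illustrative): 480 frames at 48000 Hz => 10 [ms].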
static double FrameCountToMilliseconds(int num_frames,
                                       const WAVEFORMATPCMEX& format) {
  return (base::Time::kMillisecondsPerSecond * num_frames) /
      static_cast<double>(format.Format.nSamplesPerSec);
}

namespace media {

WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
                                         const AudioParameters& params,
                                         const std::string& input_device_id)
    : creating_thread_id_(base::PlatformThread::CurrentId()),
      manager_(manager),
      params_(params),
      input_channels_(params.input_channels()),
      output_channels_(params.channels()),
      input_device_id_(input_device_id),
      share_mode_(CoreAudioUtil::GetShareMode()),
      volume_(1.0),
      output_buffer_size_frames_(0),
      input_buffer_size_frames_(0),
      endpoint_render_buffer_size_frames_(0),
      endpoint_capture_buffer_size_frames_(0),
      num_written_frames_(0),
      total_delay_ms_(0.0),
      total_delay_bytes_(0),
      source_(NULL),
      input_callback_received_(false),
      io_sample_rate_ratio_(1),
      target_fifo_frames_(0),
      average_delta_(0),
      fifo_rate_compensation_(1),
      update_output_delay_(false),
      capture_delay_ms_(0) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
  VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";

  VLOG(1) << "Input channels : " << input_channels_;
  VLOG(1) << "Output channels: " << output_channels_;
  VLOG(1) << "Sample rate    : " << params_.sample_rate();
  VLOG(1) << "Buffer size    : " << params.frames_per_buffer();

#ifndef NDEBUG
  input_time_stamps_.reset(new int64[kMaxFileSamples]);
  num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
  resampler_margin_.reset(new int[kMaxFileSamples]);
  fifo_rate_comps_.reset(new double[kMaxFileSamples]);
  num_elements_.reset(new int[kMaxNumSampleTypes]);
  std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
  input_params_.reset(new int[kMaxNumParams]);
  output_params_.reset(new int[kMaxNumParams]);
#endif

  DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
      << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load the avrt.dll";

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time a buffer
  // has been recorded.
  capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Create the event which will be set in Stop() when streaming shall stop.
  stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
}

WASAPIUnifiedStream::~WASAPIUnifiedStream() {
  VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";
#ifndef NDEBUG
  base::FilePath data_file_name;
  PathService::Get(base::DIR_EXE, &data_file_name);
  data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
  data_file_ = file_util::OpenFile(data_file_name, "wt");
  DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";

  size_t n = 0;
  size_t elements_to_write = *std::min_element(
      num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
  while (n < elements_to_write) {
    fprintf(data_file_, "%I64d %d %d %10.9f\n",
            input_time_stamps_[n],
            num_frames_in_fifo_[n],
            resampler_margin_[n],
            fifo_rate_comps_[n]);
    ++n;
  }
  file_util::CloseFile(data_file_);

  base::FilePath param_file_name;
  PathService::Get(base::DIR_EXE, &param_file_name);
  param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
  param_file_ = file_util::OpenFile(param_file_name, "wt");
  DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
  fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
  fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
  file_util::CloseFile(param_file_);
#endif
}

bool WASAPIUnifiedStream::Open() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
  DVLOG(1) << "WASAPIUnifiedStream::Open()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  AudioParameters hw_output_params;
  HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
      eRender, eConsole, &hw_output_params);
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred output audio parameters.";
    return false;
  }

  AudioParameters hw_input_params;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    // Query native parameters for the default capture device.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        eCapture, eConsole, &hw_input_params);
  } else {
    // Query native parameters for the capture device given by
    // |input_device_id_|.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        input_device_id_, &hw_input_params);
  }
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred input audio parameters.";
    return false;
  }

  // It is currently only possible to open up the output audio device using
  // the native number of channels.
  if (output_channels_ != hw_output_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested output channels.";
    return false;
  }

  // It is currently only possible to open up the input audio device using
  // the native number of channels. If the client asks for a higher channel
  // count, we will do channel upmixing in this class. The most typical
  // example is that the client provides stereo but the hardware can only be
  // opened in mono mode. We will do mono to stereo conversion in this case.
  if (input_channels_ < hw_input_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested input channels.";
    return false;
  } else if (input_channels_ > hw_input_params.channels()) {
    ChannelLayout input_layout =
        GuessChannelLayout(hw_input_params.channels());
    ChannelLayout output_layout = GuessChannelLayout(input_channels_);
    channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
    DVLOG(1) << "Remixing input channel layout from " << input_layout
             << " to " << output_layout << "; from "
             << hw_input_params.channels() << " channels to "
             << input_channels_;
  }

  if (hw_output_params.sample_rate() != params_.sample_rate()) {
    LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
               << " must match the hardware sample-rate: "
               << hw_output_params.sample_rate();
    return false;
  }

  if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
    LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
               << " must match the hardware buffer size: "
               << hw_output_params.frames_per_buffer();
    return false;
  }

  // Set up WAVEFORMATPCMEX structures for input and output given the specified
  // audio parameters.
  SetIOFormats(hw_input_params, params_);

  // Create the input and output busses.
  input_bus_ = AudioBus::Create(
      hw_input_params.channels(), input_buffer_size_frames_);
  output_bus_ = AudioBus::Create(params_);

  // One extra bus is needed for the input channel mixing case.
  if (channel_mixer_) {
    DCHECK_LT(hw_input_params.channels(), input_channels_);
    // The size of the |channel_bus_| must be the same as the size of the
    // output bus to ensure that the channel manager can deal with both
    // resampled and non-resampled data as input.
    channel_bus_ = AudioBus::Create(
        input_channels_, params_.frames_per_buffer());
  }

  // Check if FIFO and resampling is required to match the input rate to the
  // output rate. If so, a special thread loop, optimized for this case, will
  // be used. This mode is also called varispeed mode.
  // Note that we can also use this mode when input and output rates are the
  // same but native buffer sizes differ (can happen if two different audio
  // devices are used). For this case, the resampler uses a target ratio of
  // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
  // required to compensate for the difference in buffer sizes.
  // TODO(henrika): we could perhaps improve the performance for the second
  // case here by only using the FIFO and avoid resampling. Not sure how much
  // that would give and we risk not compensating for clock drift.
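  // Example (illustrative): a 44.1 kHz capture device paired with a 48 kHz
  // render device enters varispeed mode with a nominal resampling ratio of
  // 44100 / 48000 = 0.91875, fine-tuned at runtime by the FIFO-based drift
  // compensation.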
  if (hw_input_params.sample_rate() != params_.sample_rate() ||
      hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
    DoVarispeedInitialization(hw_input_params, params_);
  }

  // Render side (event driven only in varispeed mode):

  ScopedComPtr<IAudioClient> audio_output_client =
      CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
  if (!audio_output_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
                                        share_mode_,
                                        &output_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // The |render_event_| will be NULL unless varispeed mode is utilized.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_output_client, &output_format_, render_event_.Get(),
        &endpoint_render_buffer_size_frames_);
    if (FAILED(hr))
      return false;
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
    return false;
  }

  ScopedComPtr<IAudioRenderClient> audio_render_client =
      CoreAudioUtil::CreateRenderClient(audio_output_client);
  if (!audio_render_client)
    return false;

  // Capture side (always event driven but format depends on varispeed or not):

  ScopedComPtr<IAudioClient> audio_input_client;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
  } else {
    ScopedComPtr<IMMDevice> audio_input_device(
        CoreAudioUtil::CreateDevice(input_device_id_));
    audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
  }
  if (!audio_input_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
                                        share_mode_,
                                        &input_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // Include valid event handle for event-driven initialization.
    // The input side is always event driven independent of if varispeed is
    // utilized or not.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_input_client, &input_format_, capture_event_.Get(),
        &endpoint_capture_buffer_size_frames_);
    if (FAILED(hr))
      return false;
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
    return false;
  }

  ScopedComPtr<IAudioCaptureClient> audio_capture_client =
      CoreAudioUtil::CreateCaptureClient(audio_input_client);
  if (!audio_capture_client)
    return false;

  // Varispeed mode requires additional preparations.
  if (VarispeedMode())
    ResetVarispeed();

  // Store all valid COM interfaces.
  audio_output_client_ = audio_output_client;
  audio_render_client_ = audio_render_client;
  audio_input_client_ = audio_input_client;
  audio_capture_client_ = audio_capture_client;

  return SUCCEEDED(hr);
}

void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
  DVLOG(1) << "WASAPIUnifiedStream::Start()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  if (audio_io_thread_) {
    CHECK_EQ(callback, source_);
    return;
  }

  source_ = callback;

  if (VarispeedMode()) {
    ResetVarispeed();
    fifo_rate_compensation_ = 1.0;
    average_delta_ = 0.0;
    input_callback_received_ = false;
    update_output_delay_ = false;
  }

  // Create and start the thread that will listen for capture events.
  // We will also listen on render events on the same thread if varispeed
  // mode is utilized.
  audio_io_thread_.reset(
      new base::DelegateSimpleThread(this, "wasapi_io_thread"));
  audio_io_thread_->Start();
  if (!audio_io_thread_->HasBeenStarted()) {
    DLOG(ERROR) << "Failed to start WASAPI IO thread.";
    return;
  }

  // Start input streaming data between the endpoint buffer and the audio
  // engine.
  HRESULT hr = audio_input_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }

  // Ensure that the endpoint buffer is prepared with silence.
  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
            audio_output_client_, audio_render_client_)) {
      DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
  }
  num_written_frames_ = endpoint_render_buffer_size_frames_;

  // Start output streaming data between the endpoint buffer and the audio
  // engine.
  hr = audio_output_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }
}

void WASAPIUnifiedStream::Stop() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
  DVLOG(1) << "WASAPIUnifiedStream::Stop()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (!audio_io_thread_)
    return;

  // Stop input audio streaming.
  HRESULT hr = audio_input_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop input streaming: " << std::hex << hr;
  }

  // Stop output audio streaming.
  hr = audio_output_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop output streaming: " << std::hex << hr;
  }

  // Wait until the thread completes and perform cleanup.
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();

  // Ensure that we don't quit the main thread loop immediately next
  // time Start() is called.
  ResetEvent(stop_streaming_event_.Get());

  // Clear source callback, it'll be set again on the next Start() call.
  source_ = NULL;

  // Flush all pending data and reset the audio clock stream position to 0.
  hr = audio_output_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset output streaming: " << std::hex << hr;
  }

  hr = audio_input_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset input streaming: " << std::hex << hr;
  }

  // Extra safety check to ensure that the buffers are cleared.
  // If the buffers are not cleared correctly, the next call to Start()
  // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
  // TODO(henrika): this check is only needed for shared-mode streams.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  DCHECK_EQ(0u, num_queued_frames);
}

void WASAPIUnifiedStream::Close() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
  DVLOG(1) << "WASAPIUnifiedStream::Close()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
  Stop();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseOutputStream(this);
}

void WASAPIUnifiedStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  if (volume < 0 || volume > 1)
    return;
  volume_ = volume;
}

void WASAPIUnifiedStream::GetVolume(double* volume) {
  DVLOG(1) << "GetVolume()";
  *volume = static_cast<double>(volume_);
}

void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
  // TODO(henrika): utilize frame_delay?
  // A non-zero frame delay means multiple callbacks were necessary to
  // fulfill the requested number of frames.
  if (frame_delay > 0)
    DVLOG(3) << "frame_delay: " << frame_delay;

#ifndef NDEBUG
  resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
      fifo_->frames() - audio_bus->frames();
  num_elements_[RESAMPLER_MARGIN]++;
#endif

  if (fifo_->frames() < audio_bus->frames()) {
    DVLOG(ERROR) << "Not enough data in the FIFO ("
                 << fifo_->frames() << " < " << audio_bus->frames() << ")";
    audio_bus->Zero();
    return;
  }

  fifo_->Consume(audio_bus, 0, audio_bus->frames());
}

void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
                                       const AudioParameters& output_params) {
  for (int n = 0; n < 2; ++n) {
    const AudioParameters& params = (n == 0) ? input_params : output_params;
    WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
    WAVEFORMATEX* format = &xformat->Format;

    // Begin with the WAVEFORMATEX structure that specifies the basic format.
    format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    format->nChannels = params.channels();
    format->nSamplesPerSec = params.sample_rate();
    format->wBitsPerSample = params.bits_per_sample();
    format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
    format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
    format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
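    // Example (illustrative): 16-bit stereo at 48 kHz gives
    // nBlockAlign = (16 / 8) * 2 = 4 bytes per frame and
    // nAvgBytesPerSec = 48000 * 4 = 192000.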

    // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
    // Note that we always open up using the native channel layout.
    (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
    (*xformat).dwChannelMask = (n == 0) ?
        GetChannelConfig(eCapture) : GetChannelConfig(eRender);
    (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  }

  input_buffer_size_frames_ = input_params.frames_per_buffer();
  output_buffer_size_frames_ = output_params.frames_per_buffer();
  VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
  VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;

#ifndef NDEBUG
  input_params_[0] = input_format_.Format.nSamplesPerSec;
  input_params_[1] = input_buffer_size_frames_;
  output_params_[0] = output_format_.Format.nSamplesPerSec;
  output_params_[1] = output_buffer_size_frames_;
#endif
}

void WASAPIUnifiedStream::DoVarispeedInitialization(
    const AudioParameters& input_params, const AudioParameters& output_params) {
  DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";

  // A FIFO is required in this mode for input to output buffering.
  // Note that it will add some latency.
  fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
  VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
          << " (#channels=" << input_params.channels() << ")";

  // Create the multi channel resampler using the initial sample rate ratio.
  // We will call MultiChannelResampler::SetRatio() during runtime to
  // allow arbitrary combinations of input and output devices running off
  // different clocks and using different drivers, with potentially
  // differing sample-rates. Note that the requested block size is given by
  // the native input buffer size |input_buffer_size_frames_|.
  io_sample_rate_ratio_ = input_params.sample_rate() /
      static_cast<double>(output_params.sample_rate());
  DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
  resampler_.reset(new MultiChannelResampler(
      input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_,
      base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
  VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
          << output_params.sample_rate();

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  // The actual size will vary but the goal is to ensure that the average size
  // is given by this value.
  target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
  VLOG(1) << "Target FIFO size: " << target_fifo_frames_;
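
  // Example (illustrative): if kTargetFifoSafetyFactor were 2 and the native
  // input buffer held 441 frames, the FIFO would aim to hold ~882 frames on
  // average.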

  // Create the event which the audio engine will signal each time it
  // wants an audio buffer to render.
  render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Allocate memory for temporary audio bus used to store resampled input
  // audio.
  resampled_bus_ = AudioBus::Create(
      input_params.channels(), output_buffer_size_frames_);

  // Buffer initial silence corresponding to target I/O buffering.
  ResetVarispeed();
}

void WASAPIUnifiedStream::ResetVarispeed() {
  DCHECK(VarispeedMode());

  // Buffer initial silence corresponding to target I/O buffering.
  fifo_->Clear();
  scoped_ptr<AudioBus> silence =
      AudioBus::Create(input_format_.Format.nChannels,
                       target_fifo_frames_);
  silence->Zero();
  fifo_->Push(silence.get());
}

void WASAPIUnifiedStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  // TODO(henrika): investigate if it is possible to include these additional
  // settings in SetThreadPriority() as well.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  // The IAudioClock interface enables us to monitor a stream's data
  // rate and the current position in the stream. Allocate it before we
  // start streaming.
  ScopedComPtr<IAudioClock> audio_output_clock;
  HRESULT hr = audio_output_client_->GetService(
      __uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
  LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
                              << std::hex << hr;

  bool streaming = true;
  bool error = false;

  HANDLE wait_array[3];
  size_t num_handles = 0;
  wait_array[num_handles++] = stop_streaming_event_;
  wait_array[num_handles++] = capture_event_;
  if (render_event_.IsValid()) {
    // One extra event handle is needed in varispeed mode.
    wait_array[num_handles++] = render_event_;
  }
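
  // Note (illustrative): WaitForMultipleObjects() returns WAIT_OBJECT_0 plus
  // the array index of the signaled handle, so the order used above defines
  // the case labels in the switch statement below.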

  // Keep streaming audio until stop event is signaled.
  // Capture events are always used but render events are only active in
  // varispeed mode.
  while (streaming && !error) {
    // Wait for a close-down event, or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(num_handles,
                                               wait_array,
                                               FALSE,
                                               INFINITE);
    switch (wait_result) {
      case WAIT_OBJECT_0 + 0:
        // |stop_streaming_event_| has been set.
        streaming = false;
        break;
      case WAIT_OBJECT_0 + 1:
        // |capture_event_| has been set
        if (VarispeedMode()) {
          ProcessInputAudio();
        } else {
          ProcessInputAudio();
          ProcessOutputAudio(audio_output_clock);
        }
        break;
      case WAIT_OBJECT_0 + 2:
        DCHECK(VarispeedMode());
        // |render_event_| has been set
        ProcessOutputAudio(audio_output_clock);
        break;
      default:
        error = true;
        break;
    }
  }

  if (streaming && error) {
    // Stop audio streaming since something has gone wrong in our main thread
    // loop. Note that we are still in a "started" state, hence a Stop() call
    // is required to join the thread properly.
    audio_input_client_->Stop();
    audio_output_client_->Stop();
    PLOG(ERROR) << "WASAPI streaming failed.";
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIUnifiedStream::ProcessInputAudio() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");

  BYTE* data_ptr = NULL;
  UINT32 num_captured_frames = 0;
  DWORD flags = 0;
  UINT64 device_position = 0;
  UINT64 capture_time_stamp = 0;

  const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;

  base::TimeTicks now_tick = base::TimeTicks::HighResNow();

#ifndef NDEBUG
  if (VarispeedMode()) {
    input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
        now_tick.ToInternalValue();
    num_elements_[INPUT_TIME_STAMP]++;
  }
#endif

  // Retrieve the amount of data in the capture endpoint buffer.
  // |endpoint_capture_time_stamp| is the value of the performance
  // counter at the time that the audio endpoint device recorded
  // the device position of the first audio frame in the data packet.
  HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
                                                &num_captured_frames,
                                                &flags,
                                                &device_position,
                                                &capture_time_stamp);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to get data from the capture buffer";
    return;
  }

  if (hr == AUDCLNT_S_BUFFER_EMPTY) {
    // The return code is a success code but a new packet is *not* available
    // and none of the output parameters in the GetBuffer() call contains valid
    // values. Best we can do is to deliver silence and avoid setting
    // |input_callback_received_| since this only seems to happen for the
    // initial event(s) on some devices.
    input_bus_->Zero();
  } else {
    // Valid data has been recorded and it is now OK to set the flag which
    // informs the render side that capturing has started.
    input_callback_received_ = true;
  }

  if (num_captured_frames != 0) {
    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
      // Clear out the capture buffer since silence is reported.
      input_bus_->Zero();
    } else {
      // Store captured data in an audio bus after de-interleaving
      // the data to match the audio bus structure.
      input_bus_->FromInterleaved(
          data_ptr, num_captured_frames, bytes_per_sample);
    }
  }

  hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

  // Buffer input into FIFO if varispeed mode is used. The render event
  // will drive resampling of this data to match the output side.
  if (VarispeedMode()) {
    int available_frames = fifo_->max_frames() - fifo_->frames();
    if (input_bus_->frames() <= available_frames) {
      fifo_->Push(input_bus_.get());
    }
#ifndef NDEBUG
    num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
        fifo_->frames();
    num_elements_[NUM_FRAMES_IN_FIFO]++;
#endif
  }

  // Save resources by not asking for new delay estimates each time.
  // These estimates are fairly stable and it is perfectly safe to only
  // sample at a rate of ~1Hz.
  // TODO(henrika): we might have to increase the update rate in varispeed
  // mode since the delay variations are higher in this mode.
  if ((now_tick - last_delay_sample_time_).InMilliseconds() >
      kTimeDiffInMillisecondsBetweenDelayMeasurements &&
      input_callback_received_) {
    // Calculate the estimated capture delay, i.e., the latency between
    // the recording time and the time when we are notified about
    // the recorded data. Note that the capture time stamp is given in
    // 100-nanosecond (0.1 microseconds) units.
    base::TimeDelta diff =
        now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
    capture_delay_ms_ = diff.InMillisecondsF();

    last_delay_sample_time_ = now_tick;
    update_output_delay_ = true;
  }
}

void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");

  if (!input_callback_received_) {
    if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
      if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
              audio_output_client_, audio_render_client_))
        DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
    return;
  }

  // Rate adjusted resampling is required in varispeed mode. It means that
  // recorded audio samples will be read from the FIFO, resampled to match the
  // output sample-rate and then stored in |resampled_bus_|.
  if (VarispeedMode()) {
    // Calculate a varispeed rate scalar factor to compensate for drift between
    // input and output. We use the actual number of frames still in the FIFO
    // compared with the ideal value of |target_fifo_frames_|.
    int delta = fifo_->frames() - target_fifo_frames_;

    // Average |delta| because it can jitter back/forth quite frequently
    // by +/- the hardware buffer-size *if* the input and output callbacks are
    // happening at almost exactly the same time. Also, if the input and output
    // sample-rates are different then |delta| will jitter quite a bit due to
    // the rate conversion happening in the varispeed, plus the jittering of
    // the callbacks. The average value is what's important here.
    // We use an exponential smoothing filter to reduce the variations.
    average_delta_ += kAlpha * (delta - average_delta_);

    // Compute a rate compensation which always attracts us back to the
    // |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
    double correction_time_frames =
        kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
    fifo_rate_compensation_ =
        (correction_time_frames + average_delta_) / correction_time_frames;

#ifndef NDEBUG
    fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
        fifo_rate_compensation_;
    num_elements_[RATE_COMPENSATION]++;
#endif

    // Adjust for FIFO drift.
    const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
    resampler_->SetRatio(new_ratio);
    // Get resampled input audio from FIFO where the size is given by the
    // output side.
    resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
  }

  // Derive a new total delay estimate if the capture side has set the
  // |update_output_delay_| flag.
  if (update_output_delay_) {
    // Calculate the estimated render delay, i.e., the time difference
    // between the time when data is added to the endpoint buffer and
    // when the data is played out on the actual speaker.
    const double stream_pos = CurrentStreamPosInMilliseconds(
        num_written_frames_ + output_buffer_size_frames_,
        output_format_.Format.nSamplesPerSec);
    const double speaker_pos =
        SpeakerStreamPosInMilliseconds(audio_output_clock);
    const double render_delay_ms = stream_pos - speaker_pos;
    const double fifo_delay_ms = VarispeedMode() ?
        FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;

    // Derive the total delay, i.e., the sum of the input and output
    // delays. Also convert the value into byte units. An extra FIFO delay
    // is added for varispeed usage cases.
    total_delay_ms_ = VarispeedMode() ?
        capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
        capture_delay_ms_ + render_delay_ms;
    DVLOG(2) << "total_delay_ms   : " << total_delay_ms_;
    DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
    DVLOG(3) << " render_delay_ms : " << render_delay_ms;
    DVLOG(3) << " fifo_delay_ms   : " << fifo_delay_ms;
    total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);
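
    // Example (illustrative): 10 ms capture delay + 20 ms render delay +
    // 20 ms FIFO delay => total_delay_ms_ = 50; at 48 kHz with 4-byte frames
    // this converts to 0.05 * 48000 * 4 = 9600 bytes.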

    // Wait for new signal from the capture side.
    update_output_delay_ = false;
  }

  // Select source depending on if varispeed is utilized or not.
  // Also, the source might be the output of a channel mixer if channel mixing
  // is required to match the native input channels to the number of input
  // channels used by the client (given by |input_channels_| in this case).
  AudioBus* input_bus = VarispeedMode() ?
      resampled_bus_.get() : input_bus_.get();
  if (channel_mixer_) {
    DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
    // Most common case is 1->2 channel upmixing.
    channel_mixer_->Transform(input_bus, channel_bus_.get());
    // Use the output from the channel mixer as new input bus.
    input_bus = channel_bus_.get();
  }

  // Prepare for rendering by calling OnMoreIOData().
  int frames_filled = source_->OnMoreIOData(
      input_bus,
      output_bus_.get(),
      AudioBuffersState(0, total_delay_bytes_));
  DCHECK_EQ(frames_filled, output_bus_->frames());

  // Keep track of number of rendered frames since we need it for
  // our delay calculations.
  num_written_frames_ += frames_filled;

  // Derive the amount of available space in the endpoint buffer.
  // Avoid render attempt if there is no room for a captured packet.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  if (endpoint_render_buffer_size_frames_ - num_queued_frames <
      output_buffer_size_frames_)
    return;

  // Grab all available space in the rendering endpoint buffer
  // into which the client can write a data packet.
  uint8* audio_data = NULL;
  HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
                                               &audio_data);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to access render buffer";
    return;
  }

  const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;

  // Convert the audio bus content to interleaved integer data using
  // |audio_data| as destination.
  output_bus_->Scale(volume_);
  output_bus_->ToInterleaved(
      output_buffer_size_frames_, bytes_per_sample, audio_data);

  // Release the buffer space acquired in the GetBuffer() call.
  hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";
}

void WASAPIUnifiedStream::HandleError(HRESULT err) {
  CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
        (!started() && GetCurrentThreadId() == creating_thread_id_));
  NOTREACHED() << "Error code: " << std::hex << err;
  if (source_)
    source_->OnError(this);
}

void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
  CHECK(GetCurrentThreadId() == creating_thread_id_);
  DCHECK(audio_io_thread_.get());
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();
  HandleError(err);
}

}  // namespace media