// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_unified_win.h"

#include <Functiondiscoverykeys_devpkey.h>

#include "base/debug/trace_event.h"
#include "base/file_util.h"
#include "base/path_service.h"
#include "base/time/time.h"
#include "base/win/scoped_com_initializer.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"

using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
using base::win::ScopedCoMem;

// Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
// Larger values of alpha reduce the level of smoothing.
// See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
static const double kAlpha = 0.1;
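// For illustration: with kAlpha = 0.1, each new sample moves the smoothed
// value 10% of the way toward the latest measurement, so a sudden jump in the
// measured FIFO deviation is spread out over roughly ten updates (see
// |average_delta_| in ProcessOutputAudio()).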

// Compute a rate compensation which always attracts us back to a specified
// target level over a period of |kCorrectionTimeSeconds|.
static const double kCorrectionTimeSeconds = 0.1;

// Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
// See LogElementNames enumerator for details on what each column represents.
static const size_t kMaxNumSampleTypes = 4;

static const size_t kMaxNumParams = 2;

// Max number of rows in the output file |kUnifiedAudioDebugFileName|.
// Each row corresponds to one set of sample values for (approximately) the
// same time instant (stored in the first column).
static const size_t kMaxFileSamples = 10000;

// Name of output debug file used for off-line analysis of measurements which
// can be utilized for performance tuning of this class.
static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";

// Name of output debug file used for off-line analysis of measurements.
// This file will contain a list of audio parameters.
static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";

// Use the acquired IAudioClock interface to derive a time stamp of the audio
// sample which is currently playing through the speakers.
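// Example: |device_frequency| = 48000 and |position| = 24000 => 500 [ms].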
static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
  UINT64 device_frequency = 0, position = 0;
  if (FAILED(clock->GetFrequency(&device_frequency)) ||
      FAILED(clock->GetPosition(&position, NULL))) {
    return 0.0;
  }
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(position) / device_frequency);
}

// Get a time stamp in milliseconds given number of audio frames in |num_frames|
// using the current sample rate |fs| as scale factor.
// Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(num_frames) / fs);
}

// Convert a timestamp in milliseconds to byte units given the audio format
// in |format|.
// Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
// is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
static int MillisecondsToBytes(double ts_milliseconds,
                               const WAVEFORMATPCMEX& format) {
  double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
  return static_cast<int>(seconds * format.Format.nSamplesPerSec *
      format.Format.nBlockAlign + 0.5);
}

// Convert frame count to milliseconds given the audio format in |format|.
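// Example: |num_frames| = 480 and a 48000 Hz sample rate => 10 [ms].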
static double FrameCountToMilliseconds(int num_frames,
                                       const WAVEFORMATPCMEX& format) {
  return (base::Time::kMillisecondsPerSecond * num_frames) /
      static_cast<double>(format.Format.nSamplesPerSec);
}

WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
                                         const AudioParameters& params,
                                         const std::string& input_device_id)
    : creating_thread_id_(base::PlatformThread::CurrentId()),
      input_channels_(params.input_channels()),
      output_channels_(params.channels()),
      input_device_id_(input_device_id),
      share_mode_(CoreAudioUtil::GetShareMode()),
      output_buffer_size_frames_(0),
      input_buffer_size_frames_(0),
      endpoint_render_buffer_size_frames_(0),
      endpoint_capture_buffer_size_frames_(0),
      num_written_frames_(0),
      total_delay_ms_(0.0),
      total_delay_bytes_(0),
      input_callback_received_(false),
      io_sample_rate_ratio_(1),
      target_fifo_frames_(0),
      fifo_rate_compensation_(1),
      update_output_delay_(false),
      capture_delay_ms_(0) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
  VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";
  VLOG(1) << "Input channels : " << input_channels_;
  VLOG(1) << "Output channels: " << output_channels_;
  VLOG(1) << "Sample rate : " << params_.sample_rate();
  VLOG(1) << "Buffer size : " << params.frames_per_buffer();

  input_time_stamps_.reset(new int64[kMaxFileSamples]);
  num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
  resampler_margin_.reset(new int[kMaxFileSamples]);
  fifo_rate_comps_.reset(new double[kMaxFileSamples]);
  num_elements_.reset(new int[kMaxNumSampleTypes]);
  std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
  input_params_.reset(new int[kMaxNumParams]);
  output_params_.reset(new int[kMaxNumParams]);

  DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
      << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load the avrt.dll";

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time a buffer
  // has been recorded.
  capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Create the event which will be set in Stop() when streaming shall stop.
  stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
}

WASAPIUnifiedStream::~WASAPIUnifiedStream() {
  VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";

  base::FilePath data_file_name;
  PathService::Get(base::DIR_EXE, &data_file_name);
  data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
  data_file_ = file_util::OpenFile(data_file_name, "wt");
  DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";

  size_t n = 0;
  size_t elements_to_write = *std::min_element(
      num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
  while (n < elements_to_write) {
    fprintf(data_file_, "%I64d %d %d %10.9f\n",
            input_time_stamps_[n],
            num_frames_in_fifo_[n],
            resampler_margin_[n],
            fifo_rate_comps_[n]);
    ++n;
  }
  file_util::CloseFile(data_file_);

  base::FilePath param_file_name;
  PathService::Get(base::DIR_EXE, &param_file_name);
  param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
  param_file_ = file_util::OpenFile(param_file_name, "wt");
  DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
  fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
  fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
  file_util::CloseFile(param_file_);
}

bool WASAPIUnifiedStream::Open() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
  DVLOG(1) << "WASAPIUnifiedStream::Open()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  AudioParameters hw_output_params;
  HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
      eRender, eConsole, &hw_output_params);
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred output audio parameters.";
    return false;
  }

  AudioParameters hw_input_params;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    // Query native parameters for the default capture device.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        eCapture, eConsole, &hw_input_params);
  } else {
    // Query native parameters for the capture device given by
    // |input_device_id_|.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        input_device_id_, &hw_input_params);
  }
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred input audio parameters.";
    return false;
  }

  // It is currently only possible to open up the output audio device using
  // the native number of channels.
  if (output_channels_ != hw_output_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested output channels.";
    return false;
  }

  // It is currently only possible to open up the input audio device using
  // the native number of channels. If the client asks for a higher channel
  // count, we will do channel upmixing in this class. The most typical
  // example is that the client provides stereo but the hardware can only be
  // opened in mono mode. We will do mono to stereo conversion in this case.
  if (input_channels_ < hw_input_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested input channels.";
    return false;
  } else if (input_channels_ > hw_input_params.channels()) {
    ChannelLayout input_layout =
        GuessChannelLayout(hw_input_params.channels());
    ChannelLayout output_layout = GuessChannelLayout(input_channels_);
    channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
    DVLOG(1) << "Remixing input channel layout from " << input_layout
             << " to " << output_layout << "; from "
             << hw_input_params.channels() << " channels to "
             << input_channels_;
  }

  if (hw_output_params.sample_rate() != params_.sample_rate()) {
    LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
               << " must match the hardware sample-rate: "
               << hw_output_params.sample_rate();
    return false;
  }

  if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
    LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
               << " must match the hardware buffer size: "
               << hw_output_params.frames_per_buffer();
    return false;
  }

  // Set up WAVEFORMATPCMEX structures for input and output given the specified
  // audio parameters.
  SetIOFormats(hw_input_params, params_);

  // Create the input and output busses.
  input_bus_ = AudioBus::Create(
      hw_input_params.channels(), input_buffer_size_frames_);
  output_bus_ = AudioBus::Create(params_);

  // One extra bus is needed for the input channel mixing case.
  if (channel_mixer_) {
    DCHECK_LT(hw_input_params.channels(), input_channels_);
    // The size of the |channel_bus_| must be the same as the size of the
    // output bus to ensure that the channel manager can deal with both
    // resampled and non-resampled data as input.
    channel_bus_ = AudioBus::Create(
        input_channels_, params_.frames_per_buffer());
  }

  // Check if FIFO and resampling is required to match the input rate to the
  // output rate. If so, a special thread loop, optimized for this case, will
  // be used. This mode is also called varispeed mode.
  // Note that we can also use this mode when input and output rates are the
  // same but native buffer sizes differ (can happen if two different audio
  // devices are used). For this case, the resampler uses a target ratio of
  // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
  // required to compensate for the difference in buffer sizes.
  // TODO(henrika): we could perhaps improve the performance for the second
  // case here by only using the FIFO and avoiding resampling. Not sure how
  // much that would give and we risk not compensating for clock drift.
  if (hw_input_params.sample_rate() != params_.sample_rate() ||
      hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
    DoVarispeedInitialization(hw_input_params, params_);
  }

  // Render side (event driven only in varispeed mode):

  ScopedComPtr<IAudioClient> audio_output_client =
      CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
  if (!audio_output_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
                                        share_mode_,
                                        &output_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // The |render_event_| will be NULL unless varispeed mode is utilized.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_output_client, &output_format_, render_event_.Get(),
        &endpoint_render_buffer_size_frames_);
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
  }
  if (FAILED(hr))
    return false;

  ScopedComPtr<IAudioRenderClient> audio_render_client =
      CoreAudioUtil::CreateRenderClient(audio_output_client);
  if (!audio_render_client)
    return false;

  // Capture side (always event driven but format depends on varispeed or not):

  ScopedComPtr<IAudioClient> audio_input_client;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
  } else {
    ScopedComPtr<IMMDevice> audio_input_device(
        CoreAudioUtil::CreateDevice(input_device_id_));
    audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
  }
  if (!audio_input_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
                                        share_mode_,
                                        &input_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // Include valid event handle for event-driven initialization.
    // The input side is always event driven independent of if varispeed is
    // used or not.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_input_client, &input_format_, capture_event_.Get(),
        &endpoint_capture_buffer_size_frames_);
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
  }
  if (FAILED(hr))
    return false;

  ScopedComPtr<IAudioCaptureClient> audio_capture_client =
      CoreAudioUtil::CreateCaptureClient(audio_input_client);
  if (!audio_capture_client)
    return false;

  // Varispeed mode requires additional preparations.
  if (VarispeedMode())
    ResetVarispeed();

  // Store all valid COM interfaces.
  audio_output_client_ = audio_output_client;
  audio_render_client_ = audio_render_client;
  audio_input_client_ = audio_input_client;
  audio_capture_client_ = audio_capture_client;

  return SUCCEEDED(hr);
}

void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
  DVLOG(1) << "WASAPIUnifiedStream::Start()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  if (audio_io_thread_) {
    CHECK_EQ(callback, source_);
    return;
  }

  source_ = callback;

  if (VarispeedMode()) {
    ResetVarispeed();
    fifo_rate_compensation_ = 1.0;
    average_delta_ = 0.0;
    input_callback_received_ = false;
    update_output_delay_ = false;
  }

  // Create and start the thread that will listen for capture events.
  // We will also listen on render events on the same thread if varispeed
  // mode is utilized.
  audio_io_thread_.reset(
      new base::DelegateSimpleThread(this, "wasapi_io_thread"));
  audio_io_thread_->Start();
  if (!audio_io_thread_->HasBeenStarted()) {
    DLOG(ERROR) << "Failed to start WASAPI IO thread.";
    return;
  }

  // Start input streaming data between the endpoint buffer and the audio
  // engine.
  HRESULT hr = audio_input_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }

  // Ensure that the endpoint buffer is prepared with silence.
  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
        audio_output_client_, audio_render_client_)) {
      DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
  }
  num_written_frames_ = endpoint_render_buffer_size_frames_;

  // Start output streaming data between the endpoint buffer and the audio
  // engine.
  hr = audio_output_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }
}

void WASAPIUnifiedStream::Stop() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
  DVLOG(1) << "WASAPIUnifiedStream::Stop()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (!audio_io_thread_)
    return;

  // Stop input audio streaming.
  HRESULT hr = audio_input_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop input streaming: " << std::hex << hr;
  }

  // Stop output audio streaming.
  hr = audio_output_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop output streaming: " << std::hex << hr;
  }

  // Wait until the thread completes and perform cleanup.
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();

  // Ensure that we don't quit the main thread loop immediately next
  // time Start() is called.
  ResetEvent(stop_streaming_event_.Get());

  // Clear source callback, it'll be set again on the next Start() call.
  source_ = NULL;

  // Flush all pending data and reset the audio clock stream position to 0.
  hr = audio_output_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset output streaming: " << std::hex << hr;
  }

  hr = audio_input_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset input streaming: " << std::hex << hr;
  }

  // Extra safety check to ensure that the buffers are cleared.
  // If the buffers are not cleared correctly, the next call to Start()
  // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
  // TODO(henrika): this check is only needed for shared-mode streams.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  DCHECK_EQ(0u, num_queued_frames);
}

void WASAPIUnifiedStream::Close() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
  DVLOG(1) << "WASAPIUnifiedStream::Close()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
  Stop();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseOutputStream(this);
}

void WASAPIUnifiedStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  if (volume < 0 || volume > 1)
    return;
  volume_ = static_cast<float>(volume);
}

void WASAPIUnifiedStream::GetVolume(double* volume) {
  DVLOG(1) << "GetVolume()";
  *volume = static_cast<double>(volume_);
}

void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
  // TODO(henrika): utilize frame_delay?
  // A non-zero frame delay means multiple callbacks were necessary to
  // fulfill the requested number of frames.
  DVLOG(3) << "frame_delay: " << frame_delay;

  resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
      fifo_->frames() - audio_bus->frames();
  num_elements_[RESAMPLER_MARGIN]++;

  if (fifo_->frames() < audio_bus->frames()) {
    DVLOG(ERROR) << "Not enough data in the FIFO ("
                 << fifo_->frames() << " < " << audio_bus->frames() << ")";
    audio_bus->Zero();
    return;
  }

  fifo_->Consume(audio_bus, 0, audio_bus->frames());
}

void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
                                       const AudioParameters& output_params) {
  for (int n = 0; n < 2; ++n) {
    const AudioParameters& params = (n == 0) ? input_params : output_params;
    WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
    WAVEFORMATEX* format = &xformat->Format;

    // Begin with the WAVEFORMATEX structure that specifies the basic format.
    format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    format->nChannels = params.channels();
    format->nSamplesPerSec = params.sample_rate();
    format->wBitsPerSample = params.bits_per_sample();
    format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
    format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
    format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);

    // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
    // Note that we always open up using the native channel layout.
    (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
    (*xformat).dwChannelMask =
        CoreAudioUtil::GetChannelConfig(
            std::string(), n == 0 ? eCapture : eRender);
    (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  }
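
  // Example of the resulting format for a 2-channel, 48000 Hz, 16-bit stream:
  // nBlockAlign = (16 / 8) * 2 = 4 bytes per audio frame and
  // nAvgBytesPerSec = 48000 * 4 = 192000 bytes per second.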

  input_buffer_size_frames_ = input_params.frames_per_buffer();
  output_buffer_size_frames_ = output_params.frames_per_buffer();
  VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
  VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;

  input_params_[0] = input_format_.Format.nSamplesPerSec;
  input_params_[1] = input_buffer_size_frames_;
  output_params_[0] = output_format_.Format.nSamplesPerSec;
  output_params_[1] = output_buffer_size_frames_;
}

void WASAPIUnifiedStream::DoVarispeedInitialization(
    const AudioParameters& input_params, const AudioParameters& output_params) {
  DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";

  // A FIFO is required in this mode for input to output buffering.
  // Note that it will add some latency.
  fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
  VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
          << " (#channels=" << input_params.channels() << ")";

  // Create the multi channel resampler using the initial sample rate ratio.
  // We will call MultiChannelResampler::SetRatio() during runtime to
  // allow arbitrary combinations of input and output devices running off
  // different clocks and using different drivers, with potentially
  // differing sample-rates. Note that the requested block size is given by
  // the native input buffer size |input_buffer_size_frames_|.
  io_sample_rate_ratio_ = input_params.sample_rate() /
      static_cast<double>(output_params.sample_rate());
  DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
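  // Example: capturing at 44100 Hz while rendering at 48000 Hz gives a ratio
  // of 44100 / 48000 ~= 0.919, i.e. roughly 0.919 input frames are consumed
  // per produced output frame before any drift compensation is applied.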
  resampler_.reset(new MultiChannelResampler(
      input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_,
      base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
  VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
          << output_params.sample_rate();

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  // The actual size will vary but the goal is to ensure that the average size
  // is given by this value.
  target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
  VLOG(1) << "Target FIFO size: " << target_fifo_frames_;

  // Create the event which the audio engine will signal each time it
  // wants an audio buffer to render.
  render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Allocate memory for temporary audio bus used to store resampled input
  // audio.
  resampled_bus_ = AudioBus::Create(
      input_params.channels(), output_buffer_size_frames_);

  // Buffer initial silence corresponding to target I/O buffering.
  ResetVarispeed();
}

void WASAPIUnifiedStream::ResetVarispeed() {
  DCHECK(VarispeedMode());

  // Buffer initial silence corresponding to target I/O buffering.
  scoped_ptr<AudioBus> silence =
      AudioBus::Create(input_format_.Format.nChannels,
                       target_fifo_frames_);
  silence->Zero();
  fifo_->Push(silence.get());
}

void WASAPIUnifiedStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  // TODO(henrika): investigate if it is possible to include these additional
  // settings in SetThreadPriority() as well.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  // The IAudioClock interface enables us to monitor a stream's data
  // rate and the current position in the stream. Allocate it before we
  // start streaming.
  ScopedComPtr<IAudioClock> audio_output_clock;
  HRESULT hr = audio_output_client_->GetService(
      __uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
  LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
                              << std::hex << hr;

  bool streaming = true;
  bool error = false;

  HANDLE wait_array[3];
  size_t num_handles = 0;
  wait_array[num_handles++] = stop_streaming_event_;
  wait_array[num_handles++] = capture_event_;
  if (VarispeedMode()) {
    // One extra event handle is needed in varispeed mode.
    wait_array[num_handles++] = render_event_;
  }
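
  // The order of the handles in |wait_array| determines which index
  // WaitForMultipleObjects() reports: WAIT_OBJECT_0 + 0 maps to
  // |stop_streaming_event_|, + 1 to |capture_event_| and, in varispeed mode,
  // + 2 to |render_event_| (see the switch statement below).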

  // Keep streaming audio until stop event is signaled.
  // Capture events are always used but render events are only active in
  // varispeed mode.
  while (streaming && !error) {
    // Wait for a close-down event, or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(num_handles,
                                               wait_array,
                                               FALSE,
                                               INFINITE);
    switch (wait_result) {
      case WAIT_OBJECT_0 + 0:
        // |stop_streaming_event_| has been set.
        streaming = false;
        break;
      case WAIT_OBJECT_0 + 1:
        // |capture_event_| has been set
        if (VarispeedMode()) {
          ProcessInputAudio();
        } else {
          ProcessInputAudio();
          ProcessOutputAudio(audio_output_clock);
        }
        break;
      case WAIT_OBJECT_0 + 2:
        DCHECK(VarispeedMode());
        // |render_event_| has been set
        ProcessOutputAudio(audio_output_clock);
        break;
      default:
        error = true;
        break;
    }
  }

  if (streaming && error) {
    // Stop audio streaming since something has gone wrong in our main thread
    // loop. Note that we are still in a "started" state, hence a Stop() call
    // is required to join the thread properly.
    audio_input_client_->Stop();
    audio_output_client_->Stop();
    PLOG(ERROR) << "WASAPI streaming failed.";
  }

  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIUnifiedStream::ProcessInputAudio() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");

  BYTE* data_ptr = NULL;
  UINT32 num_captured_frames = 0;
  DWORD flags = 0;
  UINT64 device_position = 0;
  UINT64 capture_time_stamp = 0;

  const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;

  base::TimeTicks now_tick = base::TimeTicks::HighResNow();

  if (VarispeedMode()) {
    input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
        now_tick.ToInternalValue();
    num_elements_[INPUT_TIME_STAMP]++;
  }

  // Retrieve the amount of data in the capture endpoint buffer.
  // |endpoint_capture_time_stamp| is the value of the performance
  // counter at the time that the audio endpoint device recorded
  // the device position of the first audio frame in the data packet.
  HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
                                                &num_captured_frames,
                                                &flags,
                                                &device_position,
                                                &capture_time_stamp);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to get data from the capture buffer";
    return;
  }

  if (hr == AUDCLNT_S_BUFFER_EMPTY) {
    // The return code is a success code but a new packet is *not* available
    // and none of the output parameters in the GetBuffer() call contains valid
    // values. Best we can do is to deliver silence and avoid setting
    // |input_callback_received_| since this only seems to happen for the
    // initial event(s) on some devices.
  } else {
    // Valid data has been recorded and it is now OK to set the flag which
    // informs the render side that capturing has started.
    input_callback_received_ = true;
  }

  if (num_captured_frames != 0) {
    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
      // Clear out the capture buffer since silence is reported.
      input_bus_->Zero();
    } else {
      // Store captured data in an audio bus after de-interleaving
      // the data to match the audio bus structure.
      input_bus_->FromInterleaved(
          data_ptr, num_captured_frames, bytes_per_sample);
    }
  }

  hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

  // Buffer input into FIFO if varispeed mode is used. The render event
  // will drive resampling of this data to match the output side.
  if (VarispeedMode()) {
    int available_frames = fifo_->max_frames() - fifo_->frames();
    if (input_bus_->frames() <= available_frames) {
      fifo_->Push(input_bus_.get());
    }
    num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
        fifo_->frames();
    num_elements_[NUM_FRAMES_IN_FIFO]++;
  }

  // Save resources by not asking for new delay estimates each time.
  // These estimates are fairly stable and it is perfectly safe to only
  // sample at a rate of ~1Hz.
  // TODO(henrika): we might have to increase the update rate in varispeed
  // mode since the delay variations are higher in this mode.
  if ((now_tick - last_delay_sample_time_).InMilliseconds() >
      kTimeDiffInMillisecondsBetweenDelayMeasurements &&
      input_callback_received_) {
    // Calculate the estimated capture delay, i.e., the latency between
    // the recording time and the time when we are notified about
    // the recorded data. Note that the capture time stamp is given in
    // 100-nanosecond (0.1 microseconds) units.
    base::TimeDelta diff =
        now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
    capture_delay_ms_ = diff.InMillisecondsF();
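    // Note: base::TimeTicks counts in microseconds internally, so scaling the
    // 100-nanosecond |capture_time_stamp| by 0.1 converts it to microseconds
    // before it is turned into a TimeTicks value above.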

    last_delay_sample_time_ = now_tick;
    update_output_delay_ = true;
  }
}

void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");

  if (!input_callback_received_) {
    if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
      if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
          audio_output_client_, audio_render_client_))
        DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
    return;
  }

  // Rate adjusted resampling is required in varispeed mode. It means that
  // recorded audio samples will be read from the FIFO, resampled to match the
  // output sample-rate and then stored in |resampled_bus_|.
  if (VarispeedMode()) {
    // Calculate a varispeed rate scalar factor to compensate for drift between
    // input and output. We use the actual number of frames still in the FIFO
    // compared with the ideal value of |target_fifo_frames_|.
    int delta = fifo_->frames() - target_fifo_frames_;

    // Average |delta| because it can jitter back/forth quite frequently
    // by +/- the hardware buffer-size *if* the input and output callbacks are
    // happening at almost exactly the same time. Also, if the input and output
    // sample-rates are different then |delta| will jitter quite a bit due to
    // the rate conversion happening in the varispeed, plus the jittering of
    // the callbacks. The average value is what's important here.
    // We use an exponential smoothing filter to reduce the variations.
    average_delta_ += kAlpha * (delta - average_delta_);

    // Compute a rate compensation which always attracts us back to the
    // |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
    double correction_time_frames =
        kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
    fifo_rate_compensation_ =
        (correction_time_frames + average_delta_) / correction_time_frames;
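    // Example: at 48000 Hz, correction_time_frames = 0.1 * 48000 = 4800. If
    // |average_delta_| settles at +48 frames (FIFO running slightly full),
    // the compensation becomes 4848 / 4800 = 1.01 and the resampler is asked
    // to consume input ~1% faster until the surplus is gone.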

    fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
        fifo_rate_compensation_;
    num_elements_[RATE_COMPENSATION]++;

    // Adjust for FIFO drift.
    const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
    resampler_->SetRatio(new_ratio);
    // Get resampled input audio from FIFO where the size is given by the
    // output side.
    resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
  }

  // Derive a new total delay estimate if the capture side has set the
  // |update_output_delay_| flag.
  if (update_output_delay_) {
    // Calculate the estimated render delay, i.e., the time difference
    // between the time when data is added to the endpoint buffer and
    // when the data is played out on the actual speaker.
    const double stream_pos = CurrentStreamPosInMilliseconds(
        num_written_frames_ + output_buffer_size_frames_,
        output_format_.Format.nSamplesPerSec);
    const double speaker_pos =
        SpeakerStreamPosInMilliseconds(audio_output_clock);
    const double render_delay_ms = stream_pos - speaker_pos;
    const double fifo_delay_ms = VarispeedMode() ?
        FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;

    // Derive the total delay, i.e., the sum of the input and output
    // delays. Also convert the value into byte units. An extra FIFO delay
    // is added for varispeed usage cases.
    total_delay_ms_ = VarispeedMode() ?
        capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
        capture_delay_ms_ + render_delay_ms;
    DVLOG(2) << "total_delay_ms : " << total_delay_ms_;
    DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
    DVLOG(3) << " render_delay_ms : " << render_delay_ms;
    DVLOG(3) << " fifo_delay_ms : " << fifo_delay_ms;
    total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);
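    // |total_delay_bytes_| is what gets reported to the client below through
    // AudioBuffersState, so the source can compensate for the end-to-end
    // capture-to-render latency.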

    // Wait for new signal from the capture side.
    update_output_delay_ = false;
  }

  // Select source depending on if varispeed is utilized or not.
  // Also, the source might be the output of a channel mixer if channel mixing
  // is required to match the native input channels to the number of input
  // channels used by the client (given by |input_channels_| in this case).
  AudioBus* input_bus = VarispeedMode() ?
      resampled_bus_.get() : input_bus_.get();
  if (channel_mixer_) {
    DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
    // Most common case is 1->2 channel upmixing.
    channel_mixer_->Transform(input_bus, channel_bus_.get());
    // Use the output from the channel mixer as new input bus.
    input_bus = channel_bus_.get();
  }

  // Prepare for rendering by calling OnMoreIOData().
  int frames_filled = source_->OnMoreIOData(
      input_bus,
      output_bus_.get(),
      AudioBuffersState(0, total_delay_bytes_));
  DCHECK_EQ(frames_filled, output_bus_->frames());

  // Keep track of number of rendered frames since we need it for
  // our delay calculations.
  num_written_frames_ += frames_filled;

  // Derive the amount of available space in the endpoint buffer.
  // Avoid render attempt if there is no room for a captured packet.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
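  // GetCurrentPadding() returns the number of frames that are queued in the
  // endpoint buffer but not yet played, so the free space is the endpoint
  // buffer size minus this padding value.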
  if (endpoint_render_buffer_size_frames_ - num_queued_frames <
      output_buffer_size_frames_)
    return;

  // Grab all available space in the rendering endpoint buffer
  // into which the client can write a data packet.
  uint8* audio_data = NULL;
  HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
                                               &audio_data);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to access render buffer";
    return;
  }

  const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;

  // Convert the audio bus content to interleaved integer data using
  // |audio_data| as destination.
  output_bus_->Scale(volume_);
  output_bus_->ToInterleaved(
      output_buffer_size_frames_, bytes_per_sample, audio_data);

  // Release the buffer space acquired in the GetBuffer() call.
  hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";
}

void WASAPIUnifiedStream::HandleError(HRESULT err) {
  CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
        (!started() && GetCurrentThreadId() == creating_thread_id_));
  NOTREACHED() << "Error code: " << std::hex << err;
  if (source_)
    source_->OnError(this);
}

void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
  CHECK(GetCurrentThreadId() == creating_thread_id_);
  DCHECK(audio_io_thread_.get());
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();