// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/audio/win/audio_low_latency_input_win.h"

#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/utf_string_conversions.h"
#include "media/audio/audio_util.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;

namespace media {
WASAPIAudioInputStream::WASAPIAudioInputStream(
    AudioManagerWin* manager, const AudioParameters& params,
    const std::string& device_id)
    : manager_(manager),
      capture_thread_(NULL),
      opened_(false),
      started_(false),
      endpoint_buffer_size_frames_(0),
      device_id_(device_id),
      sink_(NULL) {
  DCHECK(manager_);
  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load the Avrt.dll";
  // Set up the desired capture format specified by the client.
  format_.nSamplesPerSec = params.sample_rate();
  format_.wFormatTag = WAVE_FORMAT_PCM;
  format_.wBitsPerSample = params.bits_per_sample();
  format_.nChannels = params.channels();
  format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;
  format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;
  format_.cbSize = 0;
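  // Worked example (illustrative values only, not taken from this code):
  // for 16-bit stereo PCM at 48000 Hz, nBlockAlign = (16 / 8) * 2 = 4 bytes
  // per frame and nAvgBytesPerSec = 48000 * 4 = 192000 bytes per second.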

  // Size in bytes of each audio frame.
  frame_size_ = format_.nBlockAlign;
  // Store size of audio packets which we expect to get from the audio
  // endpoint device in each capture event.
  packet_size_frames_ = params.GetBytesPerBuffer() / format_.nBlockAlign;
  packet_size_bytes_ = params.GetBytesPerBuffer();
  DVLOG(1) << "Number of bytes per audio frame  : " << frame_size_;
  DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
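  // Example with assumed parameters: a 10 ms buffer of 16-bit stereo at
  // 48000 Hz makes GetBytesPerBuffer() return 1920 bytes, so
  // packet_size_frames_ = 1920 / 4 = 480 frames per capture callback.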

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time
  // a buffer becomes ready to be processed by the client.
  audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(audio_samples_ready_event_.IsValid());

  // Create the event which will be set in Stop() when capturing shall stop.
  stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(stop_capture_event_.IsValid());

  ms_to_frame_count_ = static_cast<double>(params.sample_rate()) / 1000.0;
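  // For example, an assumed sample rate of 44100 Hz yields
  // ms_to_frame_count_ = 44.1, i.e., 44.1 audio frames per millisecond.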

  LARGE_INTEGER performance_frequency;
  if (QueryPerformanceFrequency(&performance_frequency)) {
    perf_count_to_100ns_units_ =
        (10000000.0 / static_cast<double>(performance_frequency.QuadPart));
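    // Illustration (assumed counter frequency): a QPC frequency of 10 MHz
    // gives 10000000.0 / 10000000 = 1.0, i.e., each counter tick corresponds
    // to exactly one 100-ns unit.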
  } else {
    LOG(ERROR) << "High-resolution performance counters are not supported.";
    perf_count_to_100ns_units_ = 0.0;
  }
}

WASAPIAudioInputStream::~WASAPIAudioInputStream() {}

bool WASAPIAudioInputStream::Open() {
  DCHECK(CalledOnValidThread());
  // Verify that we are not already opened.
  if (opened_)
    return false;

  // Obtain a reference to the IMMDevice interface of the capturing
  // device with the specified unique identifier or role which was
  // set at construction.
  HRESULT hr = SetCaptureDevice();
  if (FAILED(hr))
    return false;

  // Obtain an IAudioClient interface which enables us to create and initialize
  // an audio stream between an audio application and the audio engine.
  hr = ActivateCaptureDevice();
  if (FAILED(hr))
    return false;

  // Retrieve the stream format which the audio engine uses for its internal
  // processing/mixing of shared-mode streams. This function call is for
  // diagnostic purposes only and only in debug mode.
#ifndef NDEBUG
  hr = GetAudioEngineStreamFormat();
#endif

  // Verify that the selected audio endpoint supports the specified format
  // set during construction.
  if (!DesiredFormatIsSupported()) {
    return false;
  }

  // Initialize the audio stream between the client and the device using
  // shared mode and a lowest possible glitch-free latency.
  hr = InitializeAudioEngine();

  opened_ = SUCCEEDED(hr);
  return opened_;
}

void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
  DCHECK(CalledOnValidThread());
  DCHECK(callback);
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_ || started_)
    return;

  sink_ = callback;

  // Create and start the thread that will drive the capturing by waiting for
  // capture events.
  capture_thread_ =
      new base::DelegateSimpleThread(this, "wasapi_capture_thread");
  capture_thread_->Start();

  // Start streaming data between the endpoint buffer and the audio engine.
  HRESULT hr = audio_client_->Start();
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to start input streaming.";

  started_ = SUCCEEDED(hr);
}

void WASAPIAudioInputStream::Stop() {
  DCHECK(CalledOnValidThread());
  DVLOG(1) << "WASAPIAudioInputStream::Stop()";
  if (!started_)
    return;

  // Shut down the capture thread.
  if (stop_capture_event_.IsValid()) {
    SetEvent(stop_capture_event_.Get());
  }

  // Stop the input audio streaming.
  HRESULT hr = audio_client_->Stop();
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to stop input streaming.";
  }

  // Wait until the thread completes and perform cleanup.
  if (capture_thread_) {
    SetEvent(stop_capture_event_.Get());
    capture_thread_->Join();
    capture_thread_ = NULL;
  }

  started_ = false;
}

void WASAPIAudioInputStream::Close() {
  DVLOG(1) << "WASAPIAudioInputStream::Close()";
  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
  Stop();
  if (sink_) {
    sink_->OnClose(this);
    sink_ = NULL;
  }

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseInputStream(this);
}

double WASAPIAudioInputStream::GetMaxVolume() {
  // Verify that Open() has been called successfully, to ensure that an audio
  // session exists and that an ISimpleAudioVolume interface has been created.
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return 0.0;

  // The effective volume value is always in the range 0.0 to 1.0, hence
  // we can return a fixed value (=1.0) here.
  return 1.0;
}

void WASAPIAudioInputStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  DCHECK(CalledOnValidThread());
  DCHECK_GE(volume, 0.0);
  DCHECK_LE(volume, 1.0);

  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return;

  // Set a new master volume level. Valid volume levels are in the range
  // 0.0 to 1.0. Ignore volume-change events.
  HRESULT hr = simple_audio_volume_->SetMasterVolume(static_cast<float>(volume),
                                                     NULL);
  DLOG_IF(WARNING, FAILED(hr)) << "Failed to set new input master volume.";

  // Update the AGC volume level based on the last setting above. Note that,
  // the volume-level resolution is not infinite and it is therefore not
  // possible to assume that the volume provided as input parameter can be
  // used directly. Instead, a new query to the audio hardware is required.
  // This method does nothing if AGC is disabled.
  UpdateAgcVolume();
}

double WASAPIAudioInputStream::GetVolume() {
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return 0.0;

  // Retrieve the current volume level. The value is in the range 0.0 to 1.0.
  float level = 0.0f;
  HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
  DLOG_IF(WARNING, FAILED(hr)) << "Failed to get input master volume.";

  return static_cast<double>(level);
}

int WASAPIAudioInputStream::HardwareSampleRate(
    const std::string& device_id) {
  base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
  HRESULT hr = GetMixFormat(device_id, &audio_engine_mix_format);
  if (FAILED(hr))
    return 0;

  return static_cast<int>(audio_engine_mix_format->nSamplesPerSec);
}

uint32 WASAPIAudioInputStream::HardwareChannelCount(
    const std::string& device_id) {
  base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
  HRESULT hr = GetMixFormat(device_id, &audio_engine_mix_format);
  if (FAILED(hr))
    return 0;

  return static_cast<uint32>(audio_engine_mix_format->nChannels);
}

HRESULT WASAPIAudioInputStream::GetMixFormat(const std::string& device_id,
                                             WAVEFORMATEX** device_format) {
  // It is assumed that this static method is called from a COM thread, i.e.,
  // CoInitializeEx() is not called here to avoid STA/MTA conflicts.
  ScopedComPtr<IMMDeviceEnumerator> enumerator;
  HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),
                                NULL,
                                CLSCTX_INPROC_SERVER,
                                __uuidof(IMMDeviceEnumerator),
                                enumerator.ReceiveVoid());
  if (FAILED(hr))
    return hr;

  ScopedComPtr<IMMDevice> endpoint_device;
  if (device_id == AudioManagerBase::kDefaultDeviceId) {
    // Retrieve the default capture audio endpoint.
    hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
                                             endpoint_device.Receive());
  } else {
    // Retrieve a capture endpoint device that is specified by an endpoint
    // device-identification string.
    hr = enumerator->GetDevice(UTF8ToUTF16(device_id).c_str(),
                               endpoint_device.Receive());
  }
  if (FAILED(hr))
    return hr;

  ScopedComPtr<IAudioClient> audio_client;
  hr = endpoint_device->Activate(__uuidof(IAudioClient),
                                 CLSCTX_INPROC_SERVER,
                                 NULL,
                                 audio_client.ReceiveVoid());
  return SUCCEEDED(hr) ? audio_client->GetMixFormat(device_format) : hr;
}

void WASAPIAudioInputStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  capture_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  // Allocate a buffer with a size that enables us to take care of cases like:
  // 1) The recorded buffer size is smaller, or does not match exactly with,
  //    the selected packet size used in each callback.
  // 2) The selected buffer size is larger than the recorded buffer size in
  //    each event.
  size_t buffer_frame_index = 0;
  size_t capture_buffer_size = std::max(
      2 * endpoint_buffer_size_frames_ * frame_size_,
      2 * packet_size_frames_ * frame_size_);
  scoped_array<uint8> capture_buffer(new uint8[capture_buffer_size]);
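  // Sizing example (assumed values): with endpoint_buffer_size_frames_ = 960,
  // packet_size_frames_ = 480 and frame_size_ = 4, the allocation is
  // max(2 * 960 * 4, 2 * 480 * 4) = 7680 bytes.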

  LARGE_INTEGER now_count;
  bool recording = true;
  bool error = false;
  double volume = GetVolume();
  HANDLE wait_array[2] = {stop_capture_event_, audio_samples_ready_event_};

  while (recording && !error) {
    HRESULT hr = S_FALSE;

    // Wait for a close-down event or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
    switch (wait_result) {
      case WAIT_FAILED:
        error = true;
        break;
      case WAIT_OBJECT_0 + 0:
        // |stop_capture_event_| has been set.
        recording = false;
        break;
      case WAIT_OBJECT_0 + 1:
        {
          // |audio_samples_ready_event_| has been set.
          BYTE* data_ptr = NULL;
          UINT32 num_frames_to_read = 0;
          DWORD flags = 0;
          UINT64 device_position = 0;
          UINT64 first_audio_frame_timestamp = 0;

          // Retrieve the amount of data in the capture endpoint buffer,
          // replace it with silence if required, create callbacks for each
          // packet and store non-delivered data for the next event.
          hr = audio_capture_client_->GetBuffer(&data_ptr,
                                                &num_frames_to_read,
                                                &flags,
                                                &device_position,
                                                &first_audio_frame_timestamp);
          if (FAILED(hr)) {
            DLOG(ERROR) << "Failed to get data from the capture buffer";
            continue;
          }

          if (num_frames_to_read != 0) {
            size_t pos = buffer_frame_index * frame_size_;
            size_t num_bytes = num_frames_to_read * frame_size_;
            DCHECK_GE(capture_buffer_size, pos + num_bytes);

            if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
              // Clear out the local buffer since silence is reported.
              memset(&capture_buffer[pos], 0, num_bytes);
            } else {
              // Copy captured data from audio engine buffer to local buffer.
              memcpy(&capture_buffer[pos], data_ptr, num_bytes);
            }

            buffer_frame_index += num_frames_to_read;
          }

          hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
          DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

          // Derive a delay estimate for the captured audio packet.
          // The value contains two parts (A+B), where A is the delay of the
          // first audio frame in the packet and B is the extra delay
          // contained in any stored data. Unit is in audio frames.
          QueryPerformanceCounter(&now_count);
          double audio_delay_frames =
              ((perf_count_to_100ns_units_ * now_count.QuadPart -
                first_audio_frame_timestamp) / 10000.0) * ms_to_frame_count_ +
              buffer_frame_index - num_frames_to_read;
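          // Worked example (all values assumed for illustration): if the
          // current QPC time in 100-ns units exceeds the timestamp of the
          // first captured frame by 100000 units (10 ms) and
          // ms_to_frame_count_ = 48, then A = 10 * 48 = 480 frames; with
          // buffer_frame_index - num_frames_to_read = 20 stored frames,
          // B = 20 and the total estimate is 500 frames.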

          // Update the AGC volume level once every second. Note that,
          // |volume| is also updated each time SetVolume() is called
          // through IPC by the render-side AGC.
          QueryAgcVolume(&volume);

          // Deliver captured data to the registered consumer using a packet
          // size which was specified at construction.
          uint32 delay_frames = static_cast<uint32>(audio_delay_frames + 0.5);
          while (buffer_frame_index >= packet_size_frames_) {
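            // Delivery example (assumed values): with buffer_frame_index =
            // 500 and packet_size_frames_ = 480, this loop runs once,
            // delivers one 480-frame packet, and leaves 20 frames behind for
            // the next iteration or capture event.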
            uint8* audio_data =
                reinterpret_cast<uint8*>(capture_buffer.get());

            // Deliver data packet, delay estimation and volume level to
            // the user.
            sink_->OnData(this,
                          audio_data,
                          packet_size_bytes_,
                          delay_frames * frame_size_,
                          volume);

            // Store parts of the recorded data which can't be delivered
            // using the current packet size. The stored section will be used
            // either in the next while-loop iteration or in the next
            // capture event.
            memmove(&capture_buffer[0],
                    &capture_buffer[packet_size_bytes_],
                    (buffer_frame_index - packet_size_frames_) * frame_size_);

            buffer_frame_index -= packet_size_frames_;
            delay_frames -= packet_size_frames_;
          }
        }
        break;
      default:
        error = true;
        break;
    }
  }

  if (recording && error) {
    // TODO(henrika): perhaps it is worth improving the cleanup here by e.g.
    // stopping the audio client, joining the thread etc.?
    NOTREACHED() << "WASAPI capturing failed with error code "
                 << GetLastError();
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIAudioInputStream::HandleError(HRESULT err) {
  NOTREACHED() << "Error code: " << err;
  if (sink_)
    sink_->OnError(this);
}

HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
  ScopedComPtr<IMMDeviceEnumerator> enumerator;
  HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),
                                NULL,
                                CLSCTX_INPROC_SERVER,
                                __uuidof(IMMDeviceEnumerator),
                                enumerator.ReceiveVoid());
  if (FAILED(hr))
    return hr;

  // Retrieve the IMMDevice by using the specified role or the specified
  // unique endpoint device-identification string.
  // TODO(henrika): possibly add support for the eCommunications role as well.
  if (device_id_ == AudioManagerBase::kDefaultDeviceId) {
    // Retrieve the default capture audio endpoint for the specified role.
    // Note that, in Windows Vista, the MMDevice API supports device roles
    // but the system-supplied user interface programs do not.
    hr = enumerator->GetDefaultAudioEndpoint(eCapture,
                                             eConsole,
                                             endpoint_device_.Receive());
  } else {
    // Retrieve a capture endpoint device that is specified by an endpoint
    // device-identification string.
    hr = enumerator->GetDevice(UTF8ToUTF16(device_id_).c_str(),
                               endpoint_device_.Receive());
  }
  if (FAILED(hr))
    return hr;

  // Verify that the audio endpoint device is active, i.e., the audio
  // adapter that connects to the endpoint device is present and enabled.
  DWORD state = DEVICE_STATE_DISABLED;
  hr = endpoint_device_->GetState(&state);
  if (SUCCEEDED(hr)) {
    if (!(state & DEVICE_STATE_ACTIVE)) {
      DLOG(ERROR) << "Selected capture device is not active.";
      hr = E_ACCESSDENIED;
    }
  }

  return hr;
}

HRESULT WASAPIAudioInputStream::ActivateCaptureDevice() {
  // Creates and activates an IAudioClient COM object given the selected
  // capture endpoint device.
  HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),
                                          CLSCTX_INPROC_SERVER,
                                          NULL,
                                          audio_client_.ReceiveVoid());
  return hr;
}

HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {
  HRESULT hr = S_OK;
#ifndef NDEBUG
  // The GetMixFormat() method retrieves the stream format that the
  // audio engine uses for its internal processing of shared-mode streams.
  // The method always uses a WAVEFORMATEXTENSIBLE structure, instead
  // of a stand-alone WAVEFORMATEX structure, to specify the format.
  // A WAVEFORMATEXTENSIBLE structure can specify both the mapping of
  // channels to speakers and the number of bits of precision in each sample.
  base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> format_ex;
  hr = audio_client_->GetMixFormat(
      reinterpret_cast<WAVEFORMATEX**>(&format_ex));

  // See http://msdn.microsoft.com/en-us/windows/hardware/gg463006#EFH
  // for details on the WAVE file format.
  WAVEFORMATEX format = format_ex->Format;
  DVLOG(2) << "WAVEFORMATEX:";
  DVLOG(2) << "  wFormatTag     : 0x" << std::hex << format.wFormatTag;
  DVLOG(2) << "  nChannels      : " << format.nChannels;
  DVLOG(2) << "  nSamplesPerSec : " << format.nSamplesPerSec;
  DVLOG(2) << "  nAvgBytesPerSec: " << format.nAvgBytesPerSec;
  DVLOG(2) << "  nBlockAlign    : " << format.nBlockAlign;
  DVLOG(2) << "  wBitsPerSample : " << format.wBitsPerSample;
  DVLOG(2) << "  cbSize         : " << format.cbSize;

  DVLOG(2) << "WAVEFORMATEXTENSIBLE:";
  DVLOG(2) << " wValidBitsPerSample: " <<
      format_ex->Samples.wValidBitsPerSample;
  DVLOG(2) << " dwChannelMask      : 0x" << std::hex <<
      format_ex->dwChannelMask;
  if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
    DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_PCM";
  else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
    DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
  else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_WAVEFORMATEX)
    DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_WAVEFORMATEX";
#endif
  return hr;
}

bool WASAPIAudioInputStream::DesiredFormatIsSupported() {
  // An application that uses WASAPI to manage shared-mode streams can rely
  // on the audio engine to perform only limited format conversions. The audio
  // engine can convert between a standard PCM sample size used by the
  // application and the floating-point samples that the engine uses for its
  // internal processing. However, the format for an application stream
  // typically must have the same number of channels and the same sample
  // rate as the stream format used by the device.
  // Many audio devices support both PCM and non-PCM stream formats. However,
  // the audio engine can mix only PCM streams.
  base::win::ScopedCoMem<WAVEFORMATEX> closest_match;
  HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
                                                &format_,
                                                &closest_match);
  DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "
                                << "but a closest match exists.";
  return (hr == S_OK);
}

HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
  // Initialize the audio stream between the client and the device.
  // We connect indirectly through the audio engine by using shared mode
  // and WASAPI is initialized in an event driven mode.
  // Note that, |hnsBufferDuration| is set to 0, which ensures that the
  // buffer is never smaller than the minimum buffer size needed to ensure
  // that glitches do not occur between the periodic processing passes.
  // This setting should lead to lowest possible latency.
  HRESULT hr = audio_client_->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                         AUDCLNT_STREAMFLAGS_EVENTCALLBACK |
                                         AUDCLNT_STREAMFLAGS_NOPERSIST,
                                         0,  // hnsBufferDuration
                                         0,  // hnsPeriodicity
                                         &format_,
                                         NULL);
  if (FAILED(hr))
    return hr;

  // Retrieve the length of the endpoint buffer shared between the client
  // and the audio engine. The buffer length determines the maximum amount
  // of capture data that the audio engine can read from the endpoint buffer
  // during a single processing pass.
  // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
  hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
  if (FAILED(hr))
    return hr;
  DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_
           << " [frames]";

#ifndef NDEBUG
  // The period between processing passes by the audio engine is fixed for a
  // particular audio endpoint device and represents the smallest processing
  // quantum for the audio engine. This period plus the stream latency between
  // the buffer and endpoint device represents the minimum possible latency
  // that an audio application can achieve.
  // TODO(henrika): possibly remove this section when all parts are ready.
  REFERENCE_TIME device_period_shared_mode = 0;
  REFERENCE_TIME device_period_exclusive_mode = 0;
  HRESULT hr_dbg = audio_client_->GetDevicePeriod(
      &device_period_shared_mode, &device_period_exclusive_mode);
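  // Note on units (illustrative numbers): REFERENCE_TIME is expressed in
  // 100-ns units, so the divisions by 10000.0 below convert to milliseconds;
  // e.g., an assumed shared-mode period of 100000 units corresponds to 10 ms.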
  if (SUCCEEDED(hr_dbg)) {
    DVLOG(1) << "device period: "
             << static_cast<double>(device_period_shared_mode / 10000.0)
             << " [ms]";
  }

  REFERENCE_TIME latency = 0;
  hr_dbg = audio_client_->GetStreamLatency(&latency);
  if (SUCCEEDED(hr_dbg)) {
    DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0)
             << " [ms]";
  }
#endif

  // Set the event handle that the audio engine will signal each time
  // a buffer becomes ready to be processed by the client.
  hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
  if (FAILED(hr))
    return hr;

  // Get access to the IAudioCaptureClient interface. This interface
  // enables us to read input data from the capture endpoint buffer.
  hr = audio_client_->GetService(__uuidof(IAudioCaptureClient),
                                 audio_capture_client_.ReceiveVoid());
  if (FAILED(hr))
    return hr;

  // Obtain a reference to the ISimpleAudioVolume interface which enables
  // us to control the master volume level of an audio session.
  hr = audio_client_->GetService(__uuidof(ISimpleAudioVolume),
                                 simple_audio_volume_.ReceiveVoid());
  return hr;
}

}  // namespace media