1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/audio/win/audio_low_latency_input_win.h"
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "media/audio/audio_util.h"
11 #include "media/audio/win/audio_manager_win.h"
12 #include "media/audio/win/avrt_wrapper_win.h"
14 using base::win::ScopedComPtr
;
15 using base::win::ScopedCOMInitializer
;
// Constructs the input stream for the capture device identified by
// |device_id|. The visible statements load the Avrt DLL, fill in a PCM capture
// format from |params|, derive the frame and packet sizes, create the two
// events used by the capture loop and cache two timing conversion factors.
// NOTE(review): the embedded line numbers in this listing skip (e.g. 21 -> 23
// -> 26), so parts of the initializer list and some braces are not visible.
19 WASAPIAudioInputStream::WASAPIAudioInputStream(
20 AudioManagerWin
* manager
, const AudioParameters
& params
,
21 const std::string
& device_id
)
23 capture_thread_(NULL
),
26 endpoint_buffer_size_frames_(0),
27 device_id_(device_id
),
31 // Load the Avrt DLL if not already loaded. Required to support MMCSS.
32 bool avrt_init
= avrt::Initialize();
33 DCHECK(avrt_init
) << "Failed to load the Avrt.dll";
35 // Set up the desired capture format specified by the client.
36 format_
.nSamplesPerSec
= params
.sample_rate();
37 format_
.wFormatTag
= WAVE_FORMAT_PCM
;
38 format_
.wBitsPerSample
= params
.bits_per_sample();
39 format_
.nChannels
= params
.channels();
// Block alignment is the size in bytes of one frame (one sample for every
// channel); the average byte rate follows from it and the sample rate.
40 format_
.nBlockAlign
= (format_
.wBitsPerSample
/ 8) * format_
.nChannels
;
41 format_
.nAvgBytesPerSec
= format_
.nSamplesPerSec
* format_
.nBlockAlign
;
44 // Size in bytes of each audio frame.
45 frame_size_
= format_
.nBlockAlign
;
46 // Store size of audio packets which we expect to get from the audio
47 // endpoint device in each capture event.
48 packet_size_frames_
= params
.GetBytesPerBuffer() / format_
.nBlockAlign
;
49 packet_size_bytes_
= params
.GetBytesPerBuffer();
50 DVLOG(1) << "Number of bytes per audio frame : " << frame_size_
;
51 DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_
;
53 // All events are auto-reset events and non-signaled initially.
55 // Create the event which the audio engine will signal each time
56 // a buffer becomes ready to be processed by the client.
57 audio_samples_ready_event_
.Set(CreateEvent(NULL
, FALSE
, FALSE
, NULL
));
58 DCHECK(audio_samples_ready_event_
.IsValid());
60 // Create the event which will be set in Stop() when capturing shall stop.
61 stop_capture_event_
.Set(CreateEvent(NULL
, FALSE
, FALSE
, NULL
));
62 DCHECK(stop_capture_event_
.IsValid());
// Number of audio frames per millisecond at the requested sample rate.
64 ms_to_frame_count_
= static_cast<double>(params
.sample_rate()) / 1000.0;
// Factor that converts performance-counter ticks into 100-nanosecond units
// (10^7 divided by the counter frequency). It is set to 0.0 when
// QueryPerformanceFrequency() fails.
66 LARGE_INTEGER performance_frequency
;
67 if (QueryPerformanceFrequency(&performance_frequency
)) {
68 perf_count_to_100ns_units_
=
69 (10000000.0 / static_cast<double>(performance_frequency
.QuadPart
));
71 LOG(ERROR
) << "High-resolution performance counters are not supported.";
72 perf_count_to_100ns_units_
= 0.0;
// Destructor with an empty body; no cleanup is performed explicitly here.
76 WASAPIAudioInputStream::~WASAPIAudioInputStream() {}
// Opens the stream in the following visible order: select the capture
// endpoint, activate an IAudioClient on it, query the engine mix format,
// check that the desired format is supported and initialize the audio engine.
// |opened_| records whether InitializeAudioEngine() succeeded.
// NOTE(review): the embedded line numbers skip, so the error checks after each
// step and the return statement are not visible in this listing; presumably
// the function returns |opened_| -- confirm against the full file.
78 bool WASAPIAudioInputStream::Open() {
79 DCHECK(CalledOnValidThread());
80 // Verify that we are not already opened.
84 // Obtain a reference to the IMMDevice interface of the capturing
85 // device with the specified unique identifier or role which was
86 // set at construction.
87 HRESULT hr
= SetCaptureDevice();
91 // Obtain an IAudioClient interface which enables us to create and initialize
92 // an audio stream between an audio application and the audio engine.
93 hr
= ActivateCaptureDevice();
97 // Retrieve the stream format which the audio engine uses for its internal
98 // processing/mixing of shared-mode streams. This function call is for
99 // diagnostic purposes only and only in debug mode.
101 hr
= GetAudioEngineStreamFormat();
104 // Verify that the selected audio endpoint supports the specified format
105 // set during construction.
106 if (!DesiredFormatIsSupported()) {
110 // Initialize the audio stream between the client and the device using
111 // shared mode and a lowest possible glitch-free latency.
112 hr
= InitializeAudioEngine();
114 opened_
= SUCCEEDED(hr
);
// Starts capturing. The visible statements create and start the capture
// thread (named "wasapi_capture_thread", with this object as its delegate) and
// then start the audio client. |started_| records whether
// IAudioClient::Start() succeeded.
// NOTE(review): the embedded line numbers skip, so how |callback| is stored,
// any early returns and the assignment target of the new thread object are
// not visible in this listing.
118 void WASAPIAudioInputStream::Start(AudioInputCallback
* callback
) {
119 DCHECK(CalledOnValidThread());
121 DLOG_IF(ERROR
, !opened_
) << "Open() has not been called successfully";
130 // Starts periodic AGC microphone measurements if the AGC has been enabled
131 // using SetAutomaticGainControl().
134 // Create and start the thread that will drive the capturing by waiting for
137 new base::DelegateSimpleThread(this, "wasapi_capture_thread");
138 capture_thread_
->Start();
// Note that the capture thread is already running when the audio client is
// started below.
140 // Start streaming data between the endpoint buffer and the audio engine.
141 HRESULT hr
= audio_client_
->Start();
142 DLOG_IF(ERROR
, FAILED(hr
)) << "Failed to start input streaming.";
144 started_
= SUCCEEDED(hr
);
// Stops capturing. The visible statements signal |stop_capture_event_|, stop
// the audio client, then join the capture thread and reset |capture_thread_|
// to NULL.
// NOTE(review): the embedded line numbers skip, so any early return, the
// condition guarding the error log and the closing braces are not visible.
147 void WASAPIAudioInputStream::Stop() {
148 DCHECK(CalledOnValidThread());
149 DVLOG(1) << "WASAPIAudioInputStream::Stop()";
153 // Stops periodic AGC microphone measurements.
156 // Shut down the capture thread.
157 if (stop_capture_event_
.IsValid()) {
158 SetEvent(stop_capture_event_
.Get());
161 // Stop the input audio streaming.
162 HRESULT hr
= audio_client_
->Stop();
164 LOG(ERROR
) << "Failed to stop input streaming.";
167 // Wait until the thread completes and perform cleanup.
168 if (capture_thread_
) {
// The stop event is signaled a second time here, right before blocking in
// Join() (it was already signaled above when the event handle is valid).
169 SetEvent(stop_capture_event_
.Get());
170 capture_thread_
->Join();
171 capture_thread_
= NULL
;
// Closes the stream. The visible statements notify |sink_| through OnClose()
// and ask the audio manager to release this stream.
// NOTE(review): the embedded line numbers skip, so any preceding Stop() call
// and the condition guarding the |sink_| notification are not visible here.
177 void WASAPIAudioInputStream::Close() {
178 DVLOG(1) << "WASAPIAudioInputStream::Close()";
179 // It is valid to call Close() before calling open or Start().
180 // It is also valid to call Close() after Start() has been called.
183 sink_
->OnClose(this);
187 // Inform the audio manager that we have been closed. This will cause our
189 manager_
->ReleaseInputStream(this);
// Returns the maximum volume level. According to the comment below this is a
// fixed value of 1.0; the return statement itself is not visible in this
// listing (the embedded line numbers skip).
192 double WASAPIAudioInputStream::GetMaxVolume() {
193 // Verify that Open() has been called successfully, to ensure that an audio
194 // session exists and that an ISimpleAudioVolume interface has been created.
195 DLOG_IF(ERROR
, !opened_
) << "Open() has not been called successfully";
199 // The effective volume value is always in the range 0.0 to 1.0, hence
200 // we can return a fixed value (=1.0) here.
// Sets the master volume of the audio session to |volume|, which is DCHECKed
// to lie in the range [0.0, 1.0]. A failing SetMasterVolume() call is only
// reported through a debug warning log.
// NOTE(review): the embedded line numbers skip, so the second argument of
// SetMasterVolume(), any early return when the stream is not opened and the
// AGC update call described at the end are not visible in this listing.
204 void WASAPIAudioInputStream::SetVolume(double volume
) {
205 DVLOG(1) << "SetVolume(volume=" << volume
<< ")";
206 DCHECK(CalledOnValidThread());
207 DCHECK_GE(volume
, 0.0);
208 DCHECK_LE(volume
, 1.0);
210 DLOG_IF(ERROR
, !opened_
) << "Open() has not been called successfully";
214 // Set a new master volume level. Valid volume levels are in the range
215 // 0.0 to 1.0. Ignore volume-change events.
216 HRESULT hr
= simple_audio_volume_
->SetMasterVolume(static_cast<float>(volume
),
218 DLOG_IF(WARNING
, FAILED(hr
)) << "Failed to set new input master volume.";
220 // Update the AGC volume level based on the last setting above. Note that,
221 // the volume-level resolution is not infinite and it is therefore not
222 // possible to assume that the volume provided as input parameter can be
223 // used directly. Instead, a new query to the audio hardware is required.
224 // This method does nothing if AGC is disabled.
// Returns the current master volume of the audio session, converted to
// double. A failing GetMasterVolume() call is only reported through a debug
// warning log; the value of |level| is returned regardless.
// NOTE(review): the embedded line numbers skip, so the declaration (and
// initial value) of |level| and any early return are not visible here.
228 double WASAPIAudioInputStream::GetVolume() {
229 DLOG_IF(ERROR
, !opened_
) << "Open() has not been called successfully";
233 // Retrieve the current volume level. The value is in the range 0.0 to 1.0.
235 HRESULT hr
= simple_audio_volume_
->GetMasterVolume(&level
);
236 DLOG_IF(WARNING
, FAILED(hr
)) << "Failed to get input master volume.";
238 return static_cast<double>(level
);
// Returns the sample rate (nSamplesPerSec) of the audio engine mix format for
// the capture device identified by |device_id|, as obtained from
// GetMixFormat().
// NOTE(review): the embedded line numbers skip, so the handling of a failed
// |hr| is not visible here; confirm |audio_engine_mix_format| cannot be
// dereferenced after a failure.
242 int WASAPIAudioInputStream::HardwareSampleRate(
243 const std::string
& device_id
) {
244 base::win::ScopedCoMem
<WAVEFORMATEX
> audio_engine_mix_format
;
245 HRESULT hr
= GetMixFormat(device_id
, &audio_engine_mix_format
);
249 return static_cast<int>(audio_engine_mix_format
->nSamplesPerSec
);
// Returns the channel count (nChannels) of the audio engine mix format for
// the capture device identified by |device_id|, as obtained from
// GetMixFormat().
// NOTE(review): the embedded line numbers skip, so the handling of a failed
// |hr| is not visible here; confirm |audio_engine_mix_format| cannot be
// dereferenced after a failure.
253 uint32
WASAPIAudioInputStream::HardwareChannelCount(
254 const std::string
& device_id
) {
255 base::win::ScopedCoMem
<WAVEFORMATEX
> audio_engine_mix_format
;
256 HRESULT hr
= GetMixFormat(device_id
, &audio_engine_mix_format
);
260 return static_cast<uint32
>(audio_engine_mix_format
->nChannels
);
// Retrieves the audio engine mix format of a capture endpoint into
// |device_format|. The endpoint is the default capture device (eConsole role)
// when |device_id| equals AudioManagerBase::kDefaultDeviceId, otherwise the
// device with the given identification string. Returns the result of
// IAudioClient::GetMixFormat() when activation of the audio client succeeded,
// and the activation error code otherwise. The callers in this file receive
// the format in a base::win::ScopedCoMem.
// NOTE(review): the embedded line numbers skip, so the checks on |hr| after
// creating the enumerator and after obtaining the endpoint are not visible.
264 HRESULT
WASAPIAudioInputStream::GetMixFormat(const std::string
& device_id
,
265 WAVEFORMATEX
** device_format
) {
266 // It is assumed that this static method is called from a COM thread, i.e.,
267 // CoInitializeEx() is not called here to avoid STA/MTA conflicts.
268 ScopedComPtr
<IMMDeviceEnumerator
> enumerator
;
269 HRESULT hr
= enumerator
.CreateInstance(__uuidof(MMDeviceEnumerator
), NULL
,
270 CLSCTX_INPROC_SERVER
);
274 ScopedComPtr
<IMMDevice
> endpoint_device
;
275 if (device_id
== AudioManagerBase::kDefaultDeviceId
) {
276 // Retrieve the default capture audio endpoint.
277 hr
= enumerator
->GetDefaultAudioEndpoint(eCapture
, eConsole
,
278 endpoint_device
.Receive());
280 // Retrieve a capture endpoint device that is specified by an endpoint
281 // device-identification string.
282 hr
= enumerator
->GetDevice(UTF8ToUTF16(device_id
).c_str(),
283 endpoint_device
.Receive());
// Activate a temporary IAudioClient on the endpoint; it is only used to query
// the mix format and is released when |audio_client| goes out of scope.
288 ScopedComPtr
<IAudioClient
> audio_client
;
289 hr
= endpoint_device
->Activate(__uuidof(IAudioClient
),
290 CLSCTX_INPROC_SERVER
,
292 audio_client
.ReceiveVoid());
293 return SUCCEEDED(hr
) ? audio_client
->GetMixFormat(device_format
) : hr
;
// Entry point of the capture thread. It initializes COM (MTA) for the thread,
// raises the thread priority, tries to enable MMCSS with the "Pro Audio" task
// and then loops, waiting on |stop_capture_event_| and
// |audio_samples_ready_event_|. Each time samples are ready they are appended
// to a local buffer, and the buffer is drained in packets of
// |packet_size_frames_| frames. MMCSS is reverted before the function ends.
// NOTE(review): the embedded line numbers in this listing skip, so several
// parts are not visible: the declarations of |error| and |flags|, the
// remaining arguments of AvSetMmThreadCharacteristics() and GetBuffer(), the
// bodies of the switch cases, the callback that delivers each packet and the
// closing braces.
296 void WASAPIAudioInputStream::Run() {
297 ScopedCOMInitializer
com_init(ScopedCOMInitializer::kMTA
);
299 // Increase the thread priority.
300 capture_thread_
->SetThreadPriority(base::kThreadPriority_RealtimeAudio
);
302 // Enable MMCSS to ensure that this thread receives prioritized access to
304 DWORD task_index
= 0;
305 HANDLE mm_task
= avrt::AvSetMmThreadCharacteristics(L
"Pro Audio",
308 (mm_task
&& avrt::AvSetMmThreadPriority(mm_task
, AVRT_PRIORITY_CRITICAL
));
310 // Failed to enable MMCSS on this thread. It is not fatal but can lead
311 // to reduced QoS at high load.
312 DWORD err
= GetLastError();
313 LOG(WARNING
) << "Failed to enable MMCSS (error code=" << err
<< ").";
316 // Allocate a buffer with a size that enables us to take care of cases like:
317 // 1) The recorded buffer size is smaller, or does not match exactly with,
318 // the selected packet size used in each callback.
319 // 2) The selected buffer size is larger than the recorded buffer size in
// |buffer_frame_index| counts the frames currently stored in the local buffer
// that have not been delivered yet. The buffer size in bytes is twice the
// larger of the endpoint buffer size and the packet size.
321 size_t buffer_frame_index
= 0;
322 size_t capture_buffer_size
= std::max(
323 2 * endpoint_buffer_size_frames_
* frame_size_
,
324 2 * packet_size_frames_
* frame_size_
);
325 scoped_ptr
<uint8
[]> capture_buffer(new uint8
[capture_buffer_size
]);
327 LARGE_INTEGER now_count
;
328 bool recording
= true;
330 double volume
= GetVolume();
// The stop event is placed first in the array so that it has index 0 in the
// wait result below.
331 HANDLE wait_array
[2] = {stop_capture_event_
, audio_samples_ready_event_
};
333 while (recording
&& !error
) {
334 HRESULT hr
= S_FALSE
;
336 // Wait for a close-down event or a new capture event.
337 DWORD wait_result
= WaitForMultipleObjects(2, wait_array
, FALSE
, INFINITE
);
338 switch (wait_result
) {
342 case WAIT_OBJECT_0
+ 0:
343 // |stop_capture_event_| has been set.
346 case WAIT_OBJECT_0
+ 1:
348 // |audio_samples_ready_event_| has been set.
349 BYTE
* data_ptr
= NULL
;
350 UINT32 num_frames_to_read
= 0;
352 UINT64 device_position
= 0;
353 UINT64 first_audio_frame_timestamp
= 0;
355 // Retrieve the amount of data in the capture endpoint buffer,
356 // replace it with silence if required, create callbacks for each
357 // packet and store non-delivered data for the next event.
358 hr
= audio_capture_client_
->GetBuffer(&data_ptr
,
362 &first_audio_frame_timestamp
);
364 DLOG(ERROR
) << "Failed to get data from the capture buffer";
// Append the new frames after the frames that are already stored locally.
368 if (num_frames_to_read
!= 0) {
369 size_t pos
= buffer_frame_index
* frame_size_
;
370 size_t num_bytes
= num_frames_to_read
* frame_size_
;
371 DCHECK_GE(capture_buffer_size
, pos
+ num_bytes
);
373 if (flags
& AUDCLNT_BUFFERFLAGS_SILENT
) {
374 // Clear out the local buffer since silence is reported.
375 memset(&capture_buffer
[pos
], 0, num_bytes
);
377 // Copy captured data from audio engine buffer to local buffer.
378 memcpy(&capture_buffer
[pos
], data_ptr
, num_bytes
);
381 buffer_frame_index
+= num_frames_to_read
;
384 hr
= audio_capture_client_
->ReleaseBuffer(num_frames_to_read
);
385 DLOG_IF(ERROR
, FAILED(hr
)) << "Failed to release capture buffer";
387 // Derive a delay estimate for the captured audio packet.
388 // The value contains two parts (A+B), where A is the delay of the
389 // first audio frame in the packet and B is the extra delay
390 // contained in any stored data. Unit is in audio frames.
391 QueryPerformanceCounter(&now_count
);
392 double audio_delay_frames
=
393 ((perf_count_to_100ns_units_
* now_count
.QuadPart
-
394 first_audio_frame_timestamp
) / 10000.0) * ms_to_frame_count_
+
395 buffer_frame_index
- num_frames_to_read
;
397 // Get a cached AGC volume level which is updated once every second
398 // on the audio manager thread. Note that, |volume| is also updated
399 // each time SetVolume() is called through IPC by the render-side AGC.
400 GetAgcVolume(&volume
);
402 // Deliver captured data to the registered consumer using a packet
403 // size which was specified at construction.
// The delay estimate is rounded to the nearest whole frame.
404 uint32 delay_frames
= static_cast<uint32
>(audio_delay_frames
+ 0.5);
405 while (buffer_frame_index
>= packet_size_frames_
) {
407 reinterpret_cast<uint8
*>(capture_buffer
.get());
409 // Deliver data packet, delay estimation and volume level to
414 delay_frames
* frame_size_
,
417 // Store parts of the recorded data which can't be delivered
418 // using the current packet size. The stored section will be used
419 // either in the next while-loop iteration or in the next
421 memmove(&capture_buffer
[0],
422 &capture_buffer
[packet_size_bytes_
],
423 (buffer_frame_index
- packet_size_frames_
) * frame_size_
);
// NOTE(review): |delay_frames| is unsigned, so the subtraction below wraps
// around if it is smaller than |packet_size_frames_| -- confirm this cannot
// happen or is harmless for the consumer.
425 buffer_frame_index
-= packet_size_frames_
;
426 delay_frames
-= packet_size_frames_
;
436 if (recording
&& error
) {
437 // TODO(henrika): perhaps it is worth improving the cleanup here by e.g.
438 // stopping the audio client, joining the thread etc.?
439 NOTREACHED() << "WASAPI capturing failed with error code "
444 if (mm_task
&& !avrt::AvRevertMmThreadCharacteristics(mm_task
)) {
445 PLOG(WARNING
) << "Failed to disable MMCSS";
// Reports a capture error: triggers NOTREACHED() with the error code |err|
// and notifies |sink_| through OnError().
// NOTE(review): the embedded line numbers skip between the two statements, so
// any condition guarding the |sink_| call is not visible in this listing.
449 void WASAPIAudioInputStream::HandleError(HRESULT err
) {
450 NOTREACHED() << "Error code: " << err
;
452 sink_
->OnError(this);
// Creates an MMDeviceEnumerator and stores the selected capture endpoint in
// |endpoint_device_|: the default capture endpoint when |device_id_| equals
// AudioManagerBase::kDefaultDeviceId, otherwise the device with the given
// identification string. The device state is then queried and an error is
// logged (debug builds) when the device is not active.
// NOTE(review): the embedded line numbers skip, so some call arguments (e.g.
// the role passed to GetDefaultAudioEndpoint()), the checks on |hr| and the
// returned value are not visible in this listing.
455 HRESULT
WASAPIAudioInputStream::SetCaptureDevice() {
456 ScopedComPtr
<IMMDeviceEnumerator
> enumerator
;
457 HRESULT hr
= CoCreateInstance(__uuidof(MMDeviceEnumerator
),
459 CLSCTX_INPROC_SERVER
,
460 __uuidof(IMMDeviceEnumerator
),
461 enumerator
.ReceiveVoid());
463 // Retrieve the IMMDevice by using the specified role or the specified
464 // unique endpoint device-identification string.
465 // TODO(henrika): possibly add support for the eCommunications as well.
466 if (device_id_
== AudioManagerBase::kDefaultDeviceId
) {
467 // Retrieve the default capture audio endpoint for the specified role.
468 // Note that, in Windows Vista, the MMDevice API supports device roles
469 // but the system-supplied user interface programs do not.
470 hr
= enumerator
->GetDefaultAudioEndpoint(eCapture
,
472 endpoint_device_
.Receive());
474 // Retrieve a capture endpoint device that is specified by an endpoint
475 // device-identification string.
476 hr
= enumerator
->GetDevice(UTF8ToUTF16(device_id_
).c_str(),
477 endpoint_device_
.Receive());
483 // Verify that the audio endpoint device is active, i.e., the audio
484 // adapter that connects to the endpoint device is present and enabled.
// |state| starts out as DEVICE_STATE_DISABLED, so it does not read as active
// unless GetState() overwrites it.
485 DWORD state
= DEVICE_STATE_DISABLED
;
486 hr
= endpoint_device_
->GetState(&state
);
488 if (!(state
& DEVICE_STATE_ACTIVE
)) {
489 DLOG(ERROR
) << "Selected capture device is not active.";
// Activates an IAudioClient on |endpoint_device_| and stores the resulting
// interface in |audio_client_|.
// NOTE(review): the embedded line numbers skip, so one argument of Activate()
// and the return statement are not visible; presumably |hr| is returned.
498 HRESULT
WASAPIAudioInputStream::ActivateCaptureDevice() {
499 // Creates and activates an IAudioClient COM object given the selected
500 // capture endpoint device.
501 HRESULT hr
= endpoint_device_
->Activate(__uuidof(IAudioClient
),
502 CLSCTX_INPROC_SERVER
,
504 audio_client_
.ReceiveVoid());
// Queries the shared-mode mix format from |audio_client_| and logs its
// WAVEFORMATEX and WAVEFORMATEXTENSIBLE fields at verbose level 2. Nothing in
// the visible code stores the format; it is used for logging only.
// NOTE(review): the embedded line numbers skip, so the declaration of |hr|,
// any check of |hr| before |format_ex| is dereferenced and the return
// statement are not visible in this listing.
508 HRESULT
WASAPIAudioInputStream::GetAudioEngineStreamFormat() {
511 // The GetMixFormat() method retrieves the stream format that the
512 // audio engine uses for its internal processing of shared-mode streams.
513 // The method always uses a WAVEFORMATEXTENSIBLE structure, instead
514 // of a stand-alone WAVEFORMATEX structure, to specify the format.
515 // A WAVEFORMATEXTENSIBLE structure can specify both the mapping of
516 // channels to speakers and the number of bits of precision in each sample.
517 base::win::ScopedCoMem
<WAVEFORMATEXTENSIBLE
> format_ex
;
518 hr
= audio_client_
->GetMixFormat(
519 reinterpret_cast<WAVEFORMATEX
**>(&format_ex
));
521 // See http://msdn.microsoft.com/en-us/windows/hardware/gg463006#EFH
522 // for details on the WAVE file format.
523 WAVEFORMATEX format
= format_ex
->Format
;
524 DVLOG(2) << "WAVEFORMATEX:";
525 DVLOG(2) << " wFormatTags : 0x" << std::hex
<< format
.wFormatTag
;
526 DVLOG(2) << " nChannels : " << format
.nChannels
;
527 DVLOG(2) << " nSamplesPerSec : " << format
.nSamplesPerSec
;
528 DVLOG(2) << " nAvgBytesPerSec: " << format
.nAvgBytesPerSec
;
529 DVLOG(2) << " nBlockAlign : " << format
.nBlockAlign
;
530 DVLOG(2) << " wBitsPerSample : " << format
.wBitsPerSample
;
531 DVLOG(2) << " cbSize : " << format
.cbSize
;
533 DVLOG(2) << "WAVEFORMATEXTENSIBLE:";
534 DVLOG(2) << " wValidBitsPerSample: " <<
535 format_ex
->Samples
.wValidBitsPerSample
;
536 DVLOG(2) << " dwChannelMask : 0x" << std::hex
<<
537 format_ex
->dwChannelMask
;
// Log a readable name for the sub-format when it is one of the three known
// GUIDs; other sub-formats are not logged by the visible code.
538 if (format_ex
->SubFormat
== KSDATAFORMAT_SUBTYPE_PCM
)
539 DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_PCM";
540 else if (format_ex
->SubFormat
== KSDATAFORMAT_SUBTYPE_IEEE_FLOAT
)
541 DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
542 else if (format_ex
->SubFormat
== KSDATAFORMAT_SUBTYPE_WAVEFORMATEX
)
543 DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_WAVEFORMATEX";
// Asks |audio_client_| whether a format is supported in shared mode. An
// S_FALSE result (format not supported but a closest match exists) is
// reported through a debug error log.
// NOTE(review): the embedded line numbers skip, so the remaining arguments of
// IsFormatSupported() and the return statement are not visible; presumably
// the desired |format_| is passed and only S_OK counts as supported -- confirm
// against the full file.
548 bool WASAPIAudioInputStream::DesiredFormatIsSupported() {
549 // An application that uses WASAPI to manage shared-mode streams can rely
550 // on the audio engine to perform only limited format conversions. The audio
551 // engine can convert between a standard PCM sample size used by the
552 // application and the floating-point samples that the engine uses for its
553 // internal processing. However, the format for an application stream
554 // typically must have the same number of channels and the same sample
555 // rate as the stream format used by the device.
556 // Many audio devices support both PCM and non-PCM stream formats. However,
557 // the audio engine can mix only PCM streams.
558 base::win::ScopedCoMem
<WAVEFORMATEX
> closest_match
;
559 HRESULT hr
= audio_client_
->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED
,
562 DLOG_IF(ERROR
, hr
== S_FALSE
) << "Format is not supported "
563 << "but a closest match exists.";
// Initializes |audio_client_| for shared-mode, event-driven capture and then
// sets up everything that depends on the initialized client: it reads the
// endpoint buffer size into |endpoint_buffer_size_frames_|, logs the device
// period and stream latency, registers |audio_samples_ready_event_| as the
// event handle and obtains the IAudioCaptureClient and ISimpleAudioVolume
// services.
// NOTE(review): the embedded line numbers skip and the listing ends inside
// this function, so the remaining arguments of Initialize(), the checks on
// |hr| after each step and the return statement are not visible here.
567 HRESULT
WASAPIAudioInputStream::InitializeAudioEngine() {
568 // Initialize the audio stream between the client and the device.
569 // We connect indirectly through the audio engine by using shared mode
570 // and WASAPI is initialized in an event driven mode.
571 // Note that, |hnsBufferDuration| is set to 0, which ensures that the
572 // buffer is never smaller than the minimum buffer size needed to ensure
573 // that glitches do not occur between the periodic processing passes.
574 // This setting should lead to lowest possible latency.
575 HRESULT hr
= audio_client_
->Initialize(AUDCLNT_SHAREMODE_SHARED
,
576 AUDCLNT_STREAMFLAGS_EVENTCALLBACK
|
577 AUDCLNT_STREAMFLAGS_NOPERSIST
,
578 0, // hnsBufferDuration
585 // Retrieve the length of the endpoint buffer shared between the client
586 // and the audio engine. The buffer length determines the maximum amount
587 // of capture data that the audio engine can read from the endpoint buffer
588 // during a single processing pass.
589 // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
590 hr
= audio_client_
->GetBufferSize(&endpoint_buffer_size_frames_
);
593 DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_
597 // The period between processing passes by the audio engine is fixed for a
598 // particular audio endpoint device and represents the smallest processing
599 // quantum for the audio engine. This period plus the stream latency between
600 // the buffer and endpoint device represents the minimum possible latency
601 // that an audio application can achieve.
602 // TODO(henrika): possibly remove this section when all parts are ready.
// The two queries below use a separate |hr_dbg| so that their outcome does
// not overwrite |hr|. REFERENCE_TIME values are in 100-nanosecond units, so
// dividing by 10000.0 yields milliseconds for the log output.
603 REFERENCE_TIME device_period_shared_mode
= 0;
604 REFERENCE_TIME device_period_exclusive_mode
= 0;
605 HRESULT hr_dbg
= audio_client_
->GetDevicePeriod(
606 &device_period_shared_mode
, &device_period_exclusive_mode
);
607 if (SUCCEEDED(hr_dbg
)) {
608 DVLOG(1) << "device period: "
609 << static_cast<double>(device_period_shared_mode
/ 10000.0)
613 REFERENCE_TIME latency
= 0;
614 hr_dbg
= audio_client_
->GetStreamLatency(&latency
);
615 if (SUCCEEDED(hr_dbg
)) {
616 DVLOG(1) << "stream latency: " << static_cast<double>(latency
/ 10000.0)
621 // Set the event handle that the audio engine will signal each time
622 // a buffer becomes ready to be processed by the client.
623 hr
= audio_client_
->SetEventHandle(audio_samples_ready_event_
.Get());
627 // Get access to the IAudioCaptureClient interface. This interface
628 // enables us to read input data from the capture endpoint buffer.
629 hr
= audio_client_
->GetService(__uuidof(IAudioCaptureClient
),
630 audio_capture_client_
.ReceiveVoid());
634 // Obtain a reference to the ISimpleAudioVolume interface which enables
635 // us to control the master volume level of an audio session.
636 hr
= audio_client_
->GetService(__uuidof(ISimpleAudioVolume
),
637 simple_audio_volume_
.ReceiveVoid());