1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
6 #define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
8 #include <Audioclient.h>
9 #include <MMDeviceAPI.h>
13 #include "base/compiler_specific.h"
14 #include "base/gtest_prod_util.h"
15 #include "base/threading/platform_thread.h"
16 #include "base/threading/simple_thread.h"
17 #include "base/win/scoped_co_mem.h"
18 #include "base/win/scoped_comptr.h"
19 #include "base/win/scoped_handle.h"
20 #include "media/audio/audio_io.h"
21 #include "media/audio/audio_parameters.h"
22 #include "media/base/audio_fifo.h"
23 #include "media/base/channel_mixer.h"
24 #include "media/base/media_export.h"
25 #include "media/base/multi_channel_resampler.h"
// Forward declaration; this header only refers to AudioManagerWin by pointer.
29 class AudioManagerWin
;
31 // Implementation of AudioOutputStream for Windows using the Core Audio API
32 // where both capturing and rendering take place on the same thread to enable
33 // audio I/O. This class allows arbitrary combinations of input and output
34 // devices running off different clocks and using different drivers, with
35 // potentially differing sample-rates.
37 // It is required to first acquire the native sample rate of the selected
38 // output device and then use the same rate when creating this object.
39 // The inner operation depends on the input sample rate which is determined
40 // during construction. Three different main modes are supported:
42 // 1) input rate == output rate => input side drives output side directly.
43 // 2) input rate != output rate => both sides are driven independently by
44 // events and a FIFO plus a resampling unit is used to compensate for
45 // differences in sample rates between the two sides.
46 // 3) input rate == output rate but native buffer sizes are not identical =>
47 // same inner functionality as in (2) to compensate for the differences
48 // in buffer sizes and also compensate for any potential clock drift
49 // between the two devices.
51 // Mode detection is done at construction and using mode (1) will lead to
52 // best performance (lower delay and no "varispeed distortion"), i.e., it is
53 // recommended to use same sample rates for input and output. Mode (2) uses a
54 // resampler which supports rate adjustments to fine tune for things like
55 // clock drift and differences in sample rates between different devices.
56 // Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
57 // is also called the varispeed mode and it is used for case (3) as well to
58 // compensate for the difference in buffer sizes mainly.
59 // Mode (3) can happen if two different audio devices are used.
60 // As an example: some devices need a buffer size of 441 @ 44.1kHz and others
61 // 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
62 // which are even multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
64 // Implementation notes:
66 // - Open() can fail if the input and output parameters do not fulfill
67 // certain conditions. See source for Open() for more details.
68 // - Channel mixing will be performed if the client asks for a larger
69 // number of channels than the native audio layer provides.
70 // Example: client wants stereo but audio layer provides mono. In this case
71 // upmixing from mono to stereo (1->2) will be done.
// Future work:
75 // - Add support for exclusive mode.
76 // - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
77 // as internal sample-value representation.
78 // - Perform fine-tuning for non-matching sample rates to reduce latency.
// WASAPIUnifiedStream is both an AudioOutputStream and a
// base::DelegateSimpleThread::Delegate; Run() is presumably the entry point
// executed by |audio_io_thread_| (confirm against the .cc file).
80 class MEDIA_EXPORT WASAPIUnifiedStream
81 : public AudioOutputStream
,
82 public base::DelegateSimpleThread::Delegate
{
84 // The ctor takes all the usual parameters, plus |manager| which is the
85 // audio manager who is creating this object.
// |params| is the audio parameter structure kept in |params_| and
// |input_device_id| is the unique ID of the input device to be opened.
86 WASAPIUnifiedStream(AudioManagerWin
* manager
,
87 const AudioParameters
& params
,
88 const std::string
& input_device_id
);
90 // The dtor is typically called by the AudioManager only and it is usually
91 // triggered by calling AudioOutputStream::Close().
92 virtual ~WASAPIUnifiedStream();
94 // Implementation of AudioOutputStream.
95 virtual bool Open() OVERRIDE
;
96 virtual void Start(AudioSourceCallback
* callback
) OVERRIDE
;
97 virtual void Stop() OVERRIDE
;
98 virtual void Close() OVERRIDE
;
99 virtual void SetVolume(double volume
) OVERRIDE
;
100 virtual void GetVolume(double* volume
) OVERRIDE
;
// Returns true if |audio_io_thread_| is currently allocated (non-NULL).
102 bool started() const {
103 return audio_io_thread_
.get() != NULL
;
106 // Returns true when both |fifo_| and |resampler_| are allocated, i.e., when
107 // the FIFO and the adjustable multi-channel resampler are utilized.
// According to the class comment this (varispeed) mode is used when the input
// sample rate differs from the output sample rate, and also when the rates
// match but the native buffer sizes are not identical.
108 bool VarispeedMode() const { return (fifo_
&& resampler_
); }
112 // Time in milliseconds between two successive delay measurements.
113 // We save resources by not updating the delay estimates for each capture
114 // event (typically 100Hz rate).
115 kTimeDiffInMillisecondsBetweenDelayMeasurements
= 1000,
117 // Max possible FIFO size.
// NOTE(review): no constant follows the comment above in this listing; it
// does not describe kTargetFifoSafetyFactor below - confirm what it documents.
120 // This value was determined empirically for minimum latency while still
121 // guarding against FIFO under-runs. The actual target size will be equal
122 // to kTargetFifoSafetyFactor * (native input buffer size).
123 // TODO(henrika): tune this value for lowest possible latency for all
124 // possible sample rate combinations.
125 kTargetFifoSafetyFactor
= 2
128 // Additional initialization required when input and output sample rates
129 // differ. Allocates resources for |fifo_|, |resampler_|, |render_event_|,
130 // and the |capture_bus_| and configures the |input_format_| structure
131 // given the provided input and output audio parameters.
// NOTE(review): no member named |capture_bus_| is declared in this class;
// the comment above may be stale - confirm which bus is meant.
132 void DoVarispeedInitialization(const AudioParameters
& input_params
,
133 const AudioParameters
& output_params
);
135 // Clears varispeed related components such as the FIFO and the resampler.
136 void ResetVarispeed();
138 // Builds WAVEFORMATEX structures for input and output based on input and
139 // output audio parameters.
140 void SetIOFormats(const AudioParameters
& input_params
,
141 const AudioParameters
& output_params
);
143 // DelegateSimpleThread::Delegate implementation.
144 virtual void Run() OVERRIDE
;
146 // MultiChannelResampler::MultiChannelAudioSourceProvider implementation.
147 // Callback for providing more data into the resampler.
148 // Only used in varispeed mode, i.e., when input rate != output rate.
// NOTE(review): this class does not list such a provider among its base
// classes and the method is not marked OVERRIDE; presumably it is handed to
// |resampler_| as a callback - confirm against the .cc file.
149 virtual void ProvideInput(int frame_delay
, AudioBus
* audio_bus
);
151 // Issues the OnError() callback to the |source_|.
152 void HandleError(HRESULT err
);
154 // Stops and joins the audio thread in case of an error.
155 void StopAndJoinThread(HRESULT err
);
157 // Converts unique endpoint ID to user-friendly device name.
158 std::string
GetDeviceName(LPCWSTR device_id
) const;
160 // Called on the audio IO thread for each capture event.
161 // Buffers captured audio into a FIFO if varispeed is used or into an audio
162 // bus if input and output sample rates are identical.
163 void ProcessInputAudio();
165 // Called on the audio IO thread for each render event when varispeed is
166 // active or for each capture event when varispeed is not used.
167 // In varispeed mode, it triggers a resampling callback, which reads from the
168 // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled
169 // input signal and at the same time asks for data to play out.
170 // If input and output rates are the same - instead of reading from the FIFO
171 // and doing resampling - we read directly from the audio bus used to store
172 // captured data in ProcessInputAudio.
173 void ProcessOutputAudio(IAudioClock
* audio_output_clock
);
175 // Contains the thread ID of the creating thread.
176 base::PlatformThreadId creating_thread_id_
;
178 // Our creator, the audio manager needs to be notified when we close.
179 AudioManagerWin
* manager_
;
181 // Contains the audio parameter structure provided at construction.
182 AudioParameters params_
;
183 // For convenience, same as in params_.
185 int output_channels_
;
187 // Unique ID of the input device to be opened.
188 const std::string input_device_id_
;
190 // The sharing mode for the streams.
191 // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
192 // where AUDCLNT_SHAREMODE_SHARED is the default.
193 AUDCLNT_SHAREMODE share_mode_
;
195 // Rendering and capturing are driven by this thread (no message loop).
196 // All OnMoreIOData() callbacks will be called from this thread.
197 scoped_ptr
<base::DelegateSimpleThread
> audio_io_thread_
;
199 // Contains the desired audio output format which is set up at construction.
200 // It is required to first acquire the native sample rate of the selected
201 // output device and then use the same rate when creating this object.
202 WAVEFORMATPCMEX output_format_
;
204 // Contains the native audio input format which is set up at construction
205 // if varispeed mode is utilized.
206 WAVEFORMATPCMEX input_format_
;
208 // True when successfully opened.
211 // Volume level from 0 to 1 used for output scaling.
// NOTE(review): the members described by the two comments above are not
// declared in this listing; they do not document the member below.
214 // Size in audio frames of each audio packet where an audio packet
215 // is defined as the block of data which the destination is expected to
216 // receive in each OnMoreIOData() callback.
217 size_t output_buffer_size_frames_
;
219 // Size in audio frames of each audio packet where an audio packet
220 // is defined as the block of data which the source is expected to
221 // deliver in each OnMoreIOData() callback.
222 size_t input_buffer_size_frames_
;
224 // Length of the audio endpoint buffer for the render and capture sides
// respectively.
225 uint32 endpoint_render_buffer_size_frames_
;
226 uint32 endpoint_capture_buffer_size_frames_
;
228 // Counts the number of audio frames written to the endpoint buffer.
229 uint64 num_written_frames_
;
231 // Time stamp for last delay measurement.
232 base::TimeTicks last_delay_sample_time_
;
234 // Contains the total (sum of render and capture) delay in milliseconds.
235 double total_delay_ms_
;
237 // Contains the total (sum of render and capture and possibly FIFO) delay
238 // in bytes. The update frequency is set by a constant called
239 // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
240 int total_delay_bytes_
;
242 // Pointer to the client that will deliver audio samples to be played out.
243 AudioSourceCallback
* source_
;
245 // IMMDevice interfaces which represent audio endpoint devices.
246 base::win::ScopedComPtr
<IMMDevice
> endpoint_render_device_
;
247 base::win::ScopedComPtr
<IMMDevice
> endpoint_capture_device_
;
249 // IAudioClient interfaces which enable a client to create and initialize
250 // an audio stream between an audio application and the audio engine.
251 base::win::ScopedComPtr
<IAudioClient
> audio_output_client_
;
252 base::win::ScopedComPtr
<IAudioClient
> audio_input_client_
;
254 // The IAudioRenderClient interface enables a client to write output
255 // data to a rendering endpoint buffer.
256 base::win::ScopedComPtr
<IAudioRenderClient
> audio_render_client_
;
258 // The IAudioCaptureClient interface enables a client to read input
259 // data from a capturing endpoint buffer.
260 base::win::ScopedComPtr
<IAudioCaptureClient
> audio_capture_client_
;
262 // The audio engine will signal this event each time a buffer has been
// captured (ProcessInputAudio() is called for each such capture event).
264 base::win::ScopedHandle capture_event_
;
266 // The audio engine will signal this event each time it needs a new
267 // audio buffer to play out.
268 // Only utilized in varispeed mode.
269 base::win::ScopedHandle render_event_
;
271 // This event will be signaled when streaming shall stop.
272 base::win::ScopedHandle stop_streaming_event_
;
274 // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
275 scoped_ptr
<AudioBus
> output_bus_
;
277 // Container for sending data to AudioSourceCallback::OnMoreIOData().
278 scoped_ptr
<AudioBus
> input_bus_
;
280 // Container for storing output from the channel mixer.
281 scoped_ptr
<AudioBus
> channel_bus_
;
283 // All members below are only allocated, or used, in varispeed mode:
285 // Temporary storage of resampled input audio data.
286 scoped_ptr
<AudioBus
> resampled_bus_
;
288 // Set to true first time a capture event has been received in varispeed
// mode.
290 bool input_callback_received_
;
292 // MultiChannelResampler is a multi channel wrapper for SincResampler;
293 // allowing high quality sample rate conversion of multiple channels at once.
294 scoped_ptr
<MultiChannelResampler
> resampler_
;
296 // Resampler I/O ratio.
297 double io_sample_rate_ratio_
;
299 // Used for input to output buffering.
300 scoped_ptr
<AudioFifo
> fifo_
;
302 // The channel mixer is only created and utilized if number of input channels
303 // is larger than the native number of input channels (e.g. client wants
304 // stereo but the audio device only supports mono).
305 scoped_ptr
<ChannelMixer
> channel_mixer_
;
307 // The optimal number of frames we'd like to keep in the FIFO at all times.
308 int target_fifo_frames_
;
310 // A running average of the measured delta between actual number of frames
311 // in the FIFO versus |target_fifo_frames_|.
312 double average_delta_
;
314 // A varispeed rate scalar which is calculated based on FIFO drift.
315 double fifo_rate_compensation_
;
317 // Set to true when input side signals output side that a new delay
318 // estimate is needed.
319 bool update_output_delay_
;
321 // Capture side stores its delay estimate so the sum can be derived in
// the output side (presumably in ProcessOutputAudio() - confirm).
323 double capture_delay_ms_
;
325 // TODO(henrika): possibly remove these members once the performance is
326 // properly tuned. Only used for off-line debugging.
328 enum LogElementNames
{
335 scoped_ptr
<int64
[]> input_time_stamps_
;
336 scoped_ptr
<int[]> num_frames_in_fifo_
;
337 scoped_ptr
<int[]> resampler_margin_
;
338 scoped_ptr
<double[]> fifo_rate_comps_
;
339 scoped_ptr
<int[]> num_elements_
;
340 scoped_ptr
<int[]> input_params_
;
341 scoped_ptr
<int[]> output_params_
;
347 DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream
);
352 #endif // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_