Removed unused VideoCaptureCapability parameters.
[chromium-blink-merge.git] / media / audio / win / audio_unified_win.h
blob76c53297b51a6766bd7f69a5c5a213be941c68ae
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
6 #define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
8 #include <Audioclient.h>
9 #include <MMDeviceAPI.h>
11 #include <string>
13 #include "base/compiler_specific.h"
14 #include "base/gtest_prod_util.h"
15 #include "base/threading/platform_thread.h"
16 #include "base/threading/simple_thread.h"
17 #include "base/win/scoped_co_mem.h"
18 #include "base/win/scoped_comptr.h"
19 #include "base/win/scoped_handle.h"
20 #include "media/audio/audio_io.h"
21 #include "media/audio/audio_parameters.h"
22 #include "media/base/audio_fifo.h"
23 #include "media/base/channel_mixer.h"
24 #include "media/base/media_export.h"
25 #include "media/base/multi_channel_resampler.h"
27 namespace media {
29 class AudioManagerWin;
31 // Implementation of AudioOutputStream for Windows using the Core Audio API
32 // where both capturing and rendering take place on the same thread to enable
33 // audio I/O. This class allows arbitrary combinations of input and output
34 // devices running off different clocks and using different drivers, with
35 // potentially differing sample-rates.
37 // It is required to first acquire the native sample rate of the selected
38 // output device and then use the same rate when creating this object.
39 // The inner operation depends on the input sample rate which is determined
40 // during construction. Three different main modes are supported:
42 // 1) input rate == output rate => input side drives output side directly.
43 // 2) input rate != output rate => both sides are driven independently by
44 // events and a FIFO plus a resampling unit is used to compensate for
45 // differences in sample rates between the two sides.
46 // 3) input rate == output rate but native buffer sizes are not identical =>
47 // same inner functionality as in (2) to compensate for the differences
48 // in buffer sizes and also compensate for any potential clock drift
49 // between the two devices.
51 // Mode detection is done at construction and using mode (1) will lead to
52 // best performance (lower delay and no "varispeed distortion"), i.e., it is
53 // recommended to use same sample rates for input and output. Mode (2) uses a
54 // resampler which supports rate adjustments to fine tune for things like
55 // clock drift and differences in sample rates between different devices.
56 // Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
57 // is also called the varispeed mode and it is used for case (3) as well to
58 // compensate for the difference in buffer sizes mainly.
59 // Mode (3) can happen if two different audio devices are used.
60 // As an example: some devices need a buffer size of 441 @ 44.1kHz and others
61 // 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
62 // which are even multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
64 // Implementation notes:
66 // - Open() can fail if the input and output parameters do not fulfill
67 // certain conditions. See source for Open() for more details.
68 // - Channel mixing will be performed if the client asks for a larger
69 // number of channels than the native audio layer provides.
70 // Example: client wants stereo but audio layer provides mono. In this case
71 // upmixing from mono to stereo (1->2) will be done.
73 // TODO(henrika):
75 // - Add support for exclusive mode.
76 // - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
77 // as internal sample-value representation.
78 // - Perform fine-tuning for non-matching sample rates to reduce latency.
// AudioOutputStream implementation which is also the delegate (see Run()) of
// the DelegateSimpleThread that drives both capturing and rendering.
80 class MEDIA_EXPORT WASAPIUnifiedStream
81 : public AudioOutputStream,
82 public base::DelegateSimpleThread::Delegate {
83 public:
84 // The ctor takes all the usual parameters, plus |manager| which is the
85 // audio manager that is creating this object.
// |input_device_id| is the unique ID of the input device to be opened.
86 WASAPIUnifiedStream(AudioManagerWin* manager,
87 const AudioParameters& params,
88 const std::string& input_device_id);
90 // The dtor is typically called by the AudioManager only and it is usually
91 // triggered by calling AudioOutputStream::Close().
92 virtual ~WASAPIUnifiedStream();
94 // Implementation of AudioOutputStream.
95 virtual bool Open() OVERRIDE;
96 virtual void Start(AudioSourceCallback* callback) OVERRIDE;
97 virtual void Stop() OVERRIDE;
98 virtual void Close() OVERRIDE;
99 virtual void SetVolume(double volume) OVERRIDE;
100 virtual void GetVolume(double* volume) OVERRIDE;
// Returns true while |audio_io_thread_| holds a thread object.
102 bool started() const {
103 return audio_io_thread_.get() != NULL;
106 // Returns true when both |fifo_| and |resampler_| exist, i.e., when the
107 // FIFO plus adjustable multi-channel resampler (varispeed) path is in use.
108 bool VarispeedMode() const { return (fifo_ && resampler_); }
110 private:
111 enum {
112 // Time in milliseconds between two successive delay measurements.
113 // We save resources by not updating the delay estimates for each capture
114 // event (typically 100Hz rate).
115 kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000,
117 // Max possible FIFO size.
118 kFifoSize = 16384,
120 // This value was determined empirically for minimum latency while still
121 // guarding against FIFO under-runs. The actual target size will be equal
122 // to kTargetFifoSafetyFactor * (native input buffer size).
123 // TODO(henrika): tune this value for lowest possible latency for all
124 // possible sample rate combinations.
125 kTargetFifoSafetyFactor = 2
128 // Additional initialization required when input and output sample rates
129 // differ. Allocates resources for |fifo_|, |resampler_|, |render_event_|,
130 // and the |capture_bus_| and configures the |input_format_| structure
131 // given the provided input and output audio parameters.
// NOTE(review): no |capture_bus_| member is declared in this header; the
// name above may be stale - confirm against the .cc.
132 void DoVarispeedInitialization(const AudioParameters& input_params,
133 const AudioParameters& output_params);
135 // Clears varispeed related components such as the FIFO and the resampler.
136 void ResetVarispeed();
138 // Builds WAVEFORMATEX structures for input and output based on input and
139 // output audio parameters.
140 void SetIOFormats(const AudioParameters& input_params,
141 const AudioParameters& output_params);
143 // DelegateSimpleThread::Delegate implementation.
144 virtual void Run() OVERRIDE;
146 // MultiChannelResampler::MultiChannelAudioSourceProvider implementation.
147 // Callback for providing more data into the resampler.
148 // Only used in varispeed mode, i.e., when input rate != output rate.
// NOTE(review): this class does not derive from such a provider interface in
// this header and the method is not marked OVERRIDE, so it is presumably
// handed to |resampler_| as a bound callback - confirm in the .cc.
149 virtual void ProvideInput(int frame_delay, AudioBus* audio_bus);
151 // Issues the OnError() callback to the |source_|.
152 void HandleError(HRESULT err);
154 // Stops and joins the audio thread in case of an error.
155 void StopAndJoinThread(HRESULT err);
157 // Converts unique endpoint ID to user-friendly device name.
158 std::string GetDeviceName(LPCWSTR device_id) const;
160 // Called on the audio IO thread for each capture event.
161 // Buffers captured audio into a FIFO if varispeed is used or into an audio
162 // bus if input and output sample rates are identical.
163 void ProcessInputAudio();
165 // Called on the audio IO thread for each render event when varispeed is
166 // active or for each capture event when varispeed is not used.
167 // In varispeed mode, it triggers a resampling callback, which reads from the
168 // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled
169 // input signal and at the same time asks for data to play out.
170 // If input and output rates are the same - instead of reading from the FIFO
171 // and resampling - we read directly from the audio bus used to store
172 // captured data in ProcessInputAudio.
// |audio_output_clock| is presumably used for the render side delay
// estimate - confirm in the .cc.
173 void ProcessOutputAudio(IAudioClock* audio_output_clock);
175 // Contains the thread ID of the creating thread.
176 base::PlatformThreadId creating_thread_id_;
178 // Our creator, the audio manager, needs to be notified when we close.
179 AudioManagerWin* manager_;
181 // Contains the audio parameter structure provided at construction.
182 AudioParameters params_;
183 // For convenience, same as in params_.
184 int input_channels_;
185 int output_channels_;
187 // Unique ID of the input device to be opened.
188 const std::string input_device_id_;
190 // The sharing mode for the streams.
191 // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
192 // where AUDCLNT_SHAREMODE_SHARED is the default.
193 AUDCLNT_SHAREMODE share_mode_;
195 // Rendering and capturing are driven by this thread (no message loop).
196 // All OnMoreIOData() callbacks will be called from this thread.
197 scoped_ptr<base::DelegateSimpleThread> audio_io_thread_;
199 // Contains the desired audio output format which is set up at construction.
200 // It is required to first acquire the native sample rate of the selected
201 // output device and then use the same rate when creating this object.
202 WAVEFORMATPCMEX output_format_;
204 // Contains the native audio input format which is set up at construction
205 // if varispeed mode is utilized.
206 WAVEFORMATPCMEX input_format_;
208 // True when successfully opened.
209 bool opened_;
211 // Volume level from 0 to 1 used for output scaling.
212 double volume_;
214 // Size in audio frames of each audio packet where an audio packet
215 // is defined as the block of data which the destination is expected to
216 // receive in each OnMoreIOData() callback.
217 size_t output_buffer_size_frames_;
219 // Size in audio frames of each audio packet where an audio packet
220 // is defined as the block of data which the source is expected to
221 // deliver in each OnMoreIOData() callback.
222 size_t input_buffer_size_frames_;
224 // Length of the audio endpoint buffers (render and capture), in frames.
225 uint32 endpoint_render_buffer_size_frames_;
226 uint32 endpoint_capture_buffer_size_frames_;
228 // Counts the number of audio frames written to the endpoint buffer.
229 uint64 num_written_frames_;
231 // Time stamp for last delay measurement.
232 base::TimeTicks last_delay_sample_time_;
234 // Contains the total (sum of render and capture) delay in milliseconds.
235 double total_delay_ms_;
237 // Contains the total (sum of render and capture and possibly FIFO) delay
238 // in bytes. The update frequency is set by a constant called
239 // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
240 int total_delay_bytes_;
242 // Pointer to the client that will deliver audio samples to be played out.
243 AudioSourceCallback* source_;
245 // IMMDevice interfaces which represent audio endpoint devices.
246 base::win::ScopedComPtr<IMMDevice> endpoint_render_device_;
247 base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_;
249 // IAudioClient interfaces which enable a client to create and initialize
250 // an audio stream between an audio application and the audio engine.
251 base::win::ScopedComPtr<IAudioClient> audio_output_client_;
252 base::win::ScopedComPtr<IAudioClient> audio_input_client_;
254 // The IAudioRenderClient interface enables a client to write output
255 // data to a rendering endpoint buffer.
256 base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_;
258 // The IAudioCaptureClient interface enables a client to read input
259 // data from a capturing endpoint buffer.
260 base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_;
262 // The audio engine will signal this event each time a buffer has been
263 // recorded.
264 base::win::ScopedHandle capture_event_;
266 // The audio engine will signal this event each time it needs a new
267 // audio buffer to play out.
268 // Only utilized in varispeed mode.
269 base::win::ScopedHandle render_event_;
271 // This event will be signaled when streaming shall stop.
272 base::win::ScopedHandle stop_streaming_event_;
274 // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
275 scoped_ptr<AudioBus> output_bus_;
277 // Container for sending data to AudioSourceCallback::OnMoreIOData().
278 scoped_ptr<AudioBus> input_bus_;
280 // Container for storing output from the channel mixer.
281 scoped_ptr<AudioBus> channel_bus_;
283 // All members below are only allocated, or used, in varispeed mode:
285 // Temporary storage of resampled input audio data.
286 scoped_ptr<AudioBus> resampled_bus_;
288 // Set to true the first time a capture event has been received in varispeed
289 // mode.
290 bool input_callback_received_;
292 // MultiChannelResampler is a multi channel wrapper for SincResampler;
293 // allowing high quality sample rate conversion of multiple channels at once.
294 scoped_ptr<MultiChannelResampler> resampler_;
296 // Resampler I/O ratio.
297 double io_sample_rate_ratio_;
299 // Used for input to output buffering.
300 scoped_ptr<AudioFifo> fifo_;
302 // The channel mixer is only created and utilized if the number of input
303 // channels is larger than the native number of input channels (e.g. client
304 // wants stereo but the audio device only supports mono).
305 scoped_ptr<ChannelMixer> channel_mixer_;
307 // The optimal number of frames we'd like to keep in the FIFO at all times.
308 int target_fifo_frames_;
310 // A running average of the measured delta between actual number of frames
311 // in the FIFO versus |target_fifo_frames_|.
312 double average_delta_;
314 // A varispeed rate scalar which is calculated based on FIFO drift.
315 double fifo_rate_compensation_;
317 // Set to true when input side signals output side that a new delay
318 // estimate is needed.
319 bool update_output_delay_;
321 // Capture side stores its delay estimate so the sum can be derived in
322 // the render side.
323 double capture_delay_ms_;
325 // TODO(henrika): possibly remove these members once the performance is
326 // properly tuned. Only used for off-line debugging.
327 #ifndef NDEBUG
328 enum LogElementNames {
329 INPUT_TIME_STAMP,
330 NUM_FRAMES_IN_FIFO,
331 RESAMPLER_MARGIN,
332 RATE_COMPENSATION
335 scoped_ptr<int64[]> input_time_stamps_;
336 scoped_ptr<int[]> num_frames_in_fifo_;
337 scoped_ptr<int[]> resampler_margin_;
338 scoped_ptr<double[]> fifo_rate_comps_;
339 scoped_ptr<int[]> num_elements_;
340 scoped_ptr<int[]> input_params_;
341 scoped_ptr<int[]> output_params_;
343 FILE* data_file_;
344 FILE* param_file_;
345 #endif
347 DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream);
350 } // namespace media
352 #endif // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_