media/audio/win/audio_low_latency_input_win.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/audio/win/audio_low_latency_input_win.h"
   6
   7 #include "base/logging.h"
   8 #include "base/memory/scoped_ptr.h"
   9 #include "base/strings/utf_string_conversions.h"
  10 #include "media/audio/win/audio_manager_win.h"
  11 #include "media/audio/win/avrt_wrapper_win.h"
  12 #include "media/audio/win/core_audio_util_win.h"
  13 #include "media/base/audio_bus.h"
  14
  15 using base::win::ScopedComPtr;
  16 using base::win::ScopedCOMInitializer;
  17
  18 namespace media {
  19 namespace {
  20
  21 // Returns true if |device| represents the default communication capture device.
  22 bool IsDefaultCommunicationDevice(IMMDeviceEnumerator* enumerator,
  23                                   IMMDevice* device) {
  24   ScopedComPtr<IMMDevice> communications;
  25   if (FAILED(enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications,
  26                                                  communications.Receive()))) {
  27     return false;
  28   }
  29
  30   base::win::ScopedCoMem<WCHAR> communications_id, device_id;
  31   device->GetId(&device_id);
  32   communications->GetId(&communications_id);
  33   return lstrcmpW(communications_id, device_id) == 0;
  34 }
  35
  36 }  // namespace
  37
  38 WASAPIAudioInputStream::WASAPIAudioInputStream(AudioManagerWin* manager,
  39                                                const AudioParameters& params,
  40                                                const std::string& device_id)
  41     : manager_(manager),
  42       capture_thread_(NULL),
  43       opened_(false),
  44       started_(false),
  45       frame_size_(0),
  46       packet_size_frames_(0),
  47       packet_size_bytes_(0),
  48       endpoint_buffer_size_frames_(0),
  49       effects_(params.effects()),
  50       device_id_(device_id),
  51       perf_count_to_100ns_units_(0.0),
  52       ms_to_frame_count_(0.0),
  53       sink_(NULL),
  54       audio_bus_(media::AudioBus::Create(params)) {
  55   DCHECK(manager_);
  56
  57   // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  58   bool avrt_init = avrt::Initialize();
  59   DCHECK(avrt_init) << "Failed to load the Avrt.dll";
  60
  61   // Set up the desired capture format specified by the client.
  62   format_.nSamplesPerSec = params.sample_rate();
  63   format_.wFormatTag = WAVE_FORMAT_PCM;
  64   format_.wBitsPerSample = params.bits_per_sample();
  65   format_.nChannels = params.channels();
  66   format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;
  67   format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;
  68   format_.cbSize = 0;
  69
  70   // Size in bytes of each audio frame.
  71   frame_size_ = format_.nBlockAlign;
  72   // Store size of audio packets which we expect to get from the audio
  73   // endpoint device in each capture event.
  74   packet_size_frames_ = params.GetBytesPerBuffer() / format_.nBlockAlign;
  75   packet_size_bytes_ = params.GetBytesPerBuffer();
  76   DVLOG(1) << "Number of bytes per audio frame  : " << frame_size_;
  77   DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
  78
  79   // All events are auto-reset events and non-signaled initially.
  80
  81   // Create the event which the audio engine will signal each time
  82   // a buffer becomes ready to be processed by the client.
  83   audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  84   DCHECK(audio_samples_ready_event_.IsValid());
  85
  86   // Create the event which will be set in Stop() when capturing shall stop.
  87   stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  88   DCHECK(stop_capture_event_.IsValid());
  89
  90   ms_to_frame_count_ = static_cast<double>(params.sample_rate()) / 1000.0;
  91
  92   LARGE_INTEGER performance_frequency;
  93   if (QueryPerformanceFrequency(&performance_frequency)) {
  94     perf_count_to_100ns_units_ =
  95         (10000000.0 / static_cast<double>(performance_frequency.QuadPart));
  96   } else {
  97     DLOG(ERROR) << "High-resolution performance counters are not supported.";
  98   }
  99 }
 100
// The destructor performs no explicit cleanup of its own; in debug builds it
// only verifies that it runs on the thread accepted by CalledOnValidThread().
WASAPIAudioInputStream::~WASAPIAudioInputStream() {
  DCHECK(CalledOnValidThread());
}
 104
 105 bool WASAPIAudioInputStream::Open() {
 106   DCHECK(CalledOnValidThread());
 107   // Verify that we are not already opened.
 108   if (opened_)
 109     return false;
 110
 111   // Obtain a reference to the IMMDevice interface of the capturing
 112   // device with the specified unique identifier or role which was
 113   // set at construction.
 114   HRESULT hr = SetCaptureDevice();
 115   if (FAILED(hr))
 116     return false;
 117
 118   // Obtain an IAudioClient interface which enables us to create and initialize
 119   // an audio stream between an audio application and the audio engine.
 120   hr = ActivateCaptureDevice();
 121   if (FAILED(hr))
 122     return false;
 123
 124   // Retrieve the stream format which the audio engine uses for its internal
 125   // processing/mixing of shared-mode streams. This function call is for
 126   // diagnostic purposes only and only in debug mode.
 127 #ifndef NDEBUG
 128   hr = GetAudioEngineStreamFormat();
 129 #endif
 130
 131   // Verify that the selected audio endpoint supports the specified format
 132   // set during construction.
 133   if (!DesiredFormatIsSupported())
 134     return false;
 135
 136   // Initialize the audio stream between the client and the device using
 137   // shared mode and a lowest possible glitch-free latency.
 138   hr = InitializeAudioEngine();
 139
 140   opened_ = SUCCEEDED(hr);
 141   return opened_;
 142 }
 143
 144 void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
 145   DCHECK(CalledOnValidThread());
 146   DCHECK(callback);
 147   DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
 148   if (!opened_)
 149     return;
 150
 151   if (started_)
 152     return;
 153
 154   DCHECK(!sink_);
 155   sink_ = callback;
 156
 157   // Starts periodic AGC microphone measurements if the AGC has been enabled
 158   // using SetAutomaticGainControl().
 159   StartAgc();
 160
 161   // Create and start the thread that will drive the capturing by waiting for
 162   // capture events.
 163   capture_thread_ =
 164       new base::DelegateSimpleThread(this, "wasapi_capture_thread");
 165   capture_thread_->Start();
 166
 167   // Start streaming data between the endpoint buffer and the audio engine.
 168   HRESULT hr = audio_client_->Start();
 169   DLOG_IF(ERROR, FAILED(hr)) << "Failed to start input streaming.";
 170
 171   if (SUCCEEDED(hr) && audio_render_client_for_loopback_.get())
 172     hr = audio_render_client_for_loopback_->Start();
 173
 174   started_ = SUCCEEDED(hr);
 175 }
 176
 177 void WASAPIAudioInputStream::Stop() {
 178   DCHECK(CalledOnValidThread());
 179   DVLOG(1) << "WASAPIAudioInputStream::Stop()";
 180   if (!started_)
 181     return;
 182
 183   // Stops periodic AGC microphone measurements.
 184   StopAgc();
 185
 186   // Shut down the capture thread.
 187   if (stop_capture_event_.IsValid()) {
 188     SetEvent(stop_capture_event_.Get());
 189   }
 190
 191   // Stop the input audio streaming.
 192   HRESULT hr = audio_client_->Stop();
 193   if (FAILED(hr)) {
 194     LOG(ERROR) << "Failed to stop input streaming.";
 195   }
 196
 197   // Wait until the thread completes and perform cleanup.
 198   if (capture_thread_) {
 199     SetEvent(stop_capture_event_.Get());
 200     capture_thread_->Join();
 201     capture_thread_ = NULL;
 202   }
 203
 204   started_ = false;
 205   sink_ = NULL;
 206 }
 207
 208 void WASAPIAudioInputStream::Close() {
 209   DVLOG(1) << "WASAPIAudioInputStream::Close()";
 210   // It is valid to call Close() before calling open or Start().
 211   // It is also valid to call Close() after Start() has been called.
 212   Stop();
 213
 214   // Inform the audio manager that we have been closed. This will cause our
 215   // destruction.
 216   manager_->ReleaseInputStream(this);
 217 }
 218
 219 double WASAPIAudioInputStream::GetMaxVolume() {
 220   // Verify that Open() has been called succesfully, to ensure that an audio
 221   // session exists and that an ISimpleAudioVolume interface has been created.
 222   DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
 223   if (!opened_)
 224     return 0.0;
 225
 226   // The effective volume value is always in the range 0.0 to 1.0, hence
 227   // we can return a fixed value (=1.0) here.
 228   return 1.0;
 229 }
 230
 231 void WASAPIAudioInputStream::SetVolume(double volume) {
 232   DVLOG(1) << "SetVolume(volume=" << volume << ")";
 233   DCHECK(CalledOnValidThread());
 234   DCHECK_GE(volume, 0.0);
 235   DCHECK_LE(volume, 1.0);
 236
 237   DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
 238   if (!opened_)
 239     return;
 240
 241   // Set a new master volume level. Valid volume levels are in the range
 242   // 0.0 to 1.0. Ignore volume-change events.
 243   HRESULT hr =
 244       simple_audio_volume_->SetMasterVolume(static_cast<float>(volume), NULL);
 245   if (FAILED(hr))
 246     DLOG(WARNING) << "Failed to set new input master volume.";
 247
 248   // Update the AGC volume level based on the last setting above. Note that,
 249   // the volume-level resolution is not infinite and it is therefore not
 250   // possible to assume that the volume provided as input parameter can be
 251   // used directly. Instead, a new query to the audio hardware is required.
 252   // This method does nothing if AGC is disabled.
 253   UpdateAgcVolume();
 254 }
 255
 256 double WASAPIAudioInputStream::GetVolume() {
 257   DCHECK(opened_) << "Open() has not been called successfully";
 258   if (!opened_)
 259     return 0.0;
 260
 261   // Retrieve the current volume level. The value is in the range 0.0 to 1.0.
 262   float level = 0.0f;
 263   HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
 264   if (FAILED(hr))
 265     DLOG(WARNING) << "Failed to get input master volume.";
 266
 267   return static_cast<double>(level);
 268 }
 269
 270 bool WASAPIAudioInputStream::IsMuted() {
 271   DCHECK(opened_) << "Open() has not been called successfully";
 272   DCHECK(CalledOnValidThread());
 273   if (!opened_)
 274     return false;
 275
 276   // Retrieves the current muting state for the audio session.
 277   BOOL is_muted = FALSE;
 278   HRESULT hr = simple_audio_volume_->GetMute(&is_muted);
 279   if (FAILED(hr))
 280     DLOG(WARNING) << "Failed to get input master volume.";
 281
 282   return is_muted != FALSE;
 283 }
 284
 285 // static
 286 AudioParameters WASAPIAudioInputStream::GetInputStreamParameters(
 287     const std::string& device_id) {
 288   int sample_rate = 48000;
 289   ChannelLayout channel_layout = CHANNEL_LAYOUT_STEREO;
 290
 291   base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
 292   int effects = AudioParameters::NO_EFFECTS;
 293   if (SUCCEEDED(GetMixFormat(device_id, &audio_engine_mix_format, &effects))) {
 294     sample_rate = static_cast<int>(audio_engine_mix_format->nSamplesPerSec);
 295     channel_layout = audio_engine_mix_format->nChannels == 1 ?
 296         CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO;
 297   }
 298
 299   // Use 10ms frame size as default.
 300   int frames_per_buffer = sample_rate / 100;
 301   return AudioParameters(
 302       AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout, sample_rate,
 303       16, frames_per_buffer, effects);
 304 }
 305
 306 // static
 307 HRESULT WASAPIAudioInputStream::GetMixFormat(const std::string& device_id,
 308                                              WAVEFORMATEX** device_format,
 309                                              int* effects) {
 310   DCHECK(effects);
 311
 312   // It is assumed that this static method is called from a COM thread, i.e.,
 313   // CoInitializeEx() is not called here to avoid STA/MTA conflicts.
 314   ScopedComPtr<IMMDeviceEnumerator> enumerator;
 315   HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator), NULL,
 316                                          CLSCTX_INPROC_SERVER);
 317   if (FAILED(hr))
 318     return hr;
 319
 320   ScopedComPtr<IMMDevice> endpoint_device;
 321   if (device_id == AudioManagerBase::kDefaultDeviceId) {
 322     // Retrieve the default capture audio endpoint.
 323     hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
 324                                              endpoint_device.Receive());
 325   } else if (device_id == AudioManagerBase::kLoopbackInputDeviceId) {
 326     // Get the mix format of the default playback stream.
 327     hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
 328                                              endpoint_device.Receive());
 329   } else {
 330     // Retrieve a capture endpoint device that is specified by an endpoint
 331     // device-identification string.
 332     hr = enumerator->GetDevice(base::UTF8ToUTF16(device_id).c_str(),
 333                                endpoint_device.Receive());
 334   }
 335
 336   if (FAILED(hr))
 337     return hr;
 338
 339   *effects =
 340       IsDefaultCommunicationDevice(enumerator.get(), endpoint_device.get())
 341           ? AudioParameters::DUCKING
 342           : AudioParameters::NO_EFFECTS;
 343
 344   ScopedComPtr<IAudioClient> audio_client;
 345   hr = endpoint_device->Activate(__uuidof(IAudioClient),
 346                                  CLSCTX_INPROC_SERVER,
 347                                  NULL,
 348                                  audio_client.ReceiveVoid());
 349   return SUCCEEDED(hr) ? audio_client->GetMixFormat(device_format) : hr;
 350 }
 351
 352 void WASAPIAudioInputStream::Run() {
 353   ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
 354
 355   // Increase the thread priority.
 356   capture_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);
 357
 358   // Enable MMCSS to ensure that this thread receives prioritized access to
 359   // CPU resources.
 360   DWORD task_index = 0;
 361   HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
 362                                                       &task_index);
 363   bool mmcss_is_ok =
 364       (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
 365   if (!mmcss_is_ok) {
 366     // Failed to enable MMCSS on this thread. It is not fatal but can lead
 367     // to reduced QoS at high load.
 368     DWORD err = GetLastError();
 369     LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
 370   }
 371
 372   // Allocate a buffer with a size that enables us to take care of cases like:
 373   // 1) The recorded buffer size is smaller, or does not match exactly with,
 374   //    the selected packet size used in each callback.
 375   // 2) The selected buffer size is larger than the recorded buffer size in
 376   //    each event.
 377   size_t buffer_frame_index = 0;
 378   size_t capture_buffer_size = std::max(
 379       2 * endpoint_buffer_size_frames_ * frame_size_,
 380       2 * packet_size_frames_ * frame_size_);
 381   scoped_ptr<uint8[]> capture_buffer(new uint8[capture_buffer_size]);
 382
 383   LARGE_INTEGER now_count;
 384   bool recording = true;
 385   bool error = false;
 386   double volume = GetVolume();
 387   HANDLE wait_array[2] =
 388       { stop_capture_event_.Get(), audio_samples_ready_event_.Get() };
 389
 390   while (recording && !error) {
 391     HRESULT hr = S_FALSE;
 392
 393     // Wait for a close-down event or a new capture event.
 394     DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
 395     switch (wait_result) {
 396       case WAIT_FAILED:
 397         error = true;
 398         break;
 399       case WAIT_OBJECT_0 + 0:
 400         // |stop_capture_event_| has been set.
 401         recording = false;
 402         break;
 403       case WAIT_OBJECT_0 + 1:
 404         {
 405           // |audio_samples_ready_event_| has been set.
 406           BYTE* data_ptr = NULL;
 407           UINT32 num_frames_to_read = 0;
 408           DWORD flags = 0;
 409           UINT64 device_position = 0;
 410           UINT64 first_audio_frame_timestamp = 0;
 411
 412           // Retrieve the amount of data in the capture endpoint buffer,
 413           // replace it with silence if required, create callbacks for each
 414           // packet and store non-delivered data for the next event.
 415           hr = audio_capture_client_->GetBuffer(&data_ptr,
 416                                                 &num_frames_to_read,
 417                                                 &flags,
 418                                                 &device_position,
 419                                                 &first_audio_frame_timestamp);
 420           if (FAILED(hr)) {
 421             DLOG(ERROR) << "Failed to get data from the capture buffer";
 422             continue;
 423           }
 424
 425           if (num_frames_to_read != 0) {
 426             size_t pos = buffer_frame_index * frame_size_;
 427             size_t num_bytes = num_frames_to_read * frame_size_;
 428             DCHECK_GE(capture_buffer_size, pos + num_bytes);
 429
 430             if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
 431               // Clear out the local buffer since silence is reported.
 432               memset(&capture_buffer[pos], 0, num_bytes);
 433             } else {
 434               // Copy captured data from audio engine buffer to local buffer.
 435               memcpy(&capture_buffer[pos], data_ptr, num_bytes);
 436             }
 437
 438             buffer_frame_index += num_frames_to_read;
 439           }
 440
 441           hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
 442           DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";
 443
 444           // Derive a delay estimate for the captured audio packet.
 445           // The value contains two parts (A+B), where A is the delay of the
 446           // first audio frame in the packet and B is the extra delay
 447           // contained in any stored data. Unit is in audio frames.
 448           QueryPerformanceCounter(&now_count);
 449           double audio_delay_frames =
 450               ((perf_count_to_100ns_units_ * now_count.QuadPart -
 451                 first_audio_frame_timestamp) / 10000.0) * ms_to_frame_count_ +
 452                 buffer_frame_index - num_frames_to_read;
 453
 454           // Get a cached AGC volume level which is updated once every second
 455           // on the audio manager thread. Note that, |volume| is also updated
 456           // each time SetVolume() is called through IPC by the render-side AGC.
 457           GetAgcVolume(&volume);
 458
 459           // Deliver captured data to the registered consumer using a packet
 460           // size which was specified at construction.
 461           uint32 delay_frames = static_cast<uint32>(audio_delay_frames + 0.5);
 462           while (buffer_frame_index >= packet_size_frames_) {
 463             // Copy data to audio bus to match the OnData interface.
 464             uint8* audio_data = reinterpret_cast<uint8*>(capture_buffer.get());
 465             audio_bus_->FromInterleaved(
 466                 audio_data, audio_bus_->frames(), format_.wBitsPerSample / 8);
 467
 468             // Deliver data packet, delay estimation and volume level to
 469             // the user.
 470             sink_->OnData(
 471                 this, audio_bus_.get(), delay_frames * frame_size_, volume);
 472
 473             // Store parts of the recorded data which can't be delivered
 474             // using the current packet size. The stored section will be used
 475             // either in the next while-loop iteration or in the next
 476             // capture event.
 477             memmove(&capture_buffer[0],
 478                     &capture_buffer[packet_size_bytes_],
 479                     (buffer_frame_index - packet_size_frames_) * frame_size_);
 480
 481             buffer_frame_index -= packet_size_frames_;
 482             delay_frames -= packet_size_frames_;
 483           }
 484         }
 485         break;
 486       default:
 487         error = true;
 488         break;
 489     }
 490   }
 491
 492   if (recording && error) {
 493     // TODO(henrika): perhaps it worth improving the cleanup here by e.g.
 494     // stopping the audio client, joining the thread etc.?
 495     NOTREACHED() << "WASAPI capturing failed with error code "
 496                  << GetLastError();
 497   }
 498
 499   // Disable MMCSS.
 500   if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
 501     PLOG(WARNING) << "Failed to disable MMCSS";
 502   }
 503 }
 504
 505 void WASAPIAudioInputStream::HandleError(HRESULT err) {
 506   NOTREACHED() << "Error code: " << err;
 507   if (sink_)
 508     sink_->OnError(this);
 509 }
 510
 511 HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
 512   DCHECK(!endpoint_device_.get());
 513
 514   ScopedComPtr<IMMDeviceEnumerator> enumerator;
 515   HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator),
 516                                          NULL, CLSCTX_INPROC_SERVER);
 517   if (FAILED(hr))
 518     return hr;
 519
 520   // Retrieve the IMMDevice by using the specified role or the specified
 521   // unique endpoint device-identification string.
 522
 523   if (effects_ & AudioParameters::DUCKING) {
 524     // Ducking has been requested and it is only supported for the default
 525     // communication device.  So, let's open up the communication device and
 526     // see if the ID of that device matches the requested ID.
 527     // We consider a kDefaultDeviceId as well as an explicit device id match,
 528     // to be valid matches.
 529     hr = enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications,
 530                                              endpoint_device_.Receive());
 531     if (endpoint_device_.get() &&
 532         device_id_ != AudioManagerBase::kDefaultDeviceId) {
 533       base::win::ScopedCoMem<WCHAR> communications_id;
 534       endpoint_device_->GetId(&communications_id);
 535       if (device_id_ !=
 536           base::WideToUTF8(static_cast<WCHAR*>(communications_id))) {
 537         DLOG(WARNING) << "Ducking has been requested for a non-default device."
 538                          "Not supported.";
 539         // We can't honor the requested effect flag, so turn it off and
 540         // continue.  We'll check this flag later to see if we've actually
 541         // opened up the communications device, so it's important that it
 542         // reflects the active state.
 543         effects_ &= ~AudioParameters::DUCKING;
 544         endpoint_device_.Release();  // Fall back on code below.
 545       }
 546     }
 547   }
 548
 549   if (!endpoint_device_.get()) {
 550     if (device_id_ == AudioManagerBase::kDefaultDeviceId) {
 551       // Retrieve the default capture audio endpoint for the specified role.
 552       // Note that, in Windows Vista, the MMDevice API supports device roles
 553       // but the system-supplied user interface programs do not.
 554       hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
 555                                                endpoint_device_.Receive());
 556     } else if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
 557       // Capture the default playback stream.
 558       hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
 559                                                endpoint_device_.Receive());
 560     } else {
 561       hr = enumerator->GetDevice(base::UTF8ToUTF16(device_id_).c_str(),
 562                                  endpoint_device_.Receive());
 563     }
 564   }
 565
 566   if (FAILED(hr))
 567     return hr;
 568
 569   // Verify that the audio endpoint device is active, i.e., the audio
 570   // adapter that connects to the endpoint device is present and enabled.
 571   DWORD state = DEVICE_STATE_DISABLED;
 572   hr = endpoint_device_->GetState(&state);
 573   if (FAILED(hr))
 574     return hr;
 575
 576   if (!(state & DEVICE_STATE_ACTIVE)) {
 577     DLOG(ERROR) << "Selected capture device is not active.";
 578     hr = E_ACCESSDENIED;
 579   }
 580
 581   return hr;
 582 }
 583
 584 HRESULT WASAPIAudioInputStream::ActivateCaptureDevice() {
 585   // Creates and activates an IAudioClient COM object given the selected
 586   // capture endpoint device.
 587   HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),
 588                                           CLSCTX_INPROC_SERVER,
 589                                           NULL,
 590                                           audio_client_.ReceiveVoid());
 591   return hr;
 592 }
 593
 594 HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {
 595   HRESULT hr = S_OK;
 596 #ifndef NDEBUG
 597   // The GetMixFormat() method retrieves the stream format that the
 598   // audio engine uses for its internal processing of shared-mode streams.
 599   // The method always uses a WAVEFORMATEXTENSIBLE structure, instead
 600   // of a stand-alone WAVEFORMATEX structure, to specify the format.
 601   // An WAVEFORMATEXTENSIBLE structure can specify both the mapping of
 602   // channels to speakers and the number of bits of precision in each sample.
 603   base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> format_ex;
 604   hr = audio_client_->GetMixFormat(
 605       reinterpret_cast<WAVEFORMATEX**>(&format_ex));
 606
 607   // See http://msdn.microsoft.com/en-us/windows/hardware/gg463006#EFH
 608   // for details on the WAVE file format.
 609   WAVEFORMATEX format = format_ex->Format;
 610   DVLOG(2) << "WAVEFORMATEX:";
 611   DVLOG(2) << "  wFormatTags    : 0x" << std::hex << format.wFormatTag;
 612   DVLOG(2) << "  nChannels      : " << format.nChannels;
 613   DVLOG(2) << "  nSamplesPerSec : " << format.nSamplesPerSec;
 614   DVLOG(2) << "  nAvgBytesPerSec: " << format.nAvgBytesPerSec;
 615   DVLOG(2) << "  nBlockAlign    : " << format.nBlockAlign;
 616   DVLOG(2) << "  wBitsPerSample : " << format.wBitsPerSample;
 617   DVLOG(2) << "  cbSize         : " << format.cbSize;
 618
 619   DVLOG(2) << "WAVEFORMATEXTENSIBLE:";
 620   DVLOG(2) << " wValidBitsPerSample: " <<
 621       format_ex->Samples.wValidBitsPerSample;
 622   DVLOG(2) << " dwChannelMask      : 0x" << std::hex <<
 623       format_ex->dwChannelMask;
 624   if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
 625     DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_PCM";
 626   else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
 627     DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
 628   else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_WAVEFORMATEX)
 629     DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_WAVEFORMATEX";
 630 #endif
 631   return hr;
 632 }
 633
 634 bool WASAPIAudioInputStream::DesiredFormatIsSupported() {
 635   // An application that uses WASAPI to manage shared-mode streams can rely
 636   // on the audio engine to perform only limited format conversions. The audio
 637   // engine can convert between a standard PCM sample size used by the
 638   // application and the floating-point samples that the engine uses for its
 639   // internal processing. However, the format for an application stream
 640   // typically must have the same number of channels and the same sample
 641   // rate as the stream format used by the device.
 642   // Many audio devices support both PCM and non-PCM stream formats. However,
 643   // the audio engine can mix only PCM streams.
 644   base::win::ScopedCoMem<WAVEFORMATEX> closest_match;
 645   HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
 646                                                 &format_,
 647                                                 &closest_match);
 648   DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "
 649                                 << "but a closest match exists.";
 650   return (hr == S_OK);
 651 }
 652
 653 HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
 654   DWORD flags;
 655   // Use event-driven mode only fo regular input devices. For loopback the
 656   // EVENTCALLBACK flag is specified when intializing
 657   // |audio_render_client_for_loopback_|.
 658   if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
 659     flags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
 660   } else {
 661     flags =
 662       AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
 663   }
 664
 665   // Initialize the audio stream between the client and the device.
 666   // We connect indirectly through the audio engine by using shared mode.
 667   // Note that, |hnsBufferDuration| is set of 0, which ensures that the
 668   // buffer is never smaller than the minimum buffer size needed to ensure
 669   // that glitches do not occur between the periodic processing passes.
 670   // This setting should lead to lowest possible latency.
 671   HRESULT hr = audio_client_->Initialize(
 672       AUDCLNT_SHAREMODE_SHARED,
 673       flags,
 674       0,  // hnsBufferDuration
 675       0,
 676       &format_,
 677       (effects_ & AudioParameters::DUCKING) ? &kCommunicationsSessionId : NULL);
 678
 679   if (FAILED(hr))
 680     return hr;
 681
 682   // Retrieve the length of the endpoint buffer shared between the client
 683   // and the audio engine. The buffer length determines the maximum amount
 684   // of capture data that the audio engine can read from the endpoint buffer
 685   // during a single processing pass.
 686   // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
 687   hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
 688   if (FAILED(hr))
 689     return hr;
 690
 691   DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_
 692            << " [frames]";
 693
 694 #ifndef NDEBUG
 695   // The period between processing passes by the audio engine is fixed for a
 696   // particular audio endpoint device and represents the smallest processing
 697   // quantum for the audio engine. This period plus the stream latency between
 698   // the buffer and endpoint device represents the minimum possible latency
 699   // that an audio application can achieve.
 700   // TODO(henrika): possibly remove this section when all parts are ready.
 701   REFERENCE_TIME device_period_shared_mode = 0;
 702   REFERENCE_TIME device_period_exclusive_mode = 0;
 703   HRESULT hr_dbg = audio_client_->GetDevicePeriod(
 704       &device_period_shared_mode, &device_period_exclusive_mode);
 705   if (SUCCEEDED(hr_dbg)) {
 706     DVLOG(1) << "device period: "
 707              << static_cast<double>(device_period_shared_mode / 10000.0)
 708              << " [ms]";
 709   }
 710
 711   REFERENCE_TIME latency = 0;
 712   hr_dbg = audio_client_->GetStreamLatency(&latency);
 713   if (SUCCEEDED(hr_dbg)) {
 714     DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0)
 715              << " [ms]";
 716   }
 717 #endif
 718
 719   // Set the event handle that the audio engine will signal each time a buffer
 720   // becomes ready to be processed by the client.
 721   //
 722   // In loopback case the capture device doesn't receive any events, so we
 723   // need to create a separate playback client to get notifications. According
 724   // to MSDN:
 725   //
 726   //   A pull-mode capture client does not receive any events when a stream is
 727   //   initialized with event-driven buffering and is loopback-enabled. To
 728   //   work around this, initialize a render stream in event-driven mode. Each
 729   //   time the client receives an event for the render stream, it must signal
 730   //   the capture client to run the capture thread that reads the next set of
 731   //   samples from the capture endpoint buffer.
 732   //
 733   // http://msdn.microsoft.com/en-us/library/windows/desktop/dd316551(v=vs.85).aspx
 734   if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
 735     hr = endpoint_device_->Activate(
 736         __uuidof(IAudioClient), CLSCTX_INPROC_SERVER, NULL,
 737         audio_render_client_for_loopback_.ReceiveVoid());
 738     if (FAILED(hr))
 739       return hr;
 740
 741     hr = audio_render_client_for_loopback_->Initialize(
 742         AUDCLNT_SHAREMODE_SHARED,
 743         AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST,
 744         0, 0, &format_, NULL);
 745     if (FAILED(hr))
 746       return hr;
 747
 748     hr = audio_render_client_for_loopback_->SetEventHandle(
 749         audio_samples_ready_event_.Get());
 750   } else {
 751     hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
 752   }
 753
 754   if (FAILED(hr))
 755     return hr;
 756
 757   // Get access to the IAudioCaptureClient interface. This interface
 758   // enables us to read input data from the capture endpoint buffer.
 759   hr = audio_client_->GetService(__uuidof(IAudioCaptureClient),
 760                                  audio_capture_client_.ReceiveVoid());
 761   if (FAILED(hr))
 762     return hr;
 763
 764   // Obtain a reference to the ISimpleAudioVolume interface which enables
 765   // us to control the master volume level of an audio session.
 766   hr = audio_client_->GetService(__uuidof(ISimpleAudioVolume),
 767                                  simple_audio_volume_.ReceiveVoid());
 768   return hr;
 769 }
 770
 771 }  // namespace media