// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_unified_win.h"

#include <Functiondiscoverykeys_devpkey.h>

#include "base/debug/trace_event.h"
#ifndef NDEBUG
#include "base/file_util.h"
#include "base/path_service.h"
#endif
#include "base/time/time.h"
#include "base/win/scoped_com_initializer.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"

using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
using base::win::ScopedCoMem;

// Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
// Larger values of alpha reduce the level of smoothing.
// See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
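// Example (illustrative): with alpha = 0.1, each new sample moves the
// smoothed value 10% of the way toward it, i.e.
//   smoothed += 0.1 * (sample - smoothed);
// so a step change reaches about 65% of its final value after ten samples.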
static const double kAlpha = 0.1;

// Compute a rate compensation which always attracts us back to a specified
// target level over a period of |kCorrectionTimeSeconds|.
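// Example (illustrative): at a 48 kHz output rate the correction period
// corresponds to 0.1 * 48000 = 4800 frames; an average FIFO surplus of
// 48 frames then yields a rate compensation of (4800 + 48) / 4800 = 1.01.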
static const double kCorrectionTimeSeconds = 0.1;

#ifndef NDEBUG
// Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
// See LogElementNames enumerator for details on what each column represents.
static const size_t kMaxNumSampleTypes = 4;

static const size_t kMaxNumParams = 2;

// Max number of rows in the output file |kUnifiedAudioDebugFileName|.
// Each row corresponds to one set of sample values for (approximately) the
// same time instant (stored in the first column).
static const size_t kMaxFileSamples = 10000;

// Name of output debug file used for off-line analysis of measurements which
// can be utilized for performance tuning of this class.
static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";

// Name of output debug file used for off-line analysis of measurements.
// This file will contain a list of audio parameters.
static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";
#endif

typedef uint32 ChannelConfig;

// Retrieves an integer mask which corresponds to the channel layout the
// audio engine uses for its internal processing/mixing of shared-mode
// streams. This mask indicates which channels are present in the multi-
// channel stream. The least significant bit corresponds with the Front Left
// speaker, the next least significant bit corresponds to the Front Right
// speaker, and so on, continuing in the order defined in KsMedia.h.
// See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
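// Example (illustrative): a plain stereo mix reports
// SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT = 0x1 | 0x2 = 0x3
// (KSAUDIO_SPEAKER_STEREO).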
static ChannelConfig GetChannelConfig(EDataFlow data_flow) {
  WAVEFORMATPCMEX format;
  return SUCCEEDED(media::CoreAudioUtil::GetDefaultSharedModeMixFormat(
      data_flow, eConsole, &format)) ?
      static_cast<int>(format.dwChannelMask) : 0;
}

// Use the acquired IAudioClock interface to derive a time stamp of the audio
// sample which is currently playing through the speakers.
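// Example (illustrative): a device position of 24000 ticks at a device
// frequency of 48000 Hz gives 1000 * (24000 / 48000) = 500 [ms].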
static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
  UINT64 device_frequency = 0, position = 0;
  if (FAILED(clock->GetFrequency(&device_frequency)) ||
      FAILED(clock->GetPosition(&position, NULL))) {
    return 0.0;
  }
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(position) / device_frequency);
}

// Get a time stamp in milliseconds given the number of audio frames in
// |num_frames| using the current sample rate |fs| as scale factor.
// Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(num_frames) / fs);
}

// Convert a timestamp in milliseconds to byte units given the audio format
// in |format|.
// Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
// is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
static int MillisecondsToBytes(double ts_milliseconds,
                               const WAVEFORMATPCMEX& format) {
  double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
  return static_cast<int>(seconds * format.Format.nSamplesPerSec *
      format.Format.nBlockAlign + 0.5);
}

// Convert frame count to milliseconds given the audio format in |format|.
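// Example (illustrative): 480 frames at 48000 Hz => 10 [ms].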
static double FrameCountToMilliseconds(int num_frames,
                                       const WAVEFORMATPCMEX& format) {
  return (base::Time::kMillisecondsPerSecond * num_frames) /
      static_cast<double>(format.Format.nSamplesPerSec);
}

namespace media {

WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
                                         const AudioParameters& params,
                                         const std::string& input_device_id)
    : creating_thread_id_(base::PlatformThread::CurrentId()),
      manager_(manager),
      params_(params),
      input_channels_(params.input_channels()),
      output_channels_(params.channels()),
      input_device_id_(input_device_id),
      share_mode_(CoreAudioUtil::GetShareMode()),
      volume_(1.0),
      output_buffer_size_frames_(0),
      input_buffer_size_frames_(0),
      endpoint_render_buffer_size_frames_(0),
      endpoint_capture_buffer_size_frames_(0),
      num_written_frames_(0),
      total_delay_ms_(0.0),
      total_delay_bytes_(0),
      source_(NULL),
      input_callback_received_(false),
      io_sample_rate_ratio_(1),
      target_fifo_frames_(0),
      average_delta_(0),
      fifo_rate_compensation_(1),
      update_output_delay_(false),
      capture_delay_ms_(0) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
  VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";

  VLOG(1) << "Input channels : " << input_channels_;
  VLOG(1) << "Output channels: " << output_channels_;
  VLOG(1) << "Sample rate    : " << params_.sample_rate();
  VLOG(1) << "Buffer size    : " << params.frames_per_buffer();

#ifndef NDEBUG
  input_time_stamps_.reset(new int64[kMaxFileSamples]);
  num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
  resampler_margin_.reset(new int[kMaxFileSamples]);
  fifo_rate_comps_.reset(new double[kMaxFileSamples]);
  num_elements_.reset(new int[kMaxNumSampleTypes]);
  std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
  input_params_.reset(new int[kMaxNumParams]);
  output_params_.reset(new int[kMaxNumParams]);
#endif

  DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
      << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load the avrt.dll";

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time a buffer
  // has been recorded.
  capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Create the event which will be set in Stop() when streaming shall stop.
  stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
}

WASAPIUnifiedStream::~WASAPIUnifiedStream() {
  VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";
#ifndef NDEBUG
  base::FilePath data_file_name;
  PathService::Get(base::DIR_EXE, &data_file_name);
  data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
  data_file_ = file_util::OpenFile(data_file_name, "wt");
  DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";

  size_t n = 0;
  size_t elements_to_write = *std::min_element(
      num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
  while (n < elements_to_write) {
    fprintf(data_file_, "%I64d %d %d %10.9f\n",
            input_time_stamps_[n],
            num_frames_in_fifo_[n],
            resampler_margin_[n],
            fifo_rate_comps_[n]);
    ++n;
  }
  file_util::CloseFile(data_file_);

  base::FilePath param_file_name;
  PathService::Get(base::DIR_EXE, &param_file_name);
  param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
  param_file_ = file_util::OpenFile(param_file_name, "wt");
  DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
  fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
  fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
  file_util::CloseFile(param_file_);
#endif
}

bool WASAPIUnifiedStream::Open() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
  DVLOG(1) << "WASAPIUnifiedStream::Open()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  AudioParameters hw_output_params;
  HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
      eRender, eConsole, &hw_output_params);
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred output audio parameters.";
    return false;
  }

  AudioParameters hw_input_params;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    // Query native parameters for the default capture device.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        eCapture, eConsole, &hw_input_params);
  } else {
    // Query native parameters for the capture device given by
    // |input_device_id_|.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        input_device_id_, &hw_input_params);
  }
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred input audio parameters.";
    return false;
  }

  // It is currently only possible to open up the output audio device using
  // the native number of channels.
  if (output_channels_ != hw_output_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested output channels.";
    return false;
  }

  // It is currently only possible to open up the input audio device using
  // the native number of channels. If the client asks for a higher channel
  // count, we will do channel upmixing in this class. The most typical
  // example is that the client provides stereo but the hardware can only be
  // opened in mono mode. We will do mono to stereo conversion in this case.
  if (input_channels_ < hw_input_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested input channels.";
    return false;
  } else if (input_channels_ > hw_input_params.channels()) {
    ChannelLayout input_layout =
        GuessChannelLayout(hw_input_params.channels());
    ChannelLayout output_layout = GuessChannelLayout(input_channels_);
    channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
    DVLOG(1) << "Remixing input channel layout from " << input_layout
             << " to " << output_layout << "; from "
             << hw_input_params.channels() << " channels to "
             << input_channels_;
  }

  if (hw_output_params.sample_rate() != params_.sample_rate()) {
    LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
               << " must match the hardware sample-rate: "
               << hw_output_params.sample_rate();
    return false;
  }

  if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
    LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
               << " must match the hardware buffer size: "
               << hw_output_params.frames_per_buffer();
    return false;
  }

  // Set up WAVEFORMATPCMEX structures for input and output given the specified
  // audio parameters.
  SetIOFormats(hw_input_params, params_);

  // Create the input and output busses.
  input_bus_ = AudioBus::Create(
      hw_input_params.channels(), input_buffer_size_frames_);
  output_bus_ = AudioBus::Create(params_);

  // One extra bus is needed for the input channel mixing case.
  if (channel_mixer_) {
    DCHECK_LT(hw_input_params.channels(), input_channels_);
    // The size of the |channel_bus_| must be the same as the size of the
    // output bus to ensure that the channel manager can deal with both
    // resampled and non-resampled data as input.
    channel_bus_ = AudioBus::Create(
        input_channels_, params_.frames_per_buffer());
  }

  // Check if FIFO and resampling is required to match the input rate to the
  // output rate. If so, a special thread loop, optimized for this case, will
  // be used. This mode is also called varispeed mode.
  // Note that we can also use this mode when input and output rates are the
  // same but native buffer sizes differ (can happen if two different audio
  // devices are used). For this case, the resampler uses a target ratio of
  // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
  // required to compensate for the difference in buffer sizes.
  // TODO(henrika): we could perhaps improve the performance for the second
  // case here by only using the FIFO and avoid resampling. Not sure how much
  // that would give and we risk not compensating for clock drift.
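  // Example (illustrative): a 44.1 kHz capture device paired with a 48 kHz
  // render device enters varispeed mode with a nominal resampling ratio of
  // 44100 / 48000 = 0.91875, fine-tuned at runtime by the FIFO-based drift
  // compensation.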
  if (hw_input_params.sample_rate() != params_.sample_rate() ||
      hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
    DoVarispeedInitialization(hw_input_params, params_);
  }

  // Render side (event driven only in varispeed mode):

  ScopedComPtr<IAudioClient> audio_output_client =
      CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
  if (!audio_output_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
                                        share_mode_,
                                        &output_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // The |render_event_| will be NULL unless varispeed mode is utilized.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_output_client, &output_format_, render_event_.Get(),
        &endpoint_render_buffer_size_frames_);
    if (FAILED(hr))
      return false;
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
    return false;
  }

  ScopedComPtr<IAudioRenderClient> audio_render_client =
      CoreAudioUtil::CreateRenderClient(audio_output_client);
  if (!audio_render_client)
    return false;

  // Capture side (always event driven but format depends on varispeed or not):

  ScopedComPtr<IAudioClient> audio_input_client;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
  } else {
    ScopedComPtr<IMMDevice> audio_input_device(
        CoreAudioUtil::CreateDevice(input_device_id_));
    audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
  }
  if (!audio_input_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
                                        share_mode_,
                                        &input_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // Include valid event handle for event-driven initialization.
    // The input side is always event driven independent of if varispeed is
    // utilized or not.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_input_client, &input_format_, capture_event_.Get(),
        &endpoint_capture_buffer_size_frames_);
    if (FAILED(hr))
      return false;
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
    return false;
  }

  ScopedComPtr<IAudioCaptureClient> audio_capture_client =
      CoreAudioUtil::CreateCaptureClient(audio_input_client);
  if (!audio_capture_client)
    return false;

  // Varispeed mode requires additional preparations.
  if (VarispeedMode())
    ResetVarispeed();

  // Store all valid COM interfaces.
  audio_output_client_ = audio_output_client;
  audio_render_client_ = audio_render_client;
  audio_input_client_ = audio_input_client;
  audio_capture_client_ = audio_capture_client;

  return SUCCEEDED(hr);
}

void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
  DVLOG(1) << "WASAPIUnifiedStream::Start()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  if (audio_io_thread_) {
    CHECK_EQ(callback, source_);
    return;
  }

  source_ = callback;

  if (VarispeedMode()) {
    ResetVarispeed();
    fifo_rate_compensation_ = 1.0;
    average_delta_ = 0.0;
    input_callback_received_ = false;
    update_output_delay_ = false;
  }

  // Create and start the thread that will listen for capture events.
  // We will also listen on render events on the same thread if varispeed
  // mode is utilized.
  audio_io_thread_.reset(
      new base::DelegateSimpleThread(this, "wasapi_io_thread"));
  audio_io_thread_->Start();
  if (!audio_io_thread_->HasBeenStarted()) {
    DLOG(ERROR) << "Failed to start WASAPI IO thread.";
    return;
  }

  // Start input streaming data between the endpoint buffer and the audio
  // engine.
  HRESULT hr = audio_input_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }

  // Ensure that the endpoint buffer is prepared with silence.
  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
            audio_output_client_, audio_render_client_)) {
      DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
  }
  num_written_frames_ = endpoint_render_buffer_size_frames_;

  // Start output streaming data between the endpoint buffer and the audio
  // engine.
  hr = audio_output_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }
}

void WASAPIUnifiedStream::Stop() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
  DVLOG(1) << "WASAPIUnifiedStream::Stop()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (!audio_io_thread_)
    return;

  // Stop input audio streaming.
  HRESULT hr = audio_input_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop input streaming: " << std::hex << hr;
  }

  // Stop output audio streaming.
  hr = audio_output_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop output streaming: " << std::hex << hr;
  }

  // Wait until the thread completes and perform cleanup.
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();

  // Ensure that we don't quit the main thread loop immediately next
  // time Start() is called.
  ResetEvent(stop_streaming_event_.Get());

  // Clear source callback, it'll be set again on the next Start() call.
  source_ = NULL;

  // Flush all pending data and reset the audio clock stream position to 0.
  hr = audio_output_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset output streaming: " << std::hex << hr;
  }

  hr = audio_input_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset input streaming: " << std::hex << hr;
  }

  // Extra safety check to ensure that the buffers are cleared.
  // If the buffers are not cleared correctly, the next call to Start()
  // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
  // TODO(henrika): this check is only needed for shared-mode streams.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  DCHECK_EQ(0u, num_queued_frames);
}

void WASAPIUnifiedStream::Close() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
  DVLOG(1) << "WASAPIUnifiedStream::Close()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
  Stop();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseOutputStream(this);
}

void WASAPIUnifiedStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  if (volume < 0 || volume > 1)
    return;
  volume_ = volume;
}

void WASAPIUnifiedStream::GetVolume(double* volume) {
  DVLOG(1) << "GetVolume()";
  *volume = static_cast<double>(volume_);
}

void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
  // TODO(henrika): utilize frame_delay?
  // A non-zero frame delay means multiple callbacks were necessary to
  // fulfill the requested number of frames.
  if (frame_delay > 0)
    DVLOG(3) << "frame_delay: " << frame_delay;

#ifndef NDEBUG
  resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
      fifo_->frames() - audio_bus->frames();
  num_elements_[RESAMPLER_MARGIN]++;
#endif

  if (fifo_->frames() < audio_bus->frames()) {
    DVLOG(ERROR) << "Not enough data in the FIFO ("
                 << fifo_->frames() << " < " << audio_bus->frames() << ")";
    audio_bus->Zero();
    return;
  }

  fifo_->Consume(audio_bus, 0, audio_bus->frames());
}

void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
                                       const AudioParameters& output_params) {
  for (int n = 0; n < 2; ++n) {
    const AudioParameters& params = (n == 0) ? input_params : output_params;
    WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
    WAVEFORMATEX* format = &xformat->Format;

    // Begin with the WAVEFORMATEX structure that specifies the basic format.
    format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    format->nChannels = params.channels();
    format->nSamplesPerSec = params.sample_rate();
    format->wBitsPerSample = params.bits_per_sample();
    format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
    format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
    format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
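    // Example (illustrative): 16-bit stereo at 48 kHz gives
    // nBlockAlign = (16 / 8) * 2 = 4 bytes per frame and
    // nAvgBytesPerSec = 48000 * 4 = 192000.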

    // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
    // Note that we always open up using the native channel layout.
    (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
    (*xformat).dwChannelMask = (n == 0) ?
        GetChannelConfig(eCapture) : GetChannelConfig(eRender);
    (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  }

  input_buffer_size_frames_ = input_params.frames_per_buffer();
  output_buffer_size_frames_ = output_params.frames_per_buffer();
  VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
  VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;

#ifndef NDEBUG
  input_params_[0] = input_format_.Format.nSamplesPerSec;
  input_params_[1] = input_buffer_size_frames_;
  output_params_[0] = output_format_.Format.nSamplesPerSec;
  output_params_[1] = output_buffer_size_frames_;
#endif
}

void WASAPIUnifiedStream::DoVarispeedInitialization(
    const AudioParameters& input_params, const AudioParameters& output_params) {
  DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";

  // A FIFO is required in this mode for input to output buffering.
  // Note that it will add some latency.
  fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
  VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
          << " (#channels=" << input_params.channels() << ")";

  // Create the multi channel resampler using the initial sample rate ratio.
  // We will call MultiChannelResampler::SetRatio() during runtime to
  // allow arbitrary combinations of input and output devices running off
  // different clocks and using different drivers, with potentially
  // differing sample-rates. Note that the requested block size is given by
  // the native input buffer size |input_buffer_size_frames_|.
  io_sample_rate_ratio_ = input_params.sample_rate() /
      static_cast<double>(output_params.sample_rate());
  DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
  resampler_.reset(new MultiChannelResampler(
      input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_,
      base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
  VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
          << output_params.sample_rate();

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  // The actual size will vary but the goal is to ensure that the average size
  // is given by this value.
  target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
  VLOG(1) << "Target FIFO size: " << target_fifo_frames_;
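
  // Example (illustrative): if kTargetFifoSafetyFactor were 2 and the native
  // input buffer held 441 frames, the FIFO would aim to hold ~882 frames on
  // average.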

  // Create the event which the audio engine will signal each time it
  // wants an audio buffer to render.
  render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Allocate memory for temporary audio bus used to store resampled input
  // audio.
  resampled_bus_ = AudioBus::Create(
      input_params.channels(), output_buffer_size_frames_);

  // Buffer initial silence corresponding to target I/O buffering.
  ResetVarispeed();
}

void WASAPIUnifiedStream::ResetVarispeed() {
  DCHECK(VarispeedMode());

  // Buffer initial silence corresponding to target I/O buffering.
  fifo_->Clear();
  scoped_ptr<AudioBus> silence =
      AudioBus::Create(input_format_.Format.nChannels,
                       target_fifo_frames_);
  silence->Zero();
  fifo_->Push(silence.get());
}

void WASAPIUnifiedStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  // TODO(henrika): investigate if it is possible to include these additional
  // settings in SetThreadPriority() as well.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  // The IAudioClock interface enables us to monitor a stream's data
  // rate and the current position in the stream. Allocate it before we
  // start streaming.
  ScopedComPtr<IAudioClock> audio_output_clock;
  HRESULT hr = audio_output_client_->GetService(
      __uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
  LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
                              << std::hex << hr;

  bool streaming = true;
  bool error = false;

  HANDLE wait_array[3];
  size_t num_handles = 0;
  wait_array[num_handles++] = stop_streaming_event_;
  wait_array[num_handles++] = capture_event_;
  if (render_event_.IsValid()) {
    // One extra event handle is needed in varispeed mode.
    wait_array[num_handles++] = render_event_;
  }
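
  // Note (illustrative): WaitForMultipleObjects() returns WAIT_OBJECT_0 plus
  // the array index of the signaled handle, so the order used above defines
  // the case labels in the switch statement below.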

  // Keep streaming audio until stop event is signaled.
  // Capture events are always used but render events are only active in
  // varispeed mode.
  while (streaming && !error) {
    // Wait for a close-down event, or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(num_handles,
                                               wait_array,
                                               FALSE,
                                               INFINITE);
    switch (wait_result) {
      case WAIT_OBJECT_0 + 0:
        // |stop_streaming_event_| has been set.
        streaming = false;
        break;
      case WAIT_OBJECT_0 + 1:
        // |capture_event_| has been set
        if (VarispeedMode()) {
          ProcessInputAudio();
        } else {
          ProcessInputAudio();
          ProcessOutputAudio(audio_output_clock);
        }
        break;
      case WAIT_OBJECT_0 + 2:
        DCHECK(VarispeedMode());
        // |render_event_| has been set
        ProcessOutputAudio(audio_output_clock);
        break;
      default:
        error = true;
        break;
    }
  }

  if (streaming && error) {
    // Stop audio streaming since something has gone wrong in our main thread
    // loop. Note that we are still in a "started" state, hence a Stop() call
    // is required to join the thread properly.
    audio_input_client_->Stop();
    audio_output_client_->Stop();
    PLOG(ERROR) << "WASAPI streaming failed.";
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIUnifiedStream::ProcessInputAudio() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");

  BYTE* data_ptr = NULL;
  UINT32 num_captured_frames = 0;
  DWORD flags = 0;
  UINT64 device_position = 0;
  UINT64 capture_time_stamp = 0;

  const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;

  base::TimeTicks now_tick = base::TimeTicks::HighResNow();

#ifndef NDEBUG
  if (VarispeedMode()) {
    input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
        now_tick.ToInternalValue();
    num_elements_[INPUT_TIME_STAMP]++;
  }
#endif

  // Retrieve the amount of data in the capture endpoint buffer.
  // |endpoint_capture_time_stamp| is the value of the performance
  // counter at the time that the audio endpoint device recorded
  // the device position of the first audio frame in the data packet.
  HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
                                                &num_captured_frames,
                                                &flags,
                                                &device_position,
                                                &capture_time_stamp);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to get data from the capture buffer";
    return;
  }

  if (hr == AUDCLNT_S_BUFFER_EMPTY) {
    // The return code is a success code but a new packet is *not* available
    // and none of the output parameters in the GetBuffer() call contains valid
    // values. Best we can do is to deliver silence and avoid setting
    // |input_callback_received_| since this only seems to happen for the
    // initial event(s) on some devices.
    input_bus_->Zero();
  } else {
    // Valid data has been recorded and it is now OK to set the flag which
    // informs the render side that capturing has started.
    input_callback_received_ = true;
  }

  if (num_captured_frames != 0) {
    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
      // Clear out the capture buffer since silence is reported.
      input_bus_->Zero();
    } else {
      // Store captured data in an audio bus after de-interleaving
      // the data to match the audio bus structure.
      input_bus_->FromInterleaved(
          data_ptr, num_captured_frames, bytes_per_sample);
    }
  }

  hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

  // Buffer input into FIFO if varispeed mode is used. The render event
  // will drive resampling of this data to match the output side.
  if (VarispeedMode()) {
    int available_frames = fifo_->max_frames() - fifo_->frames();
    if (input_bus_->frames() <= available_frames) {
      fifo_->Push(input_bus_.get());
    }
#ifndef NDEBUG
    num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
        fifo_->frames();
    num_elements_[NUM_FRAMES_IN_FIFO]++;
#endif
  }

  // Save resources by not asking for new delay estimates each time.
  // These estimates are fairly stable and it is perfectly safe to only
  // sample at a rate of ~1Hz.
  // TODO(henrika): we might have to increase the update rate in varispeed
  // mode since the delay variations are higher in this mode.
  if ((now_tick - last_delay_sample_time_).InMilliseconds() >
      kTimeDiffInMillisecondsBetweenDelayMeasurements &&
      input_callback_received_) {
    // Calculate the estimated capture delay, i.e., the latency between
    // the recording time and the time when we are notified about
    // the recorded data. Note that the capture time stamp is given in
    // 100-nanosecond (0.1 microseconds) units.
    base::TimeDelta diff =
        now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
    capture_delay_ms_ = diff.InMillisecondsF();

    last_delay_sample_time_ = now_tick;
    update_output_delay_ = true;
  }
}

void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");

  if (!input_callback_received_) {
    if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
      if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
              audio_output_client_, audio_render_client_))
        DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
    return;
  }

  // Rate adjusted resampling is required in varispeed mode. It means that
  // recorded audio samples will be read from the FIFO, resampled to match the
  // output sample-rate and then stored in |resampled_bus_|.
  if (VarispeedMode()) {
    // Calculate a varispeed rate scalar factor to compensate for drift between
    // input and output. We use the actual number of frames still in the FIFO
    // compared with the ideal value of |target_fifo_frames_|.
    int delta = fifo_->frames() - target_fifo_frames_;

    // Average |delta| because it can jitter back/forth quite frequently
    // by +/- the hardware buffer-size *if* the input and output callbacks are
    // happening at almost exactly the same time. Also, if the input and output
    // sample-rates are different then |delta| will jitter quite a bit due to
    // the rate conversion happening in the varispeed, plus the jittering of
    // the callbacks. The average value is what's important here.
    // We use an exponential smoothing filter to reduce the variations.
    average_delta_ += kAlpha * (delta - average_delta_);

    // Compute a rate compensation which always attracts us back to the
    // |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
    double correction_time_frames =
        kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
    fifo_rate_compensation_ =
        (correction_time_frames + average_delta_) / correction_time_frames;

#ifndef NDEBUG
    fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
        fifo_rate_compensation_;
    num_elements_[RATE_COMPENSATION]++;
#endif

    // Adjust for FIFO drift.
    const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
    resampler_->SetRatio(new_ratio);
    // Get resampled input audio from FIFO where the size is given by the
    // output side.
    resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
  }

  // Derive a new total delay estimate if the capture side has set the
  // |update_output_delay_| flag.
  if (update_output_delay_) {
    // Calculate the estimated render delay, i.e., the time difference
    // between the time when data is added to the endpoint buffer and
    // when the data is played out on the actual speaker.
    const double stream_pos = CurrentStreamPosInMilliseconds(
        num_written_frames_ + output_buffer_size_frames_,
        output_format_.Format.nSamplesPerSec);
    const double speaker_pos =
        SpeakerStreamPosInMilliseconds(audio_output_clock);
    const double render_delay_ms = stream_pos - speaker_pos;
    const double fifo_delay_ms = VarispeedMode() ?
        FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;

    // Derive the total delay, i.e., the sum of the input and output
    // delays. Also convert the value into byte units. An extra FIFO delay
    // is added for varispeed usage cases.
    total_delay_ms_ = VarispeedMode() ?
        capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
        capture_delay_ms_ + render_delay_ms;
    DVLOG(2) << "total_delay_ms   : " << total_delay_ms_;
    DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
    DVLOG(3) << " render_delay_ms : " << render_delay_ms;
    DVLOG(3) << " fifo_delay_ms   : " << fifo_delay_ms;
    total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);
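
    // Example (illustrative): 10 ms capture delay + 20 ms render delay +
    // 20 ms FIFO delay => total_delay_ms_ = 50; at 48 kHz with 4-byte frames
    // this converts to 0.05 * 48000 * 4 = 9600 bytes.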

    // Wait for new signal from the capture side.
    update_output_delay_ = false;
  }

  // Select source depending on if varispeed is utilized or not.
  // Also, the source might be the output of a channel mixer if channel mixing
  // is required to match the native input channels to the number of input
  // channels used by the client (given by |input_channels_| in this case).
  AudioBus* input_bus = VarispeedMode() ?
      resampled_bus_.get() : input_bus_.get();
  if (channel_mixer_) {
    DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
    // Most common case is 1->2 channel upmixing.
    channel_mixer_->Transform(input_bus, channel_bus_.get());
    // Use the output from the channel mixer as new input bus.
    input_bus = channel_bus_.get();
  }

  // Prepare for rendering by calling OnMoreIOData().
  int frames_filled = source_->OnMoreIOData(
      input_bus,
      output_bus_.get(),
      AudioBuffersState(0, total_delay_bytes_));
  DCHECK_EQ(frames_filled, output_bus_->frames());

  // Keep track of number of rendered frames since we need it for
  // our delay calculations.
  num_written_frames_ += frames_filled;

  // Derive the amount of available space in the endpoint buffer.
  // Avoid render attempt if there is no room for a captured packet.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  if (endpoint_render_buffer_size_frames_ - num_queued_frames <
      output_buffer_size_frames_)
    return;

  // Grab all available space in the rendering endpoint buffer
  // into which the client can write a data packet.
  uint8* audio_data = NULL;
  HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
                                               &audio_data);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to access render buffer";
    return;
  }

  const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;

  // Convert the audio bus content to interleaved integer data using
  // |audio_data| as destination.
  output_bus_->Scale(volume_);
  output_bus_->ToInterleaved(
      output_buffer_size_frames_, bytes_per_sample, audio_data);

  // Release the buffer space acquired in the GetBuffer() call.
  hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";
}

void WASAPIUnifiedStream::HandleError(HRESULT err) {
  CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
        (!started() && GetCurrentThreadId() == creating_thread_id_));
  NOTREACHED() << "Error code: " << std::hex << err;
  if (source_)
    source_->OnError(this);
}

void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
  CHECK(GetCurrentThreadId() == creating_thread_id_);
  DCHECK(audio_io_thread_.get());
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();
  HandleError(err);
}

}  // namespace media