content/renderer/media/webrtc_audio_device_impl.h

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
   6 #define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
   7
   8 #include <string>
   9 #include <vector>
  10
  11 #include "base/basictypes.h"
  12 #include "base/compiler_specific.h"
  13 #include "base/files/file.h"
  14 #include "base/logging.h"
  15 #include "base/memory/ref_counted.h"
  16 #include "base/memory/scoped_ptr.h"
  17 #include "base/threading/thread_checker.h"
  18 #include "content/common/content_export.h"
  19 #include "content/renderer/media/webrtc_audio_capturer.h"
  20 #include "content/renderer/media/webrtc_audio_device_not_impl.h"
  21 #include "ipc/ipc_platform_file.h"
  22 #include "media/base/audio_capturer_source.h"
  23 #include "media/base/audio_renderer_sink.h"
  24
  25 // A WebRtcAudioDeviceImpl instance implements the abstract interface
  26 // webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
  27 // VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
  28 // Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
  29 // session id that tells which device to use. The user can then call
  30 // WebRtcAudioDeviceImpl::StartPlayout() and
  31 // WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
  32 // and start audio rendering and capturing in the browser process. IPC is
  33 // utilized to set up the media streams.
  34 //
  35 // Usage example:
  36 //
  37 //   using namespace webrtc;
  38 //
  39 //   {
  40 //      scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
  41 //      external_adm = new WebRtcAudioDeviceImpl();
  42 //      external_adm->SetSessionId(session_id);
  43 //      VoiceEngine* voe = VoiceEngine::Create();
  44 //      VoEBase* base = VoEBase::GetInterface(voe);
  45 //      base->Init(external_adm);
  46 //      int ch = base->CreateChannel();
  47 //      ...
  48 //      base->StartReceive(ch)
  49 //      base->StartPlayout(ch);
  50 //      base->StartSending(ch);
  51 //      ...
  52 //      <== full-duplex audio session with AGC enabled ==>
  53 //      ...
  54 //      base->DeleteChannel(ch);
  55 //      base->Terminate();
  56 //      base->Release();
  57 //      VoiceEngine::Delete(voe);
  58 //   }
  59 //
  60 // webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
  61 //
  62 //  RegisterAudioCallback(this)
//    webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
  64 //    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
  65 //
  66 //  Init()
  67 //    Creates and initializes the AudioOutputDevice and AudioInputDevice
  68 //    objects.
  69 //
  70 //  SetAGC(true)
  71 //    Enables the adaptive analog mode of the AGC which ensures that a
  72 //    suitable microphone volume level will be set. This scheme will affect
  73 //    the actual microphone control slider.
  74 //
  75 // AGC overview:
  76 //
  77 // It aims to maintain a constant speech loudness level from the microphone.
  78 // This is done by both controlling the analog microphone gain and applying
  79 // digital gain. The microphone gain on the sound card is slowly
  80 // increased/decreased during speech only. By observing the microphone control
  81 // slider you can see it move when you speak. If you scream, the slider moves
  82 // downwards and then upwards again when you return to normal. It is not
  83 // uncommon that the slider hits the maximum. This means that the maximum
  84 // analog gain is not large enough to give the desired loudness. Nevertheless,
  85 // we can in general still attain the desired loudness. If the microphone
  86 // control slider is moved manually, the gain adaptation restarts and returns
  87 // to roughly the same position as before the change if the circumstances are
  88 // still the same. When the input microphone signal causes saturation, the
  89 // level is decreased dramatically and has to re-adapt towards the old level.
  90 // The adaptation is a slowly varying process and at the beginning of capture
  91 // this is noticed by a slow increase in volume. Smaller changes in microphone
// input level are leveled out by the built-in digital control. For larger
  93 // differences we need to rely on the slow adaptation.
  94 // See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
  95 //
  96 // AGC implementation details:
  97 //
  98 // The adaptive analog mode of the AGC is always enabled for desktop platforms
  99 // in WebRTC.
 100 //
 101 // Before recording starts, the ADM enables AGC on the AudioInputDevice.
 102 //
 103 // A capture session with AGC is started up as follows (simplified):
 104 //
 105 //                            [renderer]
 106 //                                |
 107 //                     ADM::StartRecording()
 108 //             AudioInputDevice::InitializeOnIOThread()
 109 //           AudioInputHostMsg_CreateStream(..., agc=true)               [IPC]
 110 //                                |
 111 //                       [IPC to the browser]
 112 //                                |
 113 //              AudioInputRendererHost::OnCreateStream()
 114 //              AudioInputController::CreateLowLatency()
 115 //         AudioInputController::DoSetAutomaticGainControl(true)
 116 //            AudioInputStream::SetAutomaticGainControl(true)
 117 //                                |
 118 // AGC is now enabled in the media layer and streaming starts (details omitted).
 119 // The figure below illustrates the AGC scheme which is active in combination
 120 // with the default media flow explained earlier.
 121 //                                |
 122 //                            [browser]
 123 //                                |
 124 //                AudioInputStream::(Capture thread loop)
 125 //  AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
 126 //                 AudioInputData::OnData(..., volume)
 127 //              AudioInputController::OnData(..., volume)
 128 //               AudioInputSyncWriter::Write(..., volume)
 129 //                                |
 130 //      [volume | size | data] is sent to the renderer         [shared memory]
 131 //                                |
 132 //                            [renderer]
 133 //                                |
 134 //          AudioInputDevice::AudioThreadCallback::Process()
 135 //            WebRtcAudioDeviceImpl::Capture(..., volume)
 136 //    AudioTransport::RecordedDataIsAvailable(...,volume, new_volume)
 137 //                                |
 138 // The AGC now uses the current volume input and computes a suitable new
// level given by the |new_volume| output. This value is only non-zero if the
// AGC has taken a decision that the microphone level should change.
 141 //                                |
 142 //                      if (new_volume != 0)
 143 //              AudioInputDevice::SetVolume(new_volume)
 144 //              AudioInputHostMsg_SetVolume(new_volume)                  [IPC]
 145 //                                |
 146 //                       [IPC to the browser]
 147 //                                |
 148 //                 AudioInputRendererHost::OnSetVolume()
 149 //                  AudioInputController::SetVolume()
 150 //             AudioInputStream::SetVolume(scaled_volume)
 151 //                                |
 152 // Here we set the new microphone level in the media layer and at the same time
 153 // read the new setting (we might not get exactly what is set).
 154 //                                |
 155 //             AudioInputData::OnData(..., updated_volume)
 156 //           AudioInputController::OnData(..., updated_volume)
 157 //                                |
 158 //                                |
// This process repeats until we stop capturing data. Note that a common
 160 // steady state is that the volume control reaches its max and the new_volume
 161 // value from the AGC is zero. A loud voice input is required to break this
 162 // state and start lowering the level again.
 163 //
 164 // Implementation notes:
 165 //
 166 //  - This class must be created and destroyed on the main render thread and
 167 //    most methods are called on the same thread. However, some methods are
 168 //    also called on a Libjingle worker thread. RenderData is called on the
 169 //    AudioOutputDevice thread and CaptureData on the AudioInputDevice thread.
 170 //    To summarize: this class lives on four different threads, so it is
 171 //    important to be careful with the order in which locks are acquired in
 172 //    order to avoid potential deadlocks.
 173 //  - The webrtc::AudioDeviceModule is reference counted.
 174 //  - AGC is only supported in combination with the WASAPI-based audio layer
 175 //    on Windows, i.e., it is not supported on Windows XP.
//  - All volume levels required for the AGC scheme are transferred in a
//    normalized range [0.0, 1.0]. Scaling takes place in both endpoints
//    (WebRTC client and media layer). This approach ensures that we can avoid
 179 //    transferring maximum levels between the renderer and the browser.
 180 //
 181
 182 namespace content {
 183
 184 class WebRtcAudioCapturer;
 185 class WebRtcAudioRenderer;
 186
 187 // TODO(xians): Move the following two interfaces to webrtc so that
 188 // libjingle can own references to the renderer and capturer.
 189 class WebRtcAudioRendererSource {
 190  public:
 191   // Callback to get the rendered data.
 192   virtual void RenderData(media::AudioBus* audio_bus,
 193                           int sample_rate,
 194                           int audio_delay_milliseconds,
 195                           base::TimeDelta* current_time) = 0;
 196
 197   // Callback to notify the client that the renderer is going away.
 198   virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) = 0;
 199
 200  protected:
 201   virtual ~WebRtcAudioRendererSource() {}
 202 };
 203
 204 // TODO(xians): Merge this interface with WebRtcAudioRendererSource.
 205 // The reason why we could not do it today is that WebRtcAudioRendererSource
 206 // gets the data by pulling, while the data is pushed into
 207 // WebRtcPlayoutDataSource::Sink.
 208 class WebRtcPlayoutDataSource {
 209  public:
 210   class Sink {
 211    public:
 212     // Callback to get the playout data.
 213     // Called on the render audio thread.
 214     virtual void OnPlayoutData(media::AudioBus* audio_bus,
 215                                int sample_rate,
 216                                int audio_delay_milliseconds) = 0;
 217
 218     // Callback to notify the sink that the source has changed.
 219     // Called on the main render thread.
 220     virtual void OnPlayoutDataSourceChanged() = 0;
 221
 222    protected:
 223     virtual ~Sink() {}
 224   };
 225
 226   // Adds/Removes the sink of WebRtcAudioRendererSource to the ADM.
 227   // These methods are used by the MediaStreamAudioProcesssor to get the
 228   // rendered data for AEC.
 229   virtual void AddPlayoutSink(Sink* sink) = 0;
 230   virtual void RemovePlayoutSink(Sink* sink) = 0;
 231
 232  protected:
 233   virtual ~WebRtcPlayoutDataSource() {}
 234 };
 235
 236 // Note that this class inherits from webrtc::AudioDeviceModule but due to
 237 // the high number of non-implemented methods, we move the cruft over to the
 238 // WebRtcAudioDeviceNotImpl.
 239 class CONTENT_EXPORT WebRtcAudioDeviceImpl
 240     : NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl),
 241       NON_EXPORTED_BASE(public WebRtcAudioRendererSource),
 242       NON_EXPORTED_BASE(public WebRtcPlayoutDataSource) {
 243  public:
 244   // The maximum volume value WebRtc uses.
 245   static const int kMaxVolumeLevel = 255;
 246
 247   // Instances of this object are created on the main render thread.
 248   WebRtcAudioDeviceImpl();
 249
 250   // webrtc::RefCountedModule implementation.
 251   // The creator must call AddRef() after construction and use Release()
 252   // to release the reference and delete this object.
 253   // Called on the main render thread.
 254   int32_t AddRef() override;
 255   int32_t Release() override;
 256
 257  private:
 258   // webrtc::AudioDeviceModule implementation.
 259   // All implemented methods are called on the main render thread unless
 260   // anything else is stated.
 261
 262   int32_t RegisterAudioCallback(
 263       webrtc::AudioTransport* audio_callback) override;
 264
 265   int32_t Init() override;
 266   int32_t Terminate() override;
 267   bool Initialized() const override;
 268
 269   int32_t PlayoutIsAvailable(bool* available) override;
 270   bool PlayoutIsInitialized() const override;
 271   int32_t RecordingIsAvailable(bool* available) override;
 272   bool RecordingIsInitialized() const override;
 273
 274   // All Start/Stop methods are called on a libJingle worker thread.
 275   int32_t StartPlayout() override;
 276   int32_t StopPlayout() override;
 277   bool Playing() const override;
 278   int32_t StartRecording() override;
 279   int32_t StopRecording() override;
 280   bool Recording() const override;
 281
 282   // Called on the AudioInputDevice worker thread.
 283   int32_t SetMicrophoneVolume(uint32_t volume) override;
 284
 285   // TODO(henrika): sort out calling thread once we start using this API.
 286   int32_t MicrophoneVolume(uint32_t* volume) const override;
 287
 288   int32_t MaxMicrophoneVolume(uint32_t* max_volume) const override;
 289   int32_t MinMicrophoneVolume(uint32_t* min_volume) const override;
 290   int32_t StereoPlayoutIsAvailable(bool* available) const override;
 291   int32_t StereoRecordingIsAvailable(bool* available) const override;
 292   int32_t PlayoutDelay(uint16_t* delay_ms) const override;
 293   int32_t RecordingDelay(uint16_t* delay_ms) const override;
 294   int32_t RecordingSampleRate(uint32_t* sample_rate) const override;
 295   int32_t PlayoutSampleRate(uint32_t* sample_rate) const override;
 296
 297  public:
 298   // Sets the |renderer_|, returns false if |renderer_| already exists.
 299   // Called on the main renderer thread.
 300   bool SetAudioRenderer(WebRtcAudioRenderer* renderer);
 301
 302   // Adds/Removes the capturer to the ADM.
 303   // TODO(xians): Remove these two methods once the ADM does not need to pass
 304   // hardware information up to WebRtc.
 305   void AddAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);
 306   void RemoveAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);
 307
 308   // Gets the default capturer, which is the last capturer in |capturers_|.
 309   // The method can be called by both Libjingle thread and main render thread.
 310   scoped_refptr<WebRtcAudioCapturer> GetDefaultCapturer() const;
 311
 312   // Gets paired device information of the capture device for the audio
 313   // renderer. This is used to pass on a session id, sample rate and buffer
 314   // size to a webrtc audio renderer (either local or remote), so that audio
 315   // will be rendered to a matching output device.
 316   // Returns true if the capture device has a paired output device, otherwise
 317   // false. Note that if there are more than one open capture device the
 318   // function will not be able to pick an appropriate device and return false.
 319   bool GetAuthorizedDeviceInfoForAudioRenderer(
 320       int* session_id, int* output_sample_rate, int* output_buffer_size);
 321
 322   const scoped_refptr<WebRtcAudioRenderer>& renderer() const {
 323     return renderer_;
 324   }
 325
 326  private:
 327   typedef std::list<scoped_refptr<WebRtcAudioCapturer> > CapturerList;
 328   typedef std::list<WebRtcPlayoutDataSource::Sink*> PlayoutDataSinkList;
 329   class RenderBuffer;
 330
 331   // Make destructor private to ensure that we can only be deleted by Release().
 332   ~WebRtcAudioDeviceImpl() override;
 333
 334   // WebRtcAudioRendererSource implementation.
 335
 336   // Called on the AudioOutputDevice worker thread.
 337   void RenderData(media::AudioBus* audio_bus,
 338                   int sample_rate,
 339                   int audio_delay_milliseconds,
 340                   base::TimeDelta* current_time) override;
 341
 342   // Called on the main render thread.
 343   void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) override;
 344
 345   // WebRtcPlayoutDataSource implementation.
 346   void AddPlayoutSink(WebRtcPlayoutDataSource::Sink* sink) override;
 347   void RemovePlayoutSink(WebRtcPlayoutDataSource::Sink* sink) override;
 348
 349   // Used to check methods that run on the main render thread.
 350   base::ThreadChecker main_thread_checker_;
 351   // Used to check methods that are called on libjingle's signaling thread.
 352   base::ThreadChecker signaling_thread_checker_;
 353   base::ThreadChecker worker_thread_checker_;
 354
 355   int ref_count_;
 356
 357   // List of captures which provides access to the native audio input layer
 358   // in the browser process.
 359   CapturerList capturers_;
 360
 361   // Provides access to the audio renderer in the browser process.
 362   scoped_refptr<WebRtcAudioRenderer> renderer_;
 363
 364   // A list of raw pointer of WebRtcPlayoutDataSource::Sink objects which want
 365   // to get the playout data, the sink need to call RemovePlayoutSink()
 366   // before it goes away.
 367   PlayoutDataSinkList playout_sinks_;
 368
 369   // Weak reference to the audio callback.
 370   // The webrtc client defines |audio_transport_callback_| by calling
 371   // RegisterAudioCallback().
 372   webrtc::AudioTransport* audio_transport_callback_;
 373
 374   // Cached value of the current audio delay on the input/capture side.
 375   int input_delay_ms_;
 376
 377   // Cached value of the current audio delay on the output/renderer side.
 378   int output_delay_ms_;
 379
 380   // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_|
 381   // |recording_| and |microphone_volume_|.
 382   mutable base::Lock lock_;
 383
 384   // Used to protect the racing of calling OnData() since there can be more
 385   // than one input stream calling OnData().
 386   mutable base::Lock capture_callback_lock_;
 387
 388   bool initialized_;
 389   bool playing_;
 390   bool recording_;
 391
 392   // Stores latest microphone volume received in a CaptureData() callback.
 393   // Range is [0, 255].
 394   uint32_t microphone_volume_;
 395
 396   // Buffer used for temporary storage during render callback.
 397   // It is only accessed by the audio render thread.
 398   std::vector<int16> render_buffer_;
 399
 400   DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl);
 401 };
 402
 403 }  // namespace content
 404
 405 #endif  // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_