content/renderer/media/speech_recognition_audio_sink.h

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_
   6 #define CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_
   7
   8 #include "base/callback.h"
   9 #include "base/memory/scoped_ptr.h"
  10 #include "base/memory/shared_memory.h"
  11 #include "base/sync_socket.h"
  12 #include "base/threading/thread_checker.h"
  13 #include "content/common/content_export.h"
  14 #include "content/public/renderer/media_stream_audio_sink.h"
  15 #include "media/audio/audio_parameters.h"
  16 #include "media/base/audio_converter.h"
  17 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
  18
  19 namespace media {
  20 class AudioBus;
  21 class AudioFifo;
  22 }  // namespace media
  23
  24 namespace content {
  25
  26 // SpeechRecognitionAudioSink works as an audio sink to the
  27 // WebRtcLocalAudioTrack. It stores the captured audio data in a FIFO.
  28 // When the FIFO holds enough data for resampling, the sink converts it,
  29 // passes the buffer to the WebSpeechRecognizer via SharedMemory, notifies it
  30 // via SyncSocket and then increments |buffer_index_|. The WebSpeechRecognizer
  31 // increments the shared buffer index to synchronize.
  32 class CONTENT_EXPORT SpeechRecognitionAudioSink
  33     : NON_EXPORTED_BASE(public media::AudioConverter::InputCallback),
  34       NON_EXPORTED_BASE(public MediaStreamAudioSink) {
  35  public:
  36   typedef base::Callback<void()> OnStoppedCB;  // See |on_stopped_cb_|.
  37
  38   // Ownership of |socket| is transferred to this class by the constructor.
  39   SpeechRecognitionAudioSink(const blink::WebMediaStreamTrack& track,
  40                              const media::AudioParameters& params,
  41                              const base::SharedMemoryHandle memory,
  42                              scoped_ptr<base::SyncSocket> socket,
  43                              const OnStoppedCB& on_stopped_cb);
  44
  45   ~SpeechRecognitionAudioSink() override;
  46
  47   // Returns whether the provided |track| is supported.
  48   static bool IsSupportedTrack(const blink::WebMediaStreamTrack& track);
  49
  50  private:
  51   // content::MediaStreamAudioSink implementation.
  52   void OnReadyStateChanged(
  53       blink::WebMediaStreamSource::ReadyState state) override;
  54
  55   void OnData(const media::AudioBus& audio_bus,
  56               base::TimeTicks estimated_capture_time) override;
  57   void OnSetFormat(const media::AudioParameters& params) override;
  58
  59   // media::AudioConverter::InputCallback implementation.
  60   double ProvideInput(media::AudioBus* audio_bus,
  61                       base::TimeDelta buffer_delay) override;
  62
  63   // Returns the pointer to the audio input buffer mapped in |shared_memory_|.
  64   media::AudioInputBuffer* GetAudioInputBuffer() const;
  65
  66   // Number of frames per buffer in the FIFO. When the buffer is full it is
  67   // converted and consumed on the |output_bus_|. The size of the buffer depends
  68   // on the resampler. Example: for 44.1 to 16.0 conversion, it should be 4100
  69   int fifo_buffer_size_;  // frames.
  70
  71   // Used to DCHECK that some methods are called on the main render thread.
  72   base::ThreadChecker main_render_thread_checker_;
  73
  74   // Used to DCHECK that some methods are called on the capture audio thread.
  75   base::ThreadChecker capture_thread_checker_;
  76
  77   // The audio track that this audio sink is connected to.
  78   const blink::WebMediaStreamTrack track_;
  79
  80   // Shared memory used by audio buses on both browser and renderer processes.
  81   base::SharedMemory shared_memory_;
  82
  83   // Socket for synchronization of audio bus reads/writes. Created on the
  84   // renderer client and passed here. Accessed on the capture thread.
  85   scoped_ptr<base::SyncSocket> socket_;
  86
  87   // Used as a resampler to deliver the appropriate format to speech recognition.
  88   scoped_ptr<media::AudioConverter> audio_converter_;
  89
  90   // FIFO used for queuing audio frames before they are resampled.
  91   scoped_ptr<media::AudioFifo> fifo_;
  92
  93   // Audio bus shared with the browser process via |shared_memory_|.
  94   scoped_ptr<media::AudioBus> output_bus_;
  95
  96   // Params of the source audio. Can change when |OnSetFormat()| occurs.
  97   media::AudioParameters input_params_;
  98
  99   // Params used by speech recognition.
 100   const media::AudioParameters output_params_;
 101
 102   // Whether the track has been stopped.
 103   bool track_stopped_;
 104
 105   // Local counter of audio buffers, used for synchronization.
 106   uint32 buffer_index_;
 107
 108   // Callback for the renderer client. Called when the audio track was stopped.
 109   const OnStoppedCB on_stopped_cb_;
 110
 111   DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSink);
 112 };
 113
 114 }  // namespace content
 115
 116 #endif  // CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_