content/renderer/media/speech_recognition_audio_sink.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/renderer/media/speech_recognition_audio_sink.h"
   6
   7 #include "base/logging.h"
   8 #include "base/memory/shared_memory.h"
   9 #include "base/time/time.h"
  10 #include "content/renderer/media/media_stream_audio_source.h"
  11 #include "media/audio/audio_parameters.h"
  12 #include "media/base/audio_fifo.h"
  13
  14 namespace content {
  15
  16 SpeechRecognitionAudioSink::SpeechRecognitionAudioSink(
  17     const blink::WebMediaStreamTrack& track,
  18     const media::AudioParameters& params,
  19     const base::SharedMemoryHandle memory,
  20     scoped_ptr<base::SyncSocket> socket,
  21     const OnStoppedCB& on_stopped_cb)
  22     : track_(track),
  23       shared_memory_(memory, false),
  24       socket_(socket.Pass()),
  25       output_params_(params),
  26       track_stopped_(false),
  27       buffer_index_(0),
  28       on_stopped_cb_(on_stopped_cb) {
  29   DCHECK(socket_.get());
  30   DCHECK(main_render_thread_checker_.CalledOnValidThread());
  31   DCHECK(params.IsValid());
  32   DCHECK(IsSupportedTrack(track));
  33   const size_t kSharedMemorySize = sizeof(media::AudioInputBufferParameters) +
  34                                    media::AudioBus::CalculateMemorySize(params);
  35   CHECK(shared_memory_.Map(kSharedMemorySize));
  36
  37   media::AudioInputBuffer* buffer =
  38       static_cast<media::AudioInputBuffer*>(shared_memory_.memory());
  39
  40   // The peer must manage their own counter and reset it to 0.
  41   DCHECK_EQ(0U, buffer->params.size);
  42   output_bus_ = media::AudioBus::WrapMemory(params, buffer->audio);
  43
  44   // Connect this audio sink to the track
  45   MediaStreamAudioSink::AddToAudioTrack(this, track_);
  46 }
  47
  48 SpeechRecognitionAudioSink::~SpeechRecognitionAudioSink() {
  49   DCHECK(main_render_thread_checker_.CalledOnValidThread());
  50   if (audio_converter_.get())
  51     audio_converter_->RemoveInput(this);
  52
  53   // Notify the track before this sink goes away.
  54   if (!track_stopped_)
  55     MediaStreamAudioSink::RemoveFromAudioTrack(this, track_);
  56 }
  57
  58 // static
  59 bool SpeechRecognitionAudioSink::IsSupportedTrack(
  60     const blink::WebMediaStreamTrack& track) {
  61   if (track.source().type() != blink::WebMediaStreamSource::TypeAudio)
  62     return false;
  63
  64   MediaStreamAudioSource* native_source =
  65       static_cast<MediaStreamAudioSource*>(track.source().extraData());
  66   if (!native_source)
  67     return false;
  68
  69   const StreamDeviceInfo& device_info = native_source->device_info();
  70   // Purposely only support tracks from an audio device. Dissallow WebAudio.
  71   return (device_info.device.type == content::MEDIA_DEVICE_AUDIO_CAPTURE);
  72 }
  73
  74 void SpeechRecognitionAudioSink::OnSetFormat(
  75     const media::AudioParameters& input_params) {
  76   DCHECK(input_params.IsValid());
  77   DCHECK_LE(
  78       input_params.frames_per_buffer() * 1000 / input_params.sample_rate(),
  79       output_params_.frames_per_buffer() * 1000 / output_params_.sample_rate());
  80
  81   // Detach the thread here because it will be a new capture thread
  82   // calling OnSetFormat() and OnData() if the source is restarted.
  83   capture_thread_checker_.DetachFromThread();
  84
  85   input_params_ = input_params;
  86   fifo_buffer_size_ =
  87       std::ceil(output_params_.frames_per_buffer() *
  88                 static_cast<double>(input_params_.sample_rate()) /
  89                     output_params_.sample_rate());
  90   DCHECK_GE(fifo_buffer_size_, input_params_.frames_per_buffer());
  91
  92   // Allows for some delays on the peer.
  93   static const int kNumberOfBuffersInFifo = 2;
  94   int frames_in_fifo = kNumberOfBuffersInFifo * fifo_buffer_size_;
  95   fifo_.reset(new media::AudioFifo(input_params.channels(), frames_in_fifo));
  96
  97   // Create the audio converter with |disable_fifo| as false so that the
  98   // converter will request input_params.frames_per_buffer() each time.
  99   // This will not increase the complexity as there is only one client to
 100   // the converter.
 101   audio_converter_.reset(
 102       new media::AudioConverter(input_params, output_params_, false));
 103   audio_converter_->AddInput(this);
 104 }
 105
 106 void SpeechRecognitionAudioSink::OnReadyStateChanged(
 107     blink::WebMediaStreamSource::ReadyState state) {
 108   DCHECK(main_render_thread_checker_.CalledOnValidThread());
 109   DCHECK(!track_stopped_);
 110
 111   if (state == blink::WebMediaStreamSource::ReadyStateEnded) {
 112     track_stopped_ = true;
 113
 114     if (!on_stopped_cb_.is_null())
 115       on_stopped_cb_.Run();
 116   }
 117 }
 118
 119 void SpeechRecognitionAudioSink::OnData(
 120     const media::AudioBus& audio_bus,
 121     base::TimeTicks estimated_capture_time) {
 122   DCHECK(capture_thread_checker_.CalledOnValidThread());
 123   DCHECK_EQ(audio_bus.frames(), input_params_.frames_per_buffer());
 124   DCHECK_EQ(audio_bus.channels(), input_params_.channels());
 125   if (fifo_->frames() + audio_bus.frames() > fifo_->max_frames()) {
 126     // This would indicate a serious issue with the browser process or the
 127     // SyncSocket and/or SharedMemory. We drop any previous buffers and try to
 128     // recover by resuming where the peer left of.
 129     DLOG(ERROR) << "Audio FIFO overflow";
 130     fifo_->Clear();
 131     buffer_index_ = GetAudioInputBuffer()->params.size;
 132   }
 133
 134   fifo_->Push(&audio_bus);
 135   // Wait for FIFO to have at least |fifo_buffer_size_| frames ready.
 136   if (fifo_->frames() < fifo_buffer_size_)
 137     return;
 138
 139   // Make sure the previous output buffer was consumed by the peer before we
 140   // send the next buffer.
 141   // The peer must write to it (incrementing by 1) once the the buffer was
 142   // consumed. This is intentional not to block this audio capturing thread.
 143   if (buffer_index_ != GetAudioInputBuffer()->params.size) {
 144     DVLOG(1) << "Buffer synchronization lag";
 145     return;
 146   }
 147
 148   audio_converter_->Convert(output_bus_.get());
 149
 150   // Notify peer to consume buffer |buffer_index_| on |output_bus_|.
 151   const size_t bytes_sent =
 152       socket_->Send(&buffer_index_, sizeof(buffer_index_));
 153   if (bytes_sent != sizeof(buffer_index_)) {
 154     // The send ocasionally fails if the user changes their input audio device.
 155     DVLOG(1) << "Failed sending buffer index to peer";
 156     // We have discarded this buffer, but could still recover on the next one.
 157     return;
 158   }
 159
 160   // Count the sent buffer. We expect the peer to do the same on their end.
 161   ++buffer_index_;
 162 }
 163
 164 double SpeechRecognitionAudioSink::ProvideInput(media::AudioBus* audio_bus,
 165                                                 base::TimeDelta buffer_delay) {
 166   DCHECK(capture_thread_checker_.CalledOnValidThread());
 167   if (fifo_->frames() >= audio_bus->frames())
 168     fifo_->Consume(audio_bus, 0, audio_bus->frames());
 169   else
 170     audio_bus->Zero();
 171
 172   // Return volume greater than zero to indicate we have more data.
 173   return 1.0;
 174 }
 175
 176 media::AudioInputBuffer*
 177 SpeechRecognitionAudioSink::GetAudioInputBuffer() const {
 178   DCHECK(capture_thread_checker_.CalledOnValidThread());
 179   DCHECK(shared_memory_.memory());
 180   return static_cast<media::AudioInputBuffer*>(shared_memory_.memory());
 181 }
 182
 183 }  // namespace content