// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/speech_recognition_dispatcher.h"

#include "base/basictypes.h"
#include "base/strings/utf_string_conversions.h"
#include "content/common/speech_recognition_messages.h"
#include "content/renderer/render_view_impl.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/platform/WebVector.h"
#include "third_party/WebKit/public/web/WebSpeechGrammar.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
#include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"

#if defined(ENABLE_WEBRTC)
#include "content/renderer/media/speech_recognition_audio_sink.h"
#endif

using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;

namespace content {

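// SpeechRecognitionDispatcher bridges Blink's Web Speech API to the
// browser-side recognition service: start/stop/abort calls from the page are
// forwarded as IPC requests, and browser notifications are routed back to
// the WebSpeechRecognizerClient.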
SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      next_id_(1) {}

SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}

void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  ResetAudioSink();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(
      routing_id()));
}

bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
                        OnAudioReceiverReady)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}

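// Implements WebSpeechRecognizer::start(): validates the optional audio
// track, then asks the browser process to begin a recognition session.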
void SpeechRecognitionDispatcher::start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

#if defined(ENABLE_WEBRTC)
  const blink::WebMediaStreamTrack track = params.audioTrack();
  if (!track.isNull()) {
    // Check if this type of track is allowed by implemented policy.
    if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
      audio_track_.assign(track);
    } else {
      audio_track_.reset();
      // Notify user that the track used is not supported.
      recognizer_client_->didReceiveError(
          handle,
          WebString("Provided audioTrack is not supported."),
          WebSpeechRecognizerClient::AudioCaptureError);
      return;
    }
  }

  // Destroy any previous instance to detach from the audio track.
  // Each new session should reinstantiate the provider once the track is
  // ready.
  ResetAudioSink();
#endif

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.grammars()[i];
    msg_params.grammars.push_back(
        SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
  }
  msg_params.language =
      base::UTF16ToUTF8(base::StringPiece16(params.language()));
  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
  msg_params.continuous = params.continuous();
  msg_params.interim_results = params.interimResults();
  msg_params.origin_url = params.origin().toString().utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
#if defined(ENABLE_WEBRTC)
  // Fall back to default input when the track is not allowed.
  msg_params.using_audio_track = !audio_track_.isNull();
#else
  msg_params.using_audio_track = false;
#endif
  // The handle mapping will be removed in |OnRecognitionEnd|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}

void SpeechRecognitionDispatcher::stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->didStart(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
  recognizer_client_->didStartAudio(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
  recognizer_client_->didStartSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
  recognizer_client_->didEndSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
  recognizer_client_->didEndAudio(GetHandleFromID(request_id));
}

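// Maps the content-layer error enum onto the Blink enum exposed through the
// Web Speech API.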
static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    SpeechRecognitionErrorCode e) {
  switch (e) {
    case SPEECH_RECOGNITION_ERROR_NONE:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      return WebSpeechRecognizerClient::NoSpeechError;
    case SPEECH_RECOGNITION_ERROR_ABORTED:
      return WebSpeechRecognizerClient::AbortedError;
    case SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE:
      return WebSpeechRecognizerClient::AudioCaptureError;
    case SPEECH_RECOGNITION_ERROR_NETWORK:
      return WebSpeechRecognizerClient::NetworkError;
    case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
      return WebSpeechRecognizerClient::NotAllowedError;
    case SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED:
      return WebSpeechRecognizerClient::ServiceNotAllowedError;
    case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
      return WebSpeechRecognizerClient::BadGrammarError;
    case SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED:
      return WebSpeechRecognizerClient::LanguageNotSupportedError;
    case SPEECH_RECOGNITION_ERROR_NO_MATCH:
      // NO_MATCH is reported through didReceiveNoMatch, not as an error.
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
  }
  NOTREACHED();
  return WebSpeechRecognizerClient::OtherError;
}

void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id, const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    ResetAudioSink();
    recognizer_client_->didReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}

void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a
  // notification for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    ResetAudioSink();
    recognizer_client_->didEnd(handle);
  }
}

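// Results arrive from the browser as one flat list; Blink expects interim
// (provisional) and final results in separate vectors, so split them here.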
void SpeechRecognitionDispatcher::OnResultsRetrieved(
    int request_id, const SpeechRecognitionResults& results) {
  size_t provisional_count = 0;
  SpeechRecognitionResults::const_iterator it = results.begin();
  for (; it != results.end(); ++it) {
    if (it->is_provisional)
      ++provisional_count;
  }

  WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
  WebVector<WebSpeechRecognitionResult> final(
      results.size() - provisional_count);

  int provisional_index = 0, final_index = 0;
  for (it = results.begin(); it != results.end(); ++it) {
    const SpeechRecognitionResult& result = (*it);
    WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
        &provisional[provisional_index++] : &final[final_index++];

    const size_t num_hypotheses = result.hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = result.hypotheses[i].utterance;
      confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
    }
    webkit_result->assign(transcripts, confidences, !result.is_provisional);
  }

  recognizer_client_->didReceiveResults(
      GetHandleFromID(request_id), final, provisional);
}

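// Called once the browser has set up the shared-memory audio transport for a
// MediaStreamTrack-based session (WebRTC builds only); attaches the audio
// sink to it.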
void SpeechRecognitionDispatcher::OnAudioReceiverReady(
    int request_id,
    const media::AudioParameters& params,
    const base::SharedMemoryHandle memory,
    const base::SyncSocket::TransitDescriptor descriptor) {
#if defined(ENABLE_WEBRTC)
  DCHECK(!speech_audio_sink_.get());
  if (audio_track_.isNull()) {
    ResetAudioSink();
    return;
  }

  // The instantiation and type of SyncSocket is up to the client since it
  // is dependency injected to the SpeechRecognitionAudioSink.
  scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
      base::SyncSocket::UnwrapHandle(descriptor)));

  speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
      audio_track_, params, memory, socket.Pass(),
      base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
                 base::Unretained(this))));
#endif
}

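// Blink handles are opaque objects; the dispatcher keys them by integer
// request IDs so sessions can be referenced across the IPC boundary.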
int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
    const WebSpeechRecognitionHandle& handle) {
  // Search first for an existing mapping.
  for (HandleMap::iterator iter = handle_map_.begin();
      iter != handle_map_.end();
      ++iter) {
    if (iter->second.equals(handle))
      return iter->first;
  }
  // If no existing mapping found, create a new one.
  const int new_id = next_id_;
  handle_map_[new_id] = handle;
  ++next_id_;
  return new_id;
}

bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  for (HandleMap::iterator iter = handle_map_.begin();
      iter != handle_map_.end();
      ++iter) {
    if (iter->second.equals(handle))
      return true;
  }
  return false;
}

void SpeechRecognitionDispatcher::ResetAudioSink() {
#if defined(ENABLE_WEBRTC)
  speech_audio_sink_.reset();
#endif
}

const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
    int request_id) {
  HandleMap::iterator iter = handle_map_.find(request_id);
  CHECK(iter != handle_map_.end());
  return iter->second;
}

}  // namespace content