// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/speech_recognition_dispatcher.h"

#include "base/basictypes.h"
#include "base/strings/utf_string_conversions.h"
#include "content/common/speech_recognition_messages.h"
#include "content/renderer/render_view_impl.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/platform/WebVector.h"
#include "third_party/WebKit/public/web/WebSpeechGrammar.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
#include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"

#if defined(ENABLE_WEBRTC)
#include "content/renderer/media/speech_recognition_audio_sink.h"
#endif

using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;

namespace content {
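
// SpeechRecognitionDispatcher routes speech recognition requests from Blink's
// WebSpeechRecognizerClient to the browser process over IPC, and dispatches
// the recognition events sent back by the browser to that client, keyed by a
// per-session request id.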

SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      next_id_(1) {}

SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}

void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  ResetAudioSink();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(
      routing_id()));
}

bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
                        OnAudioReceiverReady)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}

void SpeechRecognitionDispatcher::start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

#if defined(ENABLE_WEBRTC)
  const blink::WebMediaStreamTrack track = params.audioTrack();
  if (!track.isNull()) {
    // Check if this type of track is allowed by the implemented policy.
    if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
      audio_track_.assign(track);
    } else {
      audio_track_.reset();
      // Notify the user that the provided track is not supported.
      recognizer_client_->didReceiveError(
          handle,
          WebString("Provided audioTrack is not supported."),
          WebSpeechRecognizerClient::AudioCaptureError);
      return;
    }
  }

  // Destroy any previous instance to detach from the audio track.
  // Each new session should reinstantiate the provider once the track is
  // ready.
  ResetAudioSink();
#endif

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.grammars()[i];
    msg_params.grammars.push_back(
        SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
  }
  msg_params.language = base::UTF16ToUTF8(params.language());
  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
  msg_params.continuous = params.continuous();
  msg_params.interim_results = params.interimResults();
  msg_params.origin_url = params.origin().toString().utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
#if defined(ENABLE_WEBRTC)
  // Fall back to default input when the track is not allowed.
  msg_params.using_audio_track = !audio_track_.isNull();
#else
  msg_params.using_audio_track = false;
#endif
  // The handle mapping will be removed in |OnRecognitionEnd|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}
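
// Note: a successful session started above typically unfolds as Started ->
// AudioStarted -> SoundStarted -> zero or more ResultRetrieved -> SoundEnded
// -> AudioEnded -> Ended, with each event arriving as one of the IPC messages
// handled below; the exact sequence is up to the browser-side recognizer.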

void SpeechRecognitionDispatcher::stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}
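
// The handlers below translate recognition events received from the browser
// into the corresponding WebSpeechRecognizerClient callbacks.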

void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->didStart(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
  recognizer_client_->didStartAudio(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
  recognizer_client_->didStartSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
  recognizer_client_->didEndSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
  recognizer_client_->didEndAudio(GetHandleFromID(request_id));
}

static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    SpeechRecognitionErrorCode e) {
  switch (e) {
    case SPEECH_RECOGNITION_ERROR_NONE:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      return WebSpeechRecognizerClient::NoSpeechError;
    case SPEECH_RECOGNITION_ERROR_ABORTED:
      return WebSpeechRecognizerClient::AbortedError;
    case SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE:
      return WebSpeechRecognizerClient::AudioCaptureError;
    case SPEECH_RECOGNITION_ERROR_NETWORK:
      return WebSpeechRecognizerClient::NetworkError;
    case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
      return WebSpeechRecognizerClient::NotAllowedError;
    case SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED:
      return WebSpeechRecognizerClient::ServiceNotAllowedError;
    case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
      return WebSpeechRecognizerClient::BadGrammarError;
    case SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED:
      return WebSpeechRecognizerClient::LanguageNotSupportedError;
    case SPEECH_RECOGNITION_ERROR_NO_MATCH:
      // NO_MATCH is reported via didReceiveNoMatch, not as an error.
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
  }
  NOTREACHED();
  return WebSpeechRecognizerClient::OtherError;
}

void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id, const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    ResetAudioSink();
    recognizer_client_->didReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}

void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a notification
  // for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    ResetAudioSink();
    recognizer_client_->didEnd(handle);
  }
}

void SpeechRecognitionDispatcher::OnResultsRetrieved(
    int request_id, const SpeechRecognitionResults& results) {
  size_t provisional_count = 0;
  SpeechRecognitionResults::const_iterator it = results.begin();
  for (; it != results.end(); ++it) {
    if (it->is_provisional)
      ++provisional_count;
  }

  WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
  WebVector<WebSpeechRecognitionResult> final(
      results.size() - provisional_count);

  int provisional_index = 0, final_index = 0;
  for (it = results.begin(); it != results.end(); ++it) {
    const SpeechRecognitionResult& result = (*it);
    WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
        &provisional[provisional_index++] : &final[final_index++];

    const size_t num_hypotheses = result.hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = result.hypotheses[i].utterance;
      confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
    }
    webkit_result->assign(transcripts, confidences, !result.is_provisional);
  }

  recognizer_client_->didReceiveResults(
      GetHandleFromID(request_id), final, provisional);
}
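
// This reply carries the shared memory and sync socket over which the sink
// pushes audio from the renderer-held track to the browser-side recognizer;
// it presumably follows a StartRequest sent with |using_audio_track| set.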
void SpeechRecognitionDispatcher::OnAudioReceiverReady(
    int request_id,
    const media::AudioParameters& params,
    const base::SharedMemoryHandle memory,
    const base::SyncSocket::TransitDescriptor descriptor) {
#if defined(ENABLE_WEBRTC)
  DCHECK(!speech_audio_sink_.get());
  if (audio_track_.isNull()) {
    ResetAudioSink();
    return;
  }

  // The instantiation and type of the SyncSocket is up to the client since
  // it is dependency-injected into the SpeechRecognitionAudioSink.
  scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
      base::SyncSocket::UnwrapHandle(descriptor)));

  // base::Unretained is safe: |speech_audio_sink_| is owned by this object,
  // so the bound callback cannot outlive the dispatcher.
  speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
      audio_track_, params, memory, socket.Pass(),
      base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
                 base::Unretained(this))));
#endif
}
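
// Returns the id already mapped to |handle|, or creates a fresh mapping; the
// entry lives until OnRecognitionEnded erases it.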
int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
    const WebSpeechRecognitionHandle& handle) {
  // Search first for an existing mapping.
  for (HandleMap::iterator iter = handle_map_.begin();
      iter != handle_map_.end();
      ++iter) {
    if (iter->second.equals(handle))
      return iter->first;
  }
  // If no existing mapping was found, create a new one.
  const int new_id = next_id_;
  handle_map_[new_id] = handle;
  ++next_id_;
  return new_id;
}

bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  for (HandleMap::iterator iter = handle_map_.begin();
      iter != handle_map_.end();
      ++iter) {
    if (iter->second.equals(handle))
      return true;
  }
  return false;
}

void SpeechRecognitionDispatcher::ResetAudioSink() {
#if defined(ENABLE_WEBRTC)
  speech_audio_sink_.reset();
#endif
}

const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
    int request_id) {
  HandleMap::iterator iter = handle_map_.find(request_id);
  DCHECK(iter != handle_map_.end());
  return iter->second;
}

}  // namespace content