// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/speech_recognition_dispatcher.h"

#include "base/basictypes.h"
#include "base/strings/utf_string_conversions.h"
#include "content/common/speech_recognition_messages.h"
#include "content/renderer/render_view_impl.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/platform/WebVector.h"
#include "third_party/WebKit/public/web/WebSpeechGrammar.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
#include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"

#if defined(ENABLE_WEBRTC)
#include "content/renderer/media/speech_recognition_audio_sink.h"
#endif

using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;

namespace content {

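// SpeechRecognitionDispatcher is the renderer-side endpoint of the speech
// recognition IPC channel: it forwards Blink's start/stop/abort requests to
// the browser process and routes the resulting SpeechRecognitionMsg_* events
// back to the WebSpeechRecognizerClient. Each WebSpeechRecognitionHandle is
// mapped to an integer request id (in |handle_map_|) so that sessions can be
// identified across the IPC boundary.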
SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      next_id_(1) {}

SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}

void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  ResetAudioSink();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(
      routing_id()));
}

bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
                        OnAudioReceiverReady)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}

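// Blink entry point for starting a new recognition session. The
// WebSpeechRecognitionParams are flattened into a
// SpeechRecognitionHostMsg_StartRequest_Params struct and sent to the
// browser process, which owns the actual recognition engine.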
void SpeechRecognitionDispatcher::start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

#if defined(ENABLE_WEBRTC)
  const blink::WebMediaStreamTrack track = params.audioTrack();
  if (!track.isNull()) {
    // Check if this type of track is allowed by implemented policy.
    if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
      audio_track_.assign(track);
    } else {
      audio_track_.reset();
      // Notify user that the track used is not supported.
      recognizer_client_->didReceiveError(
          handle,
          WebString("Provided audioTrack is not supported."),
          WebSpeechRecognizerClient::AudioCaptureError);
      return;
    }
  }

  // Destroy any previous instance to detach from the audio track.
  // Each new session should reinstantiate the provider once the track is
  // ready.
  ResetAudioSink();
#endif

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.grammars()[i];
    msg_params.grammars.push_back(
        SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
  }
  msg_params.language = base::UTF16ToUTF8(params.language());
  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
  msg_params.continuous = params.continuous();
  msg_params.interim_results = params.interimResults();
  msg_params.origin_url = params.origin().toString().utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
#if defined(ENABLE_WEBRTC)
  // Fall back to default input when the track is not allowed.
  msg_params.using_audio_track = !audio_track_.isNull();
#else
  msg_params.using_audio_track = false;
#endif
  // The handle mapping will be removed in |OnRecognitionEnd|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}

void SpeechRecognitionDispatcher::stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->didStart(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
  recognizer_client_->didStartAudio(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
  recognizer_client_->didStartSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
  recognizer_client_->didEndSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
  recognizer_client_->didEndAudio(GetHandleFromID(request_id));
}

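// Translates the content-layer SpeechRecognitionErrorCode enum into Blink's
// WebSpeechRecognizerClient error codes. Values that should never surface
// here as errors (none, no-match) fall back to OtherError.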
static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    SpeechRecognitionErrorCode e) {
  switch (e) {
    case SPEECH_RECOGNITION_ERROR_NONE:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_ABORTED:
      return WebSpeechRecognizerClient::AbortedError;
    case SPEECH_RECOGNITION_ERROR_AUDIO:
      return WebSpeechRecognizerClient::AudioCaptureError;
    case SPEECH_RECOGNITION_ERROR_NETWORK:
      return WebSpeechRecognizerClient::NetworkError;
    case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
      return WebSpeechRecognizerClient::NotAllowedError;
    case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      return WebSpeechRecognizerClient::NoSpeechError;
    case SPEECH_RECOGNITION_ERROR_NO_MATCH:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
      return WebSpeechRecognizerClient::BadGrammarError;
  }
  NOTREACHED();
  return WebSpeechRecognizerClient::OtherError;
}

void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id, const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    ResetAudioSink();
    recognizer_client_->didReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}

void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a
  // notification for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    ResetAudioSink();
    recognizer_client_->didEnd(handle);
  }
}

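// Splits the incoming results into provisional (interim) and final batches,
// converts each hypothesis into Blink types, and delivers both vectors to
// the client in a single didReceiveResults() call.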
void SpeechRecognitionDispatcher::OnResultsRetrieved(
    int request_id, const SpeechRecognitionResults& results) {
  size_t provisional_count = 0;
  SpeechRecognitionResults::const_iterator it = results.begin();
  for (; it != results.end(); ++it) {
    if (it->is_provisional)
      ++provisional_count;
  }

  WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
  WebVector<WebSpeechRecognitionResult> final(
      results.size() - provisional_count);

  int provisional_index = 0, final_index = 0;
  for (it = results.begin(); it != results.end(); ++it) {
    const SpeechRecognitionResult& result = (*it);
    WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
        &provisional[provisional_index++] : &final[final_index++];

    const size_t num_hypotheses = result.hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = result.hypotheses[i].utterance;
      confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
    }
    webkit_result->assign(transcripts, confidences, !result.is_provisional);
  }

  recognizer_client_->didReceiveResults(
      GetHandleFromID(request_id), final, provisional);
}

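// Invoked once the browser-side audio receiver is ready to consume audio
// from the renderer. Wires the previously captured MediaStream track into a
// SpeechRecognitionAudioSink that streams audio over the given shared-memory
// buffer, synchronized through the SyncSocket descriptor.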
void SpeechRecognitionDispatcher::OnAudioReceiverReady(
    int request_id,
    const media::AudioParameters& params,
    const base::SharedMemoryHandle memory,
    const base::SyncSocket::TransitDescriptor descriptor) {
#if defined(ENABLE_WEBRTC)
  DCHECK(!speech_audio_sink_.get());
  if (audio_track_.isNull()) {
    ResetAudioSink();
    return;
  }

  // The instantiation and type of SyncSocket is up to the client since it
  // is dependency injected to the SpeechRecognitionAudioSink.
  scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
      base::SyncSocket::UnwrapHandle(descriptor)));

  speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
      audio_track_, params, memory, socket.Pass(),
      base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
                 base::Unretained(this))));
#endif
}

int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
    const WebSpeechRecognitionHandle& handle) {
  // Search first for an existing mapping.
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end();
       ++iter) {
    if (iter->second.equals(handle))
      return iter->first;
  }
  // If no existing mapping found, create a new one.
  const int new_id = next_id_;
  handle_map_[new_id] = handle;
  ++next_id_;
  return new_id;
}

bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end();
       ++iter) {
    if (iter->second.equals(handle))
      return true;
  }
  return false;
}

void SpeechRecognitionDispatcher::ResetAudioSink() {
#if defined(ENABLE_WEBRTC)
  speech_audio_sink_.reset();
#endif
}

const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
    int request_id) {
  HandleMap::iterator iter = handle_map_.find(request_id);
  DCHECK(iter != handle_map_.end());
  return iter->second;
}

}  // namespace content