Roll src/third_party/WebKit a452221:9ff6d11 (svn 202117:202119)
[chromium-blink-merge.git] / content / renderer / speech_recognition_dispatcher.cc
blob67d9fb37823cc4ccd77308bfdc21372ca141c64d
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/speech_recognition_dispatcher.h"
7 #include "base/basictypes.h"
8 #include "base/strings/utf_string_conversions.h"
9 #include "content/common/speech_recognition_messages.h"
10 #include "content/renderer/render_view_impl.h"
11 #include "third_party/WebKit/public/platform/WebString.h"
12 #include "third_party/WebKit/public/platform/WebVector.h"
13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h"
14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"
18 #if defined(ENABLE_WEBRTC)
19 #include "content/renderer/media/speech_recognition_audio_sink.h"
20 #endif
22 using blink::WebVector;
23 using blink::WebString;
24 using blink::WebSpeechGrammar;
25 using blink::WebSpeechRecognitionHandle;
26 using blink::WebSpeechRecognitionResult;
27 using blink::WebSpeechRecognitionParams;
28 using blink::WebSpeechRecognizerClient;
30 namespace content {
32 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
33 RenderViewImpl* render_view)
34 : RenderViewObserver(render_view),
35 recognizer_client_(NULL),
36 next_id_(1) {}
38 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}
40 void SpeechRecognitionDispatcher::AbortAllRecognitions() {
41 ResetAudioSink();
42 Send(new SpeechRecognitionHostMsg_AbortAllRequests(
43 routing_id()));
46 bool SpeechRecognitionDispatcher::OnMessageReceived(
47 const IPC::Message& message) {
48 bool handled = true;
49 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
55 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
57 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
58 OnResultsRetrieved)
59 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
60 OnAudioReceiverReady)
61 IPC_MESSAGE_UNHANDLED(handled = false)
62 IPC_END_MESSAGE_MAP()
63 return handled;
66 void SpeechRecognitionDispatcher::start(
67 const WebSpeechRecognitionHandle& handle,
68 const WebSpeechRecognitionParams& params,
69 WebSpeechRecognizerClient* recognizer_client) {
70 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
71 recognizer_client_ = recognizer_client;
73 #if defined(ENABLE_WEBRTC)
74 const blink::WebMediaStreamTrack track = params.audioTrack();
75 if (!track.isNull()) {
76 // Check if this type of track is allowed by implemented policy.
77 if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
78 audio_track_.assign(track);
79 } else {
80 audio_track_.reset();
81 // Notify user that the track used is not supported.
82 recognizer_client_->didReceiveError(
83 handle,
84 WebString("Provided audioTrack is not supported."),
85 WebSpeechRecognizerClient::AudioCaptureError);
87 return;
91 // Destroy any previous instance to detach from the audio track.
92 // Each new session should reinstantiate the provider once the track is ready.
93 ResetAudioSink();
94 #endif
96 SpeechRecognitionHostMsg_StartRequest_Params msg_params;
97 for (size_t i = 0; i < params.grammars().size(); ++i) {
98 const WebSpeechGrammar& grammar = params.grammars()[i];
99 msg_params.grammars.push_back(
100 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
102 msg_params.language =
103 base::UTF16ToUTF8(base::StringPiece16(params.language()));
104 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
105 msg_params.continuous = params.continuous();
106 msg_params.interim_results = params.interimResults();
107 msg_params.origin_url = params.origin().toString().utf8();
108 msg_params.render_view_id = routing_id();
109 msg_params.request_id = GetOrCreateIDForHandle(handle);
110 #if defined(ENABLE_WEBRTC)
111 // Fall back to default input when the track is not allowed.
112 msg_params.using_audio_track = !audio_track_.isNull();
113 #else
114 msg_params.using_audio_track = false;
115 #endif
116 // The handle mapping will be removed in |OnRecognitionEnd|.
117 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
120 void SpeechRecognitionDispatcher::stop(
121 const WebSpeechRecognitionHandle& handle,
122 WebSpeechRecognizerClient* recognizer_client) {
123 ResetAudioSink();
124 // Ignore a |stop| issued without a matching |start|.
125 if (recognizer_client_ != recognizer_client || !HandleExists(handle))
126 return;
127 Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
128 routing_id(), GetOrCreateIDForHandle(handle)));
131 void SpeechRecognitionDispatcher::abort(
132 const WebSpeechRecognitionHandle& handle,
133 WebSpeechRecognizerClient* recognizer_client) {
134 ResetAudioSink();
135 // Ignore an |abort| issued without a matching |start|.
136 if (recognizer_client_ != recognizer_client || !HandleExists(handle))
137 return;
138 Send(new SpeechRecognitionHostMsg_AbortRequest(
139 routing_id(), GetOrCreateIDForHandle(handle)));
142 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
143 recognizer_client_->didStart(GetHandleFromID(request_id));
146 void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
147 recognizer_client_->didStartAudio(GetHandleFromID(request_id));
150 void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
151 recognizer_client_->didStartSound(GetHandleFromID(request_id));
154 void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
155 recognizer_client_->didEndSound(GetHandleFromID(request_id));
158 void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
159 recognizer_client_->didEndAudio(GetHandleFromID(request_id));
162 static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
163 SpeechRecognitionErrorCode e) {
164 switch (e) {
165 case SPEECH_RECOGNITION_ERROR_NONE:
166 NOTREACHED();
167 return WebSpeechRecognizerClient::OtherError;
168 case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
169 return WebSpeechRecognizerClient::NoSpeechError;
170 case SPEECH_RECOGNITION_ERROR_ABORTED:
171 return WebSpeechRecognizerClient::AbortedError;
172 case SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE:
173 return WebSpeechRecognizerClient::AudioCaptureError;
174 case SPEECH_RECOGNITION_ERROR_NETWORK:
175 return WebSpeechRecognizerClient::NetworkError;
176 case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
177 return WebSpeechRecognizerClient::NotAllowedError;
178 case SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED:
179 return WebSpeechRecognizerClient::ServiceNotAllowedError;
180 case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
181 return WebSpeechRecognizerClient::BadGrammarError;
182 case SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED:
183 return WebSpeechRecognizerClient::LanguageNotSupportedError;
184 case SPEECH_RECOGNITION_ERROR_NO_MATCH:
185 NOTREACHED();
186 return WebSpeechRecognizerClient::OtherError;
188 NOTREACHED();
189 return WebSpeechRecognizerClient::OtherError;
192 void SpeechRecognitionDispatcher::OnErrorOccurred(
193 int request_id, const SpeechRecognitionError& error) {
194 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
195 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
196 WebSpeechRecognitionResult());
197 } else {
198 ResetAudioSink();
199 recognizer_client_->didReceiveError(
200 GetHandleFromID(request_id),
201 WebString(), // TODO(primiano): message?
202 WebKitErrorCode(error.code));
206 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
207 // TODO(tommi): It is possible that the handle isn't found in the array if
208 // the user just refreshed the page. It seems that we then get a notification
209 // for the previously loaded instance of the page.
210 HandleMap::iterator iter = handle_map_.find(request_id);
211 if (iter == handle_map_.end()) {
212 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
213 } else {
214 WebSpeechRecognitionHandle handle = iter->second;
215 // Note: we need to erase the handle from the map *before* calling didEnd.
216 // didEnd may call back synchronously to start a new recognition session,
217 // and we don't want to delete the handle from the map after that happens.
218 handle_map_.erase(request_id);
219 ResetAudioSink();
220 recognizer_client_->didEnd(handle);
224 void SpeechRecognitionDispatcher::OnResultsRetrieved(
225 int request_id, const SpeechRecognitionResults& results) {
226 size_t provisional_count = 0;
227 SpeechRecognitionResults::const_iterator it = results.begin();
228 for (; it != results.end(); ++it) {
229 if (it->is_provisional)
230 ++provisional_count;
233 WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
234 WebVector<WebSpeechRecognitionResult> final(
235 results.size() - provisional_count);
237 int provisional_index = 0, final_index = 0;
238 for (it = results.begin(); it != results.end(); ++it) {
239 const SpeechRecognitionResult& result = (*it);
240 WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
241 &provisional[provisional_index++] : &final[final_index++];
243 const size_t num_hypotheses = result.hypotheses.size();
244 WebVector<WebString> transcripts(num_hypotheses);
245 WebVector<float> confidences(num_hypotheses);
246 for (size_t i = 0; i < num_hypotheses; ++i) {
247 transcripts[i] = result.hypotheses[i].utterance;
248 confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
250 webkit_result->assign(transcripts, confidences, !result.is_provisional);
253 recognizer_client_->didReceiveResults(
254 GetHandleFromID(request_id), final, provisional);
257 void SpeechRecognitionDispatcher::OnAudioReceiverReady(
258 int request_id,
259 const media::AudioParameters& params,
260 const base::SharedMemoryHandle memory,
261 const base::SyncSocket::TransitDescriptor descriptor) {
262 #if defined(ENABLE_WEBRTC)
263 DCHECK(!speech_audio_sink_.get());
264 if (audio_track_.isNull()) {
265 ResetAudioSink();
266 return;
269 // The instantiation and type of SyncSocket is up to the client since it
270 // is dependency injected to the SpeechRecognitionAudioSink.
271 scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
272 base::SyncSocket::UnwrapHandle(descriptor)));
274 speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
275 audio_track_, params, memory, socket.Pass(),
276 base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
277 base::Unretained(this))));
278 #endif
281 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
282 const WebSpeechRecognitionHandle& handle) {
283 // Search first for an existing mapping.
284 for (HandleMap::iterator iter = handle_map_.begin();
285 iter != handle_map_.end();
286 ++iter) {
287 if (iter->second.equals(handle))
288 return iter->first;
290 // If no existing mapping found, create a new one.
291 const int new_id = next_id_;
292 handle_map_[new_id] = handle;
293 ++next_id_;
294 return new_id;
297 bool SpeechRecognitionDispatcher::HandleExists(
298 const WebSpeechRecognitionHandle& handle) {
299 for (HandleMap::iterator iter = handle_map_.begin();
300 iter != handle_map_.end();
301 ++iter) {
302 if (iter->second.equals(handle))
303 return true;
305 return false;
308 void SpeechRecognitionDispatcher::ResetAudioSink() {
309 #if defined(ENABLE_WEBRTC)
310 speech_audio_sink_.reset();
311 #endif
314 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
315 int request_id) {
316 HandleMap::iterator iter = handle_map_.find(request_id);
317 CHECK(iter != handle_map_.end());
318 return iter->second;
321 } // namespace content