// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/speech_recognition_dispatcher.h"

#include "base/basictypes.h"
#include "base/strings/utf_string_conversions.h"
#include "content/common/speech_recognition_messages.h"
#include "content/renderer/render_view_impl.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/platform/WebVector.h"
#include "third_party/WebKit/public/web/WebSpeechGrammar.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
#include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"

#if defined(ENABLE_WEBRTC)
#include "content/renderer/media/speech_recognition_audio_sink.h"
#endif

using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;

namespace content {

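// SpeechRecognitionDispatcher is the renderer-side endpoint of the speech
// recognition IPC channel: it forwards Blink's start/stop/abort requests to
// the browser process and routes the resulting SpeechRecognitionMsg_* events
// back to the WebSpeechRecognizerClient. Each WebSpeechRecognitionHandle is
// mapped to an integer request id (in |handle_map_|) so that sessions can be
// identified across the IPC boundary.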
SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      next_id_(1) {}

SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}

void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  ResetAudioSink();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(
      routing_id()));
}

bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
                        OnAudioReceiverReady)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}

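// Blink entry point for starting a new recognition session. The
// WebSpeechRecognitionParams are flattened into a
// SpeechRecognitionHostMsg_StartRequest_Params struct and sent to the
// browser process, which owns the actual recognition engine.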
void SpeechRecognitionDispatcher::start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

#if defined(ENABLE_WEBRTC)
  const blink::WebMediaStreamTrack track = params.audioTrack();
  if (!track.isNull()) {
    // Check if this type of track is allowed by implemented policy.
    if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
      audio_track_.assign(track);
    } else {
      audio_track_.reset();
      // Notify user that the track used is not supported.
      recognizer_client_->didReceiveError(
          handle,
          WebString("Provided audioTrack is not supported."),
          WebSpeechRecognizerClient::AudioCaptureError);
      return;
    }
  }

  // Destroy any previous instance to detach from the audio track.
  // Each new session should reinstantiate the provider once the track is
  // ready.
  ResetAudioSink();
#endif

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.grammars()[i];
    msg_params.grammars.push_back(
        SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
  }
  msg_params.language = base::UTF16ToUTF8(params.language());
  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
  msg_params.continuous = params.continuous();
  msg_params.interim_results = params.interimResults();
  msg_params.origin_url = params.origin().toString().utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
#if defined(ENABLE_WEBRTC)
  // Fall back to default input when the track is not allowed.
  msg_params.using_audio_track = !audio_track_.isNull();
#else
  msg_params.using_audio_track = false;
#endif
  // The handle mapping will be removed in |OnRecognitionEnd|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}

void SpeechRecognitionDispatcher::stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->didStart(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
  recognizer_client_->didStartAudio(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
  recognizer_client_->didStartSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
  recognizer_client_->didEndSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
  recognizer_client_->didEndAudio(GetHandleFromID(request_id));
}

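// Translates the content-layer SpeechRecognitionErrorCode enum into Blink's
// WebSpeechRecognizerClient error codes. Values that should never surface
// here as errors (none, no-match) fall back to OtherError.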
static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    SpeechRecognitionErrorCode e) {
  switch (e) {
    case SPEECH_RECOGNITION_ERROR_NONE:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_ABORTED:
      return WebSpeechRecognizerClient::AbortedError;
    case SPEECH_RECOGNITION_ERROR_AUDIO:
      return WebSpeechRecognizerClient::AudioCaptureError;
    case SPEECH_RECOGNITION_ERROR_NETWORK:
      return WebSpeechRecognizerClient::NetworkError;
    case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
      return WebSpeechRecognizerClient::NotAllowedError;
    case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      return WebSpeechRecognizerClient::NoSpeechError;
    case SPEECH_RECOGNITION_ERROR_NO_MATCH:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
      return WebSpeechRecognizerClient::BadGrammarError;
  }
  NOTREACHED();
  return WebSpeechRecognizerClient::OtherError;
}

void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id, const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    ResetAudioSink();
    recognizer_client_->didReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}

void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a
  // notification for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    ResetAudioSink();
    recognizer_client_->didEnd(handle);
  }
}

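// Splits the incoming results into provisional (interim) and final batches,
// converts each hypothesis into Blink types, and delivers both vectors to
// the client in a single didReceiveResults() call.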
void SpeechRecognitionDispatcher::OnResultsRetrieved(
    int request_id, const SpeechRecognitionResults& results) {
  size_t provisional_count = 0;
  SpeechRecognitionResults::const_iterator it = results.begin();
  for (; it != results.end(); ++it) {
    if (it->is_provisional)
      ++provisional_count;
  }

  WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
  WebVector<WebSpeechRecognitionResult> final(
      results.size() - provisional_count);

  int provisional_index = 0, final_index = 0;
  for (it = results.begin(); it != results.end(); ++it) {
    const SpeechRecognitionResult& result = (*it);
    WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
        &provisional[provisional_index++] : &final[final_index++];

    const size_t num_hypotheses = result.hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = result.hypotheses[i].utterance;
      confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
    }
    webkit_result->assign(transcripts, confidences, !result.is_provisional);
  }

  recognizer_client_->didReceiveResults(
      GetHandleFromID(request_id), final, provisional);
}

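// Invoked once the browser-side audio receiver is ready to consume audio
// from the renderer. Wires the previously captured MediaStream track into a
// SpeechRecognitionAudioSink that streams audio over the given shared-memory
// buffer, synchronized through the SyncSocket descriptor.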
void SpeechRecognitionDispatcher::OnAudioReceiverReady(
    int request_id,
    const media::AudioParameters& params,
    const base::SharedMemoryHandle memory,
    const base::SyncSocket::TransitDescriptor descriptor) {
#if defined(ENABLE_WEBRTC)
  DCHECK(!speech_audio_sink_.get());
  if (audio_track_.isNull()) {
    ResetAudioSink();
    return;
  }

  // The instantiation and type of SyncSocket is up to the client since it
  // is dependency injected to the SpeechRecognitionAudioSink.
  scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
      base::SyncSocket::UnwrapHandle(descriptor)));

  speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
      audio_track_, params, memory, socket.Pass(),
      base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
                 base::Unretained(this))));
#endif
}

int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
    const WebSpeechRecognitionHandle& handle) {
  // Search first for an existing mapping.
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end();
       ++iter) {
    if (iter->second.equals(handle))
      return iter->first;
  }
  // If no existing mapping found, create a new one.
  const int new_id = next_id_;
  handle_map_[new_id] = handle;
  ++next_id_;
  return new_id;
}

bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end();
       ++iter) {
    if (iter->second.equals(handle))
      return true;
  }
  return false;
}

void SpeechRecognitionDispatcher::ResetAudioSink() {
#if defined(ENABLE_WEBRTC)
  speech_audio_sink_.reset();
#endif
}

const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
    int request_id) {
  HandleMap::iterator iter = handle_map_.find(request_id);
  DCHECK(iter != handle_map_.end());
  return iter->second;
}

}  // namespace content