// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/speech_recognition_dispatcher.h"

#include "base/basictypes.h"
#include "base/strings/utf_string_conversions.h"
#include "content/common/speech_recognition_messages.h"
#include "content/renderer/render_view_impl.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/platform/WebVector.h"
#include "third_party/WebKit/public/web/WebSpeechGrammar.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
#include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"

#if defined(ENABLE_WEBRTC)
#include "content/renderer/media/speech_recognition_audio_sink.h"
#endif

using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;

namespace content {

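// SpeechRecognitionDispatcher bridges Blink's Web Speech API to the
// browser-side recognition service: start/stop/abort calls from the page are
// forwarded as IPC requests, and browser notifications are routed back to
// the WebSpeechRecognizerClient.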
SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      next_id_(1) {}

SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}

void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  ResetAudioSink();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(
      routing_id()));
}

bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
                        OnAudioReceiverReady)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}

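// Implements WebSpeechRecognizer::start(): validates the optional audio
// track, then asks the browser process to begin a recognition session.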
void SpeechRecognitionDispatcher::start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

#if defined(ENABLE_WEBRTC)
  const blink::WebMediaStreamTrack track = params.audioTrack();
  if (!track.isNull()) {
    // Check if this type of track is allowed by implemented policy.
    if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
      audio_track_.assign(track);
    } else {
      audio_track_.reset();
      // Notify user that the track used is not supported.
      recognizer_client_->didReceiveError(
          handle,
          WebString("Provided audioTrack is not supported."),
          WebSpeechRecognizerClient::AudioCaptureError);
      return;
    }
  }

  // Destroy any previous instance to detach from the audio track.
  // Each new session should reinstantiate the provider once the track is
  // ready.
  ResetAudioSink();
#endif

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.grammars()[i];
    msg_params.grammars.push_back(
        SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
  }
  msg_params.language =
      base::UTF16ToUTF8(base::StringPiece16(params.language()));
  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
  msg_params.continuous = params.continuous();
  msg_params.interim_results = params.interimResults();
  msg_params.origin_url = params.origin().toString().utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
#if defined(ENABLE_WEBRTC)
  // Fall back to default input when the track is not allowed.
  msg_params.using_audio_track = !audio_track_.isNull();
#else
  msg_params.using_audio_track = false;
#endif
  // The handle mapping will be removed in |OnRecognitionEnd|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}

void SpeechRecognitionDispatcher::stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->didStart(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
  recognizer_client_->didStartAudio(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
  recognizer_client_->didStartSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
  recognizer_client_->didEndSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
  recognizer_client_->didEndAudio(GetHandleFromID(request_id));
}

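// Maps the content-layer error enum onto the Blink enum exposed through the
// Web Speech API.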
static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    SpeechRecognitionErrorCode e) {
  switch (e) {
    case SPEECH_RECOGNITION_ERROR_NONE:
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
    case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      return WebSpeechRecognizerClient::NoSpeechError;
    case SPEECH_RECOGNITION_ERROR_ABORTED:
      return WebSpeechRecognizerClient::AbortedError;
    case SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE:
      return WebSpeechRecognizerClient::AudioCaptureError;
    case SPEECH_RECOGNITION_ERROR_NETWORK:
      return WebSpeechRecognizerClient::NetworkError;
    case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
      return WebSpeechRecognizerClient::NotAllowedError;
    case SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED:
      return WebSpeechRecognizerClient::ServiceNotAllowedError;
    case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
      return WebSpeechRecognizerClient::BadGrammarError;
    case SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED:
      return WebSpeechRecognizerClient::LanguageNotSupportedError;
    case SPEECH_RECOGNITION_ERROR_NO_MATCH:
      // NO_MATCH is reported through didReceiveNoMatch, not as an error.
      NOTREACHED();
      return WebSpeechRecognizerClient::OtherError;
  }
  NOTREACHED();
  return WebSpeechRecognizerClient::OtherError;
}

void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id, const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    ResetAudioSink();
    recognizer_client_->didReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}

void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a
  // notification for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    ResetAudioSink();
    recognizer_client_->didEnd(handle);
  }
}

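// Results arrive from the browser as one flat list; Blink expects interim
// (provisional) and final results in separate vectors, so split them here.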
void SpeechRecognitionDispatcher::OnResultsRetrieved(
    int request_id, const SpeechRecognitionResults& results) {
  size_t provisional_count = 0;
  SpeechRecognitionResults::const_iterator it = results.begin();
  for (; it != results.end(); ++it) {
    if (it->is_provisional)
      ++provisional_count;
  }

  WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
  WebVector<WebSpeechRecognitionResult> final(
      results.size() - provisional_count);

  int provisional_index = 0, final_index = 0;
  for (it = results.begin(); it != results.end(); ++it) {
    const SpeechRecognitionResult& result = (*it);
    WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
        &provisional[provisional_index++] : &final[final_index++];

    const size_t num_hypotheses = result.hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = result.hypotheses[i].utterance;
      confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
    }
    webkit_result->assign(transcripts, confidences, !result.is_provisional);
  }

  recognizer_client_->didReceiveResults(
      GetHandleFromID(request_id), final, provisional);
}

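// Called once the browser has set up the shared-memory audio transport for a
// MediaStreamTrack-based session (WebRTC builds only); attaches the audio
// sink to it.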
void SpeechRecognitionDispatcher::OnAudioReceiverReady(
    int request_id,
    const media::AudioParameters& params,
    const base::SharedMemoryHandle memory,
    const base::SyncSocket::TransitDescriptor descriptor) {
#if defined(ENABLE_WEBRTC)
  DCHECK(!speech_audio_sink_.get());
  if (audio_track_.isNull()) {
    ResetAudioSink();
    return;
  }

  // The instantiation and type of SyncSocket is up to the client since it
  // is dependency injected to the SpeechRecognitionAudioSink.
  scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
      base::SyncSocket::UnwrapHandle(descriptor)));

  speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
      audio_track_, params, memory, socket.Pass(),
      base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
                 base::Unretained(this))));
#endif
}

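// Blink handles are opaque objects; the dispatcher keys them by integer
// request IDs so sessions can be referenced across the IPC boundary.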
int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
    const WebSpeechRecognitionHandle& handle) {
  // Search first for an existing mapping.
  for (HandleMap::iterator iter = handle_map_.begin();
      iter != handle_map_.end();
      ++iter) {
    if (iter->second.equals(handle))
      return iter->first;
  }
  // If no existing mapping found, create a new one.
  const int new_id = next_id_;
  handle_map_[new_id] = handle;
  ++next_id_;
  return new_id;
}

bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  for (HandleMap::iterator iter = handle_map_.begin();
      iter != handle_map_.end();
      ++iter) {
    if (iter->second.equals(handle))
      return true;
  }
  return false;
}

void SpeechRecognitionDispatcher::ResetAudioSink() {
#if defined(ENABLE_WEBRTC)
  speech_audio_sink_.reset();
#endif
}

const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
    int request_id) {
  HandleMap::iterator iter = handle_map_.find(request_id);
  CHECK(iter != handle_map_.end());
  return iter->second;
}

}  // namespace content