1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/speech_recognition_dispatcher.h"
7 #include "base/basictypes.h"
8 #include "base/strings/utf_string_conversions.h"
9 #include "content/common/speech_recognition_messages.h"
10 #include "content/renderer/render_view_impl.h"
11 #include "third_party/WebKit/public/platform/WebString.h"
12 #include "third_party/WebKit/public/platform/WebVector.h"
13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h"
14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"
18 #if defined(ENABLE_WEBRTC)
19 #include "content/renderer/media/speech_recognition_audio_sink.h"
22 using blink::WebVector
;
23 using blink::WebString
;
24 using blink::WebSpeechGrammar
;
25 using blink::WebSpeechRecognitionHandle
;
26 using blink::WebSpeechRecognitionResult
;
27 using blink::WebSpeechRecognitionParams
;
28 using blink::WebSpeechRecognizerClient
;
// Constructs the dispatcher for |render_view|. |render_view| is forwarded to
// RenderViewObserver, and |recognizer_client_| starts out NULL because no
// recognizer client has been registered yet (see start()).
// NOTE(review): the rest of the initializer list and the constructor body are
// not visible in this excerpt -- confirm against the full file.
32 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
33 RenderViewImpl
* render_view
)
34 : RenderViewObserver(render_view
),
35 recognizer_client_(NULL
),
38 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}
// Sends a SpeechRecognitionHostMsg_AbortAllRequests message through Send().
// NOTE(review): the message arguments and any other statements in this body
// are not visible in this excerpt -- confirm against the full file.
40 void SpeechRecognitionDispatcher::AbortAllRecognitions() {
42 Send(new SpeechRecognitionHostMsg_AbortAllRequests(
// Routes an incoming IPC |message| to the matching handler of this class.
// Each SpeechRecognitionMsg_* type listed in the message map below is paired
// with one On*() member function; a message that matches none of them sets
// |handled| to false.
// NOTE(review): the declaration of |handled|, the handler names paired with
// SpeechRecognitionMsg_ResultRetrieved and
// SpeechRecognitionMsg_AudioReceiverReady, the end of the message map and the
// return statement are not visible in this excerpt.
46 bool SpeechRecognitionDispatcher::OnMessageReceived(
47 const IPC::Message
& message
) {
49 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher
, message
)
50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started
, OnRecognitionStarted
)
51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted
, OnAudioStarted
)
52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted
, OnSoundStarted
)
53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded
, OnSoundEnded
)
54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded
, OnAudioEnded
)
55 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred
, OnErrorOccurred
)
56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended
, OnRecognitionEnded
)
57 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved
,
59 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady
,
// Anything not matched above is reported to the caller as unhandled.
61 IPC_MESSAGE_UNHANDLED(handled
= false)
// Starts a recognition session for |handle|.
//
// |recognizer_client| is stored in |recognizer_client_|; the DCHECK requires
// that it is either the first client seen or the same one as before. The
// settings in |params| (grammars, language, max alternatives, continuous and
// interim-result flags, origin) are copied into a
// SpeechRecognitionHostMsg_StartRequest_Params together with the routing id
// and the request id mapped to |handle|, and the result is sent as a
// SpeechRecognitionHostMsg_StartRequest.
//
// With ENABLE_WEBRTC, an audio track supplied in |params| is kept in
// |audio_track_| when SpeechRecognitionAudioSink::IsSupportedTrack() accepts
// it; the visible code also reports "Provided audioTrack is not supported."
// to the client as an AudioCaptureError.
// NOTE(review): several lines of this body (the unsupported-track branch
// structure, the first didReceiveError() argument, the statements after the
// "Destroy any previous instance" comment, and the preprocessor lines closing
// the ENABLE_WEBRTC regions) are not visible in this excerpt.
66 void SpeechRecognitionDispatcher::start(
67 const WebSpeechRecognitionHandle
& handle
,
68 const WebSpeechRecognitionParams
& params
,
69 WebSpeechRecognizerClient
* recognizer_client
) {
// Only a single client is supported at a time.
70 DCHECK(!recognizer_client_
|| recognizer_client_
== recognizer_client
);
71 recognizer_client_
= recognizer_client
;
73 #if defined(ENABLE_WEBRTC)
74 const blink::WebMediaStreamTrack track
= params
.audioTrack();
75 if (!track
.isNull()) {
76 // Check if this type of track is allowed by implemented policy.
77 if (SpeechRecognitionAudioSink::IsSupportedTrack(track
)) {
78 audio_track_
.assign(track
);
81 // Notify user that the track used is not supported.
82 recognizer_client_
->didReceiveError(
84 WebString("Provided audioTrack is not supported."),
85 WebSpeechRecognizerClient::AudioCaptureError
);
91 // Destroy any previous instance to detach from the audio track.
92 // Each new session should reinstantiate the provider once the track is ready.
96 SpeechRecognitionHostMsg_StartRequest_Params msg_params
;
// Convert every WebSpeechGrammar into a SpeechRecognitionGrammar built from
// its source URL spec and weight.
97 for (size_t i
= 0; i
< params
.grammars().size(); ++i
) {
98 const WebSpeechGrammar
& grammar
= params
.grammars()[i
];
99 msg_params
.grammars
.push_back(
100 SpeechRecognitionGrammar(grammar
.src().spec(), grammar
.weight()));
102 msg_params
.language
= base::UTF16ToUTF8(params
.language());
103 msg_params
.max_hypotheses
= static_cast<uint32
>(params
.maxAlternatives());
104 msg_params
.continuous
= params
.continuous();
105 msg_params
.interim_results
= params
.interimResults();
106 msg_params
.origin_url
= params
.origin().toString().utf8();
107 msg_params
.render_view_id
= routing_id();
// Reuses the id already mapped to |handle| or allocates a new one.
108 msg_params
.request_id
= GetOrCreateIDForHandle(handle
);
109 #if defined(ENABLE_WEBRTC)
110 // Fall back to default input when the track is not allowed.
111 msg_params
.using_audio_track
= !audio_track_
.isNull();
// NOTE(review): the assignment below is presumably the non-WebRTC branch; the
// preprocessor line separating it from the one above is not visible here.
113 msg_params
.using_audio_track
= false;
115 // The handle mapping will be removed in |OnRecognitionEnded|.
116 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params
));
// Asks for audio capture of the session identified by |handle| to stop by
// sending SpeechRecognitionHostMsg_StopCaptureRequest with the routing id and
// the request id mapped to |handle|.
//
// The guard tests whether |recognizer_client| differs from the registered
// client or |handle| has no mapping, i.e. a |stop| without a matching |start|.
// NOTE(review): the statement controlled by the `if` (presumably an early
// return, per the comment) and any statement preceding the guard are not
// visible in this excerpt.
119 void SpeechRecognitionDispatcher::stop(
120 const WebSpeechRecognitionHandle
& handle
,
121 WebSpeechRecognizerClient
* recognizer_client
) {
123 // Ignore a |stop| issued without a matching |start|.
124 if (recognizer_client_
!= recognizer_client
|| !HandleExists(handle
))
126 Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
127 routing_id(), GetOrCreateIDForHandle(handle
)));
// Asks for the session identified by |handle| to be aborted by sending
// SpeechRecognitionHostMsg_AbortRequest with the routing id and the request
// id mapped to |handle|.
//
// The guard tests whether |recognizer_client| differs from the registered
// client or |handle| has no mapping, i.e. an |abort| without a matching
// |start|.
// NOTE(review): the statement controlled by the `if` (presumably an early
// return, per the comment) and any statement preceding the guard are not
// visible in this excerpt.
130 void SpeechRecognitionDispatcher::abort(
131 const WebSpeechRecognitionHandle
& handle
,
132 WebSpeechRecognizerClient
* recognizer_client
) {
134 // Ignore an |abort| issued without a matching |start|.
135 if (recognizer_client_
!= recognizer_client
|| !HandleExists(handle
))
137 Send(new SpeechRecognitionHostMsg_AbortRequest(
138 routing_id(), GetOrCreateIDForHandle(handle
)));
// Handler for SpeechRecognitionMsg_Started: maps |request_id| back to its
// handle with GetHandleFromID() and notifies the client through didStart().
141 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id
) {
142 recognizer_client_
->didStart(GetHandleFromID(request_id
));
// Handler for SpeechRecognitionMsg_AudioStarted: maps |request_id| back to
// its handle with GetHandleFromID() and notifies the client through
// didStartAudio().
145 void SpeechRecognitionDispatcher::OnAudioStarted(int request_id
) {
146 recognizer_client_
->didStartAudio(GetHandleFromID(request_id
));
// Handler for SpeechRecognitionMsg_SoundStarted: maps |request_id| back to
// its handle with GetHandleFromID() and notifies the client through
// didStartSound().
149 void SpeechRecognitionDispatcher::OnSoundStarted(int request_id
) {
150 recognizer_client_
->didStartSound(GetHandleFromID(request_id
));
// Handler for SpeechRecognitionMsg_SoundEnded: maps |request_id| back to its
// handle with GetHandleFromID() and notifies the client through
// didEndSound().
153 void SpeechRecognitionDispatcher::OnSoundEnded(int request_id
) {
154 recognizer_client_
->didEndSound(GetHandleFromID(request_id
));
// Handler for SpeechRecognitionMsg_AudioEnded: maps |request_id| back to its
// handle with GetHandleFromID() and notifies the client through
// didEndAudio().
157 void SpeechRecognitionDispatcher::OnAudioEnded(int request_id
) {
158 recognizer_client_
->didEndAudio(GetHandleFromID(request_id
));
// Translates a SpeechRecognitionErrorCode |e| into the matching
// WebSpeechRecognizerClient::ErrorCode. Most codes have a same-named
// counterpart; in the visible code the returns that follow
// SPEECH_RECOGNITION_ERROR_NONE and SPEECH_RECOGNITION_ERROR_NO_MATCH, as
// well as the final return, yield OtherError.
// NOTE(review): the `switch` header and the lines between those two case
// labels and their returns are not visible in this excerpt -- confirm whether
// they carry assertions before relying on the OtherError mapping.
161 static WebSpeechRecognizerClient::ErrorCode
WebKitErrorCode(
162 SpeechRecognitionErrorCode e
) {
164 case SPEECH_RECOGNITION_ERROR_NONE
:
166 return WebSpeechRecognizerClient::OtherError
;
167 case SPEECH_RECOGNITION_ERROR_NO_SPEECH
:
168 return WebSpeechRecognizerClient::NoSpeechError
;
169 case SPEECH_RECOGNITION_ERROR_ABORTED
:
170 return WebSpeechRecognizerClient::AbortedError
;
171 case SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE
:
172 return WebSpeechRecognizerClient::AudioCaptureError
;
173 case SPEECH_RECOGNITION_ERROR_NETWORK
:
174 return WebSpeechRecognizerClient::NetworkError
;
175 case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED
:
176 return WebSpeechRecognizerClient::NotAllowedError
;
177 case SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED
:
178 return WebSpeechRecognizerClient::ServiceNotAllowedError
;
179 case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR
:
180 return WebSpeechRecognizerClient::BadGrammarError
;
181 case SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED
:
182 return WebSpeechRecognizerClient::LanguageNotSupportedError
;
183 case SPEECH_RECOGNITION_ERROR_NO_MATCH
:
185 return WebSpeechRecognizerClient::OtherError
;
188 return WebSpeechRecognizerClient::OtherError
;
// Handler for SpeechRecognitionMsg_ErrorOccurred. A
// SPEECH_RECOGNITION_ERROR_NO_MATCH is reported to the client through
// didReceiveNoMatch() with a default-constructed result. The
// didReceiveError() call passes the handle for |request_id|, an empty
// message and the code translated by WebKitErrorCode().
// NOTE(review): the lines between the two client calls are not visible in
// this excerpt; presumably the second call is the `else` branch.
191 void SpeechRecognitionDispatcher::OnErrorOccurred(
192 int request_id
, const SpeechRecognitionError
& error
) {
193 if (error
.code
== SPEECH_RECOGNITION_ERROR_NO_MATCH
) {
194 recognizer_client_
->didReceiveNoMatch(GetHandleFromID(request_id
),
195 WebSpeechRecognitionResult());
198 recognizer_client_
->didReceiveError(
199 GetHandleFromID(request_id
),
200 WebString(), // TODO(primiano): message?
201 WebKitErrorCode(error
.code
));
// Handler for SpeechRecognitionMsg_Ended. Looks |request_id| up in
// |handle_map_|; an unknown id is logged with DLOG(ERROR). For a known id the
// handle is copied out, its mapping is erased, and only then is the client
// told through didEnd() (see the ordering note below).
// NOTE(review): the branch structure separating the "not found" path from the
// rest is not visible in this excerpt; presumably an `else`.
205 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id
) {
206 // TODO(tommi): It is possible that the handle isn't found in the array if
207 // the user just refreshed the page. It seems that we then get a notification
208 // for the previously loaded instance of the page.
209 HandleMap::iterator iter
= handle_map_
.find(request_id
);
210 if (iter
== handle_map_
.end()) {
211 DLOG(ERROR
) << "OnRecognitionEnded called for a handle that doesn't exist";
// Copy the handle by value: the map entry it lives in is erased just below.
213 WebSpeechRecognitionHandle handle
= iter
->second
;
214 // Note: we need to erase the handle from the map *before* calling didEnd.
215 // didEnd may call back synchronously to start a new recognition session,
216 // and we don't want to delete the handle from the map after that happens.
217 handle_map_
.erase(request_id
);
219 recognizer_client_
->didEnd(handle
);
// Converts |results| into WebSpeechRecognitionResult objects and delivers
// them to the client through didReceiveResults(), split into a vector of
// final results and a vector of provisional results.
//
// The first loop inspects |is_provisional| on each result so that both
// WebVectors can be sized up front (|provisional_count| entries and
// results.size() - provisional_count entries). The second loop fills each
// slot with the transcripts and confidences of the result's hypotheses.
// NOTE(review): the statement controlled by `if (it->is_provisional)`
// (presumably incrementing |provisional_count|) and the closing lines of the
// loops are not visible in this excerpt.
223 void SpeechRecognitionDispatcher::OnResultsRetrieved(
224 int request_id
, const SpeechRecognitionResults
& results
) {
225 size_t provisional_count
= 0;
226 SpeechRecognitionResults::const_iterator it
= results
.begin();
227 for (; it
!= results
.end(); ++it
) {
228 if (it
->is_provisional
)
232 WebVector
<WebSpeechRecognitionResult
> provisional(provisional_count
);
233 WebVector
<WebSpeechRecognitionResult
> final(
234 results
.size() - provisional_count
);
236 int provisional_index
= 0, final_index
= 0;
237 for (it
= results
.begin(); it
!= results
.end(); ++it
) {
238 const SpeechRecognitionResult
& result
= (*it
);
// Pick the next free slot in whichever output vector this result belongs to.
239 WebSpeechRecognitionResult
* webkit_result
= result
.is_provisional
?
240 &provisional
[provisional_index
++] : &final
[final_index
++];
242 const size_t num_hypotheses
= result
.hypotheses
.size();
243 WebVector
<WebString
> transcripts(num_hypotheses
);
244 WebVector
<float> confidences(num_hypotheses
);
245 for (size_t i
= 0; i
< num_hypotheses
; ++i
) {
246 transcripts
[i
] = result
.hypotheses
[i
].utterance
;
247 confidences
[i
] = static_cast<float>(result
.hypotheses
[i
].confidence
);
// The last argument is the negation of |is_provisional|.
249 webkit_result
->assign(transcripts
, confidences
, !result
.is_provisional
);
252 recognizer_client_
->didReceiveResults(
253 GetHandleFromID(request_id
), final
, provisional
);
// Creates the SpeechRecognitionAudioSink (ENABLE_WEBRTC builds only) from
// |audio_track_|, the audio |params|, the shared |memory| handle and a
// socket unwrapped from |descriptor|.
//
// The DCHECK requires that no sink exists yet. The sink is given a callback
// bound to ResetAudioSink() on this object; base::Unretained(this) means the
// callback holds a raw pointer to this dispatcher.
// NOTE(review): one line of the parameter list, the body of the
// `audio_track_.isNull()` branch and the closing lines are not visible in
// this excerpt -- confirm against the full file.
256 void SpeechRecognitionDispatcher::OnAudioReceiverReady(
258 const media::AudioParameters
& params
,
259 const base::SharedMemoryHandle memory
,
260 const base::SyncSocket::TransitDescriptor descriptor
) {
261 #if defined(ENABLE_WEBRTC)
262 DCHECK(!speech_audio_sink_
.get());
263 if (audio_track_
.isNull()) {
268 // The instantiation and type of SyncSocket is up to the client since it
269 // is dependency injected to the SpeechRecognitionAudioSink.
270 scoped_ptr
<base::SyncSocket
> socket(new base::CancelableSyncSocket(
271 base::SyncSocket::UnwrapHandle(descriptor
)));
// Ownership of |socket| is passed on to the sink via Pass().
273 speech_audio_sink_
.reset(new SpeechRecognitionAudioSink(
274 audio_track_
, params
, memory
, socket
.Pass(),
275 base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink
,
276 base::Unretained(this))));
// Returns the integer request id associated with |handle|, creating a new
// |handle_map_| entry keyed by |next_id_| when no entry compares equal to
// |handle|. The lookup is a linear scan because the map is keyed by id, not
// by handle.
// NOTE(review): the loop increment, both return statements and the update of
// |next_id_| are not visible in this excerpt -- confirm against the full file.
280 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
281 const WebSpeechRecognitionHandle
& handle
) {
282 // Search first for an existing mapping.
283 for (HandleMap::iterator iter
= handle_map_
.begin();
284 iter
!= handle_map_
.end();
286 if (iter
->second
.equals(handle
))
289 // If no existing mapping found, create a new one.
290 const int new_id
= next_id_
;
291 handle_map_
[new_id
] = handle
;
// Scans |handle_map_| for an entry whose handle equals |handle|.
// NOTE(review): the loop increment and the return statements are not visible
// in this excerpt; presumably returns true on a match and false otherwise.
296 bool SpeechRecognitionDispatcher::HandleExists(
297 const WebSpeechRecognitionHandle
& handle
) {
298 for (HandleMap::iterator iter
= handle_map_
.begin();
299 iter
!= handle_map_
.end();
301 if (iter
->second
.equals(handle
))
// Releases the current audio sink by resetting |speech_audio_sink_|. The
// reset is compiled in only when ENABLE_WEBRTC is defined.
307 void SpeechRecognitionDispatcher::ResetAudioSink() {
308 #if defined(ENABLE_WEBRTC)
309 speech_audio_sink_
.reset();
// Looks |request_id| up in |handle_map_| and CHECKs that an entry exists, so
// an unknown id terminates the process rather than continuing with an
// invalid iterator.
// NOTE(review): the parameter list and the return statement are not visible
// in this excerpt; presumably returns a reference to the mapped handle.
313 const WebSpeechRecognitionHandle
& SpeechRecognitionDispatcher::GetHandleFromID(
315 HandleMap::iterator iter
= handle_map_
.find(request_id
);
316 CHECK(iter
!= handle_map_
.end());
320 } // namespace content