1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/ui/app_list/speech_recognizer.h"
10 #include "base/strings/string16.h"
11 #include "base/timer/timer.h"
12 #include "chrome/browser/ui/app_list/speech_recognizer_delegate.h"
13 #include "content/public/browser/browser_thread.h"
14 #include "content/public/browser/render_process_host.h"
15 #include "content/public/browser/speech_recognition_event_listener.h"
16 #include "content/public/browser/speech_recognition_manager.h"
17 #include "content/public/browser/speech_recognition_session_config.h"
18 #include "content/public/browser/speech_recognition_session_preamble.h"
19 #include "content/public/common/child_process_host.h"
20 #include "content/public/common/speech_recognition_error.h"
21 #include "net/url_request/url_request_context_getter.h"
22 #include "ui/app_list/speech_ui_model_observer.h"
// File-local constants. The two timeouts are expressed in seconds (they are
// converted with base::TimeDelta::FromSeconds() in StartSpeechTimeout()) and
// are passed to StartSpeechTimeout() from OnSoundStart() and
// OnRecognitionResults() respectively. kInvalidSessionId is the sentinel that
// |session_| is initialized to and compared against.
26 // Length of timeout to cancel recognition if there's no speech heard.
27 static const int kNoSpeechTimeoutInSeconds
= 5;
29 // Length of timeout to cancel recognition if no different results are received.
30 static const int kNoNewSpeechTimeoutInSeconds
= 3;
32 // Invalid speech session.
33 static const int kInvalidSessionId
= -1;
35 // Speech recognizer listener. This is separate from SpeechRecognizer because
36 // the speech recognition engine must function from the IO thread. Because of
37 // this, the lifecycle of this class must be decoupled from the lifecycle of
38 // SpeechRecognizer. To avoid circular references, this class has no reference
39 // to SpeechRecognizer. Instead, it has a reference to the
40 // SpeechRecognizerDelegate via a weak pointer that is only ever referenced from
// NOTE(review): the sentence above is cut off here. Judging by the comment on
// |delegate_| below it presumably ends "the UI thread" -- confirm upstream.
//
// The class is ref-counted (base::RefCountedThreadSafe); SpeechRecognizer
// binds its instance into the tasks it posts to the IO thread from Start()
// and Stop().
42 class SpeechRecognizer::EventListener
43 : public base::RefCountedThreadSafe
<SpeechRecognizer::EventListener
>,
44 public content::SpeechRecognitionEventListener
{
// Stores |delegate| and |url_request_context_getter| in the members of the
// same name. The definition asserts that it runs on the UI thread.
46 EventListener(const base::WeakPtr
<SpeechRecognizerDelegate
>& delegate
,
47 net::URLRequestContextGetter
* url_request_context_getter
,
48 const std::string
& locale
);
// Parameter list of StartOnIOThread() (it matches the out-of-line
// definition). NOTE(review): the declarator line itself is not visible here.
51 const std::string
& auth_scope
,
52 const std::string
& auth_token
,
53 const scoped_refptr
<content::SpeechRecognitionSessionPreamble
>& preamble
);
// Resets |session_| to kInvalidSessionId and asks the
// SpeechRecognitionManager to stop audio capture for the previous session.
54 void StopOnIOThread();
// Befriended so the ref-counting base class can invoke the destructor.
57 friend class base::RefCountedThreadSafe
<SpeechRecognizer::EventListener
>;
58 ~EventListener() override
;
// Posts |new_state| to the delegate's OnSpeechRecognitionStateChanged() on
// the UI thread.
60 void NotifyRecognitionStateChanged(SpeechRecognitionState new_state
);
62 // Starts a timer for |timeout_seconds|. When the timer expires, will stop
63 // capturing audio and get a final utterance from the recognition manager.
64 void StartSpeechTimeout(int timeout_seconds
);
// Stops |speech_timeout_|.
65 void StopSpeechTimeout();
68 // Overridden from content::SpeechRecognitionEventListener:
69 // These are always called on the IO thread.
70 void OnRecognitionStart(int session_id
) override
;
71 void OnRecognitionEnd(int session_id
) override
;
72 void OnRecognitionResults(
74 const content::SpeechRecognitionResults
& results
) override
;
75 void OnRecognitionError(
76 int session_id
, const content::SpeechRecognitionError
& error
) override
;
77 void OnSoundStart(int session_id
) override
;
78 void OnSoundEnd(int session_id
) override
;
79 void OnAudioLevelsChange(
80 int session_id
, float volume
, float noise_volume
) override
;
81 void OnEnvironmentEstimationComplete(int session_id
) override
;
82 void OnAudioStart(int session_id
) override
;
83 void OnAudioEnd(int session_id
) override
;
85 // Only dereferenced from the UI thread, but copied on IO thread.
86 base::WeakPtr
<SpeechRecognizerDelegate
> delegate_
;
88 // All remaining members only accessed from the IO thread.
// NOTE(review): the method definitions also use |locale_| and |session_|,
// whose declarations are not visible in this copy.
89 scoped_refptr
<net::URLRequestContextGetter
> url_request_context_getter_
;
// Timer driven by StartSpeechTimeout() / StopSpeechTimeout().
91 base::Timer speech_timeout_
;
// Concatenated utterance text saved at the end of OnRecognitionResults();
// compared there against the next result to detect changed text.
93 base::string16 last_result_str_
;
// Source of the weak pointer installed as |config.event_listener| in
// StartOnIOThread().
95 base::WeakPtrFactory
<EventListener
> weak_factory_
;
97 DISALLOW_COPY_AND_ASSIGN(EventListener
);
// Constructs the listener. Must run on the UI thread (DCHECK below).
// Copies |delegate| and |url_request_context_getter| into the matching
// members, marks |session_| as invalid and binds |weak_factory_| to this
// object.
// NOTE(review): no initializer visibly stores |locale|, although
// StartOnIOThread() reads |locale_| -- an initializer line may be missing
// from this copy; confirm against the upstream file.
100 SpeechRecognizer::EventListener::EventListener(
101 const base::WeakPtr
<SpeechRecognizerDelegate
>& delegate
,
102 net::URLRequestContextGetter
* url_request_context_getter
,
103 const std::string
& locale
)
104 : delegate_(delegate
),
105 url_request_context_getter_(url_request_context_getter
),
// Both base::Timer constructor flags are false. NOTE(review): presumably
// "retain user task" and "repeating" -- confirm against base/timer/timer.h.
107 speech_timeout_(false, false),
108 session_(kInvalidSessionId
),
109 weak_factory_(this) {
110 DCHECK_CURRENTLY_ON(content::BrowserThread::UI
);
// Destructor. DCHECKs that |speech_timeout_| is not running when the
// listener is destroyed.
113 SpeechRecognizer::EventListener::~EventListener() {
114 DCHECK(!speech_timeout_
.IsRunning());
// Creates and starts a speech recognition session. Must run on the IO thread
// (DCHECK below).
//
// |auth_scope|, |auth_token| and |preamble| are copied verbatim into the
// session config. The resulting session id is stored in |session_|.
117 void SpeechRecognizer::EventListener::StartOnIOThread(
118 const std::string
& auth_scope
,
119 const std::string
& auth_token
,
120 const scoped_refptr
<content::SpeechRecognitionSessionPreamble
>& preamble
) {
121 DCHECK_CURRENTLY_ON(content::BrowserThread::IO
);
// NOTE(review): the statement controlled by this `if` is not visible in this
// copy; presumably an already-running session is stopped first -- confirm.
122 if (session_
!= kInvalidSessionId
)
// Session configuration: continuous recognition with interim results, a
// single hypothesis per result and profanity filtering enabled.
125 content::SpeechRecognitionSessionConfig config
;
126 config
.language
= locale_
;
127 config
.is_legacy_api
= false;
128 config
.continuous
= true;
129 config
.interim_results
= true;
130 config
.max_hypotheses
= 1;
131 config
.filter_profanities
= true;
132 config
.url_request_context_getter
= url_request_context_getter_
;
// The manager receives a weak pointer to this listener rather than a raw or
// ref-counted one.
133 config
.event_listener
= weak_factory_
.GetWeakPtr();
134 // kInvalidUniqueID is not a valid render process, so the speech permission
135 // check allows the request through.
136 config
.initial_context
.render_process_id
=
137 content::ChildProcessHost::kInvalidUniqueID
;
138 config
.auth_scope
= auth_scope
;
139 config
.auth_token
= auth_token
;
140 config
.preamble
= preamble
;
// Create the session, remember its id, then start it.
142 auto speech_instance
= content::SpeechRecognitionManager::GetInstance();
143 session_
= speech_instance
->CreateSession(config
);
144 speech_instance
->StartSession(session_
);
// Stops audio capture for the current session. Must run on the IO thread
// (DCHECK below).
147 void SpeechRecognizer::EventListener::StopOnIOThread() {
148 DCHECK_CURRENTLY_ON(content::BrowserThread::IO
);
// NOTE(review): the statement controlled by this `if` is not visible in this
// copy; presumably an early return when there is no session -- confirm.
149 if (session_
== kInvalidSessionId
)
152 // Prevent recursion.
// |session_| is cleared before calling into the manager, so a re-entrant
// call sees kInvalidSessionId in the check above.
153 int session
= session_
;
154 session_
= kInvalidSessionId
;
// NOTE(review): the argument list of this call is cut off in this copy.
156 content::SpeechRecognitionManager::GetInstance()->StopAudioCaptureForSession(
// Forwards |new_state| to the delegate by posting a task to the UI thread
// that invokes SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged.
// NOTE(review): the remaining PostTask/Bind arguments are not visible in this
// copy.
160 void SpeechRecognizer::EventListener::NotifyRecognitionStateChanged(
161 SpeechRecognitionState new_state
) {
162 content::BrowserThread::PostTask(
163 content::BrowserThread::UI
,
165 base::Bind(&SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged
,
// Starts |speech_timeout_| with a delay of |timeout_seconds| seconds; when it
// fires it runs SpeechTimeout() on this object. Must run on the IO thread
// (DCHECK below).
// NOTE(review): the first argument of Start() is not visible in this copy.
170 void SpeechRecognizer::EventListener::StartSpeechTimeout(int timeout_seconds
) {
171 DCHECK_CURRENTLY_ON(content::BrowserThread::IO
);
172 speech_timeout_
.Start(
174 base::TimeDelta::FromSeconds(timeout_seconds
),
// |this| is bound directly into the timer callback.
175 base::Bind(&SpeechRecognizer::EventListener::SpeechTimeout
, this));
// Stops |speech_timeout_|. Must run on the IO thread (DCHECK below).
178 void SpeechRecognizer::EventListener::StopSpeechTimeout() {
179 DCHECK_CURRENTLY_ON(content::BrowserThread::IO
);
180 speech_timeout_
.Stop();
// Callback bound to |speech_timeout_| in StartSpeechTimeout(). Must run on
// the IO thread (DCHECK below).
// NOTE(review): only the thread assertion is visible in this copy; any
// further statements in the body cannot be documented from here.
183 void SpeechRecognizer::EventListener::SpeechTimeout() {
184 DCHECK_CURRENTLY_ON(content::BrowserThread::IO
);
// SpeechRecognitionEventListener override. Reports
// SPEECH_RECOGNITION_RECOGNIZING to the delegate; |session_id| is not used
// by the visible code.
188 void SpeechRecognizer::EventListener::OnRecognitionStart(int session_id
) {
189 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING
);
// SpeechRecognitionEventListener override. Reports SPEECH_RECOGNITION_READY
// to the delegate; |session_id| is not used by the visible code.
// NOTE(review): a statement may be missing before the notification in this
// copy -- confirm against the upstream file.
192 void SpeechRecognizer::EventListener::OnRecognitionEnd(int session_id
) {
194 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY
);
// SpeechRecognitionEventListener override. Concatenates the first hypothesis
// of every entry in |results| into one string, posts it to the delegate's
// OnSpeechResult() on the UI thread together with a flag that is true when
// |final_count| equals results.size(), and saves the string in
// |last_result_str_|.
197 void SpeechRecognizer::EventListener::OnRecognitionResults(
198 int session_id
, const content::SpeechRecognitionResults
& results
) {
199 base::string16 result_str
;
200 size_t final_count
= 0;
201 // The number of results with |is_provisional| false. If |final_count| ==
202 // results.size(), then all results are non-provisional and the recognition is
// NOTE(review): the sentence above is cut off in this copy.
204 for (const auto& result
: results
) {
// NOTE(review): the statement controlled by this `if` is not visible here;
// presumably it increments |final_count| -- confirm.
205 if (!result
.is_provisional
)
// Indexes hypotheses[0] without a visible emptiness check.
// NOTE(review): assumes every result carries at least one hypothesis --
// confirm against the SpeechRecognitionResult contract.
207 result_str
+= result
.hypotheses
[0].utterance
;
// NOTE(review): some PostTask/Bind arguments are not visible in this copy.
209 content::BrowserThread::PostTask(
210 content::BrowserThread::UI
,
212 base::Bind(&SpeechRecognizerDelegate::OnSpeechResult
,
215 final_count
== results
.size()));
217 // Stop the moment we have a final result. If we receive any new or changed
218 // text, restart the timer to give the user more time to speak. (The timer is
219 // recording the amount of time since the most recent utterance.)
// NOTE(review): the statement for the first branch is not visible here; per
// the comment above it presumably stops recognition -- confirm.
220 if (final_count
== results
.size())
222 else if (result_str
!= last_result_str_
)
223 StartSpeechTimeout(kNoNewSpeechTimeoutInSeconds
);
225 last_result_str_
= result_str
;
// SpeechRecognitionEventListener override. Reports
// SPEECH_RECOGNITION_NETWORK_ERROR to the delegate when |error.code| is
// content::SPEECH_RECOGNITION_ERROR_NETWORK, and reports
// SPEECH_RECOGNITION_READY.
// NOTE(review): the closing brace of the `if` is not visible in this copy, so
// whether the READY notification is unconditional cannot be confirmed from
// here; a statement before the `if` may also be missing.
228 void SpeechRecognizer::EventListener::OnRecognitionError(
229 int session_id
, const content::SpeechRecognitionError
& error
) {
231 if (error
.code
== content::SPEECH_RECOGNITION_ERROR_NETWORK
) {
232 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR
);
234 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY
);
// SpeechRecognitionEventListener override. Arms the speech timeout with
// kNoSpeechTimeoutInSeconds and reports SPEECH_RECOGNITION_IN_SPEECH to the
// delegate.
237 void SpeechRecognizer::EventListener::OnSoundStart(int session_id
) {
238 StartSpeechTimeout(kNoSpeechTimeoutInSeconds
);
239 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH
);
// SpeechRecognitionEventListener override. Reports
// SPEECH_RECOGNITION_RECOGNIZING to the delegate.
// NOTE(review): a statement may be missing before the notification in this
// copy -- confirm against the upstream file.
242 void SpeechRecognizer::EventListener::OnSoundEnd(int session_id
) {
244 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING
);
// SpeechRecognitionEventListener override. Converts the reported levels into
// a 16-bit sound level and posts it to the delegate's
// OnSpeechSoundLevelChanged() on the UI thread.
247 void SpeechRecognizer::EventListener::OnAudioLevelsChange(
248 int session_id
, float volume
, float noise_volume
) {
// Both inputs are asserted to lie in [0.0, 1.0].
249 DCHECK_LE(0.0, volume
);
250 DCHECK_GE(1.0, volume
);
251 DCHECK_LE(0.0, noise_volume
);
252 DCHECK_GE(1.0, noise_volume
);
// Subtract the noise level, clamping at zero so the result is never
// negative.
253 volume
= std::max(0.0f
, volume
- noise_volume
);
254 // Both |volume| and |noise_volume| are defined to be in the range [0.0, 1.0].
255 // See: content/public/browser/speech_recognition_event_listener.h
// Scale the [0.0, 1.0] value to the [0, INT16_MAX] range.
256 int16_t sound_level
= static_cast<int16_t>(INT16_MAX
* volume
);
// NOTE(review): the remaining PostTask/Bind arguments are not visible in this
// copy.
257 content::BrowserThread::PostTask(
258 content::BrowserThread::UI
,
260 base::Bind(&SpeechRecognizerDelegate::OnSpeechSoundLevelChanged
,
// SpeechRecognitionEventListener override.
// NOTE(review): only the first line of this definition is visible in this
// copy; the parameter list and body cannot be documented from here.
265 void SpeechRecognizer::EventListener::OnEnvironmentEstimationComplete(
// SpeechRecognitionEventListener override. No statements are visible in the
// body in this copy.
269 void SpeechRecognizer::EventListener::OnAudioStart(int session_id
) {
// SpeechRecognitionEventListener override. No statements are visible in the
// body in this copy.
272 void SpeechRecognizer::EventListener::OnAudioEnd(int session_id
) {
// Constructs the recognizer on the UI thread (DCHECK below). Keeps |delegate|
// in |delegate_| (used by Start() to fetch auth parameters) and creates the
// EventListener, forwarding all three arguments to it.
275 SpeechRecognizer::SpeechRecognizer(
276 const base::WeakPtr
<SpeechRecognizerDelegate
>& delegate
,
277 net::URLRequestContextGetter
* url_request_context_getter
,
278 const std::string
& locale
)
279 : delegate_(delegate
),
280 speech_event_listener_(new EventListener(
281 delegate
, url_request_context_getter
, locale
)) {
282 DCHECK_CURRENTLY_ON(content::BrowserThread::UI
);
// Destructor. Must run on the UI thread (DCHECK below).
// NOTE(review): a statement may follow the assertion in the upstream file; it
// is not visible in this copy.
285 SpeechRecognizer::~SpeechRecognizer() {
286 DCHECK_CURRENTLY_ON(content::BrowserThread::UI
);
// Starts recognition. Must be called on the UI thread (DCHECK below).
// Fetches the auth scope and token from the delegate, then posts
// EventListener::StartOnIOThread to the IO thread, bound to
// |speech_event_listener_|.
290 void SpeechRecognizer::Start(
291 const scoped_refptr
<content::SpeechRecognitionSessionPreamble
>& preamble
) {
292 DCHECK_CURRENTLY_ON(content::BrowserThread::UI
);
293 std::string auth_scope
;
294 std::string auth_token
;
// |delegate_| is a weak pointer and is dereferenced here without a visible
// validity check. NOTE(review): confirm the delegate outlives this call.
295 delegate_
->GetSpeechAuthParameters(&auth_scope
, &auth_token
);
// NOTE(review): the remaining PostTask/Bind arguments are not visible in this
// copy.
297 content::BrowserThread::PostTask(
298 content::BrowserThread::IO
,
300 base::Bind(&SpeechRecognizer::EventListener::StartOnIOThread
,
301 speech_event_listener_
,
// Stops recognition. Must be called on the UI thread (DCHECK below); posts
// EventListener::StopOnIOThread to the IO thread, bound to
// |speech_event_listener_|.
307 void SpeechRecognizer::Stop() {
308 DCHECK_CURRENTLY_ON(content::BrowserThread::UI
);
309 content::BrowserThread::PostTask(
310 content::BrowserThread::IO
,
312 base::Bind(&SpeechRecognizer::EventListener::StopOnIOThread
,
313 speech_event_listener_
));
316 } // namespace app_list