Add a timeout for app list voice search for when the query doesn't change.
[chromium-blink-merge.git] / chrome / browser / ui / app_list / speech_recognizer.cc
blob8f6eafadeccb1b42cfd2a0e8ca0bd0c69c5bc9a2
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/ui/app_list/speech_recognizer.h"
7 #include <algorithm>
9 #include "base/bind.h"
10 #include "base/strings/string16.h"
11 #include "base/timer/timer.h"
12 #include "chrome/browser/ui/app_list/speech_recognizer_delegate.h"
13 #include "content/public/browser/browser_thread.h"
14 #include "content/public/browser/render_process_host.h"
15 #include "content/public/browser/speech_recognition_event_listener.h"
16 #include "content/public/browser/speech_recognition_manager.h"
17 #include "content/public/browser/speech_recognition_session_config.h"
18 #include "content/public/browser/speech_recognition_session_preamble.h"
19 #include "content/public/common/child_process_host.h"
20 #include "content/public/common/speech_recognition_error.h"
21 #include "net/url_request/url_request_context_getter.h"
22 #include "ui/app_list/speech_ui_model_observer.h"
24 namespace app_list {
namespace {

// Seconds the speech timer is armed for when sound is first detected; if it
// expires, recognition is stopped.
const int kNoSpeechTimeoutInSeconds = 5;

// Seconds the speech timer is re-armed for each time an interim result differs
// from the previous one; if it expires, recognition is stopped.
const int kNoNewSpeechTimeoutInSeconds = 3;

// Sentinel session id meaning "no recognition session is active".
const int kInvalidSessionId = -1;

}  // namespace
35 // Speech recognizer listener. This is separate from SpeechRecognizer because
36 // the speech recognition engine must function from the IO thread. Because of
37 // this, the lifecycle of this class must be decoupled from the lifecycle of
38 // SpeechRecognizer. To avoid circular references, this class has no reference
39 // to SpeechRecognizer. Instead, it has a reference to the
40 // SpeechRecognizerDelegate via a weak pointer that is only ever referenced from
41 // the UI thread.
42 class SpeechRecognizer::EventListener
43 : public base::RefCountedThreadSafe<SpeechRecognizer::EventListener>,
44 public content::SpeechRecognitionEventListener {
45 public:
46 EventListener(const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
47 net::URLRequestContextGetter* url_request_context_getter,
48 const std::string& locale);
50 void StartOnIOThread(
51 const std::string& auth_scope,
52 const std::string& auth_token,
53 const scoped_refptr<content::SpeechRecognitionSessionPreamble>& preamble);
54 void StopOnIOThread();
56 private:
57 friend class base::RefCountedThreadSafe<SpeechRecognizer::EventListener>;
58 ~EventListener() override;
60 void NotifyRecognitionStateChanged(SpeechRecognitionState new_state);
62 // Starts a timer for |timeout_seconds|. When the timer expires, will stop
63 // capturing audio and get a final utterance from the recognition manager.
64 void StartSpeechTimeout(int timeout_seconds);
65 void StopSpeechTimeout();
66 void SpeechTimeout();
68 // Overidden from content::SpeechRecognitionEventListener:
69 // These are always called on the IO thread.
70 void OnRecognitionStart(int session_id) override;
71 void OnRecognitionEnd(int session_id) override;
72 void OnRecognitionResults(
73 int session_id,
74 const content::SpeechRecognitionResults& results) override;
75 void OnRecognitionError(
76 int session_id, const content::SpeechRecognitionError& error) override;
77 void OnSoundStart(int session_id) override;
78 void OnSoundEnd(int session_id) override;
79 void OnAudioLevelsChange(
80 int session_id, float volume, float noise_volume) override;
81 void OnEnvironmentEstimationComplete(int session_id) override;
82 void OnAudioStart(int session_id) override;
83 void OnAudioEnd(int session_id) override;
85 // Only dereferenced from the UI thread, but copied on IO thread.
86 base::WeakPtr<SpeechRecognizerDelegate> delegate_;
88 // All remaining members only accessed from the IO thread.
89 scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
90 std::string locale_;
91 base::Timer speech_timeout_;
92 int session_;
93 base::string16 last_result_str_;
95 base::WeakPtrFactory<EventListener> weak_factory_;
97 DISALLOW_COPY_AND_ASSIGN(EventListener);
100 SpeechRecognizer::EventListener::EventListener(
101 const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
102 net::URLRequestContextGetter* url_request_context_getter,
103 const std::string& locale)
104 : delegate_(delegate),
105 url_request_context_getter_(url_request_context_getter),
106 locale_(locale),
107 speech_timeout_(false, false),
108 session_(kInvalidSessionId),
109 weak_factory_(this) {
110 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
113 SpeechRecognizer::EventListener::~EventListener() {
114 DCHECK(!speech_timeout_.IsRunning());
117 void SpeechRecognizer::EventListener::StartOnIOThread(
118 const std::string& auth_scope,
119 const std::string& auth_token,
120 const scoped_refptr<content::SpeechRecognitionSessionPreamble>& preamble) {
121 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
122 if (session_ != kInvalidSessionId)
123 StopOnIOThread();
125 content::SpeechRecognitionSessionConfig config;
126 config.language = locale_;
127 config.is_legacy_api = false;
128 config.continuous = true;
129 config.interim_results = true;
130 config.max_hypotheses = 1;
131 config.filter_profanities = true;
132 config.url_request_context_getter = url_request_context_getter_;
133 config.event_listener = weak_factory_.GetWeakPtr();
134 // kInvalidUniqueID is not a valid render process, so the speech permission
135 // check allows the request through.
136 config.initial_context.render_process_id =
137 content::ChildProcessHost::kInvalidUniqueID;
138 config.auth_scope = auth_scope;
139 config.auth_token = auth_token;
140 config.preamble = preamble;
142 auto speech_instance = content::SpeechRecognitionManager::GetInstance();
143 session_ = speech_instance->CreateSession(config);
144 speech_instance->StartSession(session_);
147 void SpeechRecognizer::EventListener::StopOnIOThread() {
148 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
149 if (session_ == kInvalidSessionId)
150 return;
152 // Prevent recursion.
153 int session = session_;
154 session_ = kInvalidSessionId;
155 StopSpeechTimeout();
156 content::SpeechRecognitionManager::GetInstance()->StopAudioCaptureForSession(
157 session);
160 void SpeechRecognizer::EventListener::NotifyRecognitionStateChanged(
161 SpeechRecognitionState new_state) {
162 content::BrowserThread::PostTask(
163 content::BrowserThread::UI,
164 FROM_HERE,
165 base::Bind(&SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged,
166 delegate_,
167 new_state));
170 void SpeechRecognizer::EventListener::StartSpeechTimeout(int timeout_seconds) {
171 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
172 speech_timeout_.Start(
173 FROM_HERE,
174 base::TimeDelta::FromSeconds(timeout_seconds),
175 base::Bind(&SpeechRecognizer::EventListener::SpeechTimeout, this));
178 void SpeechRecognizer::EventListener::StopSpeechTimeout() {
179 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
180 speech_timeout_.Stop();
183 void SpeechRecognizer::EventListener::SpeechTimeout() {
184 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
185 StopOnIOThread();
188 void SpeechRecognizer::EventListener::OnRecognitionStart(int session_id) {
189 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
192 void SpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {
193 StopOnIOThread();
194 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
197 void SpeechRecognizer::EventListener::OnRecognitionResults(
198 int session_id, const content::SpeechRecognitionResults& results) {
199 base::string16 result_str;
200 size_t final_count = 0;
201 // The number of results with |is_provisional| false. If |final_count| ==
202 // results.size(), then all results are non-provisional and the recognition is
203 // complete.
204 for (const auto& result : results) {
205 if (!result.is_provisional)
206 final_count++;
207 result_str += result.hypotheses[0].utterance;
209 content::BrowserThread::PostTask(
210 content::BrowserThread::UI,
211 FROM_HERE,
212 base::Bind(&SpeechRecognizerDelegate::OnSpeechResult,
213 delegate_,
214 result_str,
215 final_count == results.size()));
217 // Stop the moment we have a final result. If we receive any new or changed
218 // text, restart the timer to give the user more time to speak. (The timer is
219 // recording the amount of time since the most recent utterance.)
220 if (final_count == results.size())
221 StopOnIOThread();
222 else if (result_str != last_result_str_)
223 StartSpeechTimeout(kNoNewSpeechTimeoutInSeconds);
225 last_result_str_ = result_str;
228 void SpeechRecognizer::EventListener::OnRecognitionError(
229 int session_id, const content::SpeechRecognitionError& error) {
230 StopOnIOThread();
231 if (error.code == content::SPEECH_RECOGNITION_ERROR_NETWORK) {
232 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR);
234 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
237 void SpeechRecognizer::EventListener::OnSoundStart(int session_id) {
238 StartSpeechTimeout(kNoSpeechTimeoutInSeconds);
239 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH);
242 void SpeechRecognizer::EventListener::OnSoundEnd(int session_id) {
243 StopOnIOThread();
244 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
247 void SpeechRecognizer::EventListener::OnAudioLevelsChange(
248 int session_id, float volume, float noise_volume) {
249 DCHECK_LE(0.0, volume);
250 DCHECK_GE(1.0, volume);
251 DCHECK_LE(0.0, noise_volume);
252 DCHECK_GE(1.0, noise_volume);
253 volume = std::max(0.0f, volume - noise_volume);
254 // Both |volume| and |noise_volume| are defined to be in the range [0.0, 1.0].
255 // See: content/public/browser/speech_recognition_event_listener.h
256 int16_t sound_level = static_cast<int16_t>(INT16_MAX * volume);
257 content::BrowserThread::PostTask(
258 content::BrowserThread::UI,
259 FROM_HERE,
260 base::Bind(&SpeechRecognizerDelegate::OnSpeechSoundLevelChanged,
261 delegate_,
262 sound_level));
265 void SpeechRecognizer::EventListener::OnEnvironmentEstimationComplete(
266 int session_id) {
269 void SpeechRecognizer::EventListener::OnAudioStart(int session_id) {
272 void SpeechRecognizer::EventListener::OnAudioEnd(int session_id) {
275 SpeechRecognizer::SpeechRecognizer(
276 const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
277 net::URLRequestContextGetter* url_request_context_getter,
278 const std::string& locale)
279 : delegate_(delegate),
280 speech_event_listener_(new EventListener(
281 delegate, url_request_context_getter, locale)) {
282 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
285 SpeechRecognizer::~SpeechRecognizer() {
286 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
287 Stop();
290 void SpeechRecognizer::Start(
291 const scoped_refptr<content::SpeechRecognitionSessionPreamble>& preamble) {
292 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
293 std::string auth_scope;
294 std::string auth_token;
295 delegate_->GetSpeechAuthParameters(&auth_scope, &auth_token);
297 content::BrowserThread::PostTask(
298 content::BrowserThread::IO,
299 FROM_HERE,
300 base::Bind(&SpeechRecognizer::EventListener::StartOnIOThread,
301 speech_event_listener_,
302 auth_scope,
303 auth_token,
304 preamble));
307 void SpeechRecognizer::Stop() {
308 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
309 content::BrowserThread::PostTask(
310 content::BrowserThread::IO,
311 FROM_HERE,
312 base::Bind(&SpeechRecognizer::EventListener::StopOnIOThread,
313 speech_event_listener_));
316 } // namespace app_list