IndexedDBFactory now ForceCloses databases.
[chromium-blink-merge.git] / content / browser / speech / speech_recognizer_impl_unittest.cc
blob54d970906daa929349ff17e828258e4cd11c6cc1
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <vector>
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "net/base/net_errors.h"
17 #include "net/url_request/test_url_fetcher_factory.h"
18 #include "net/url_request/url_request_status.h"
19 #include "testing/gtest/include/gtest/gtest.h"
21 using base::MessageLoopProxy;
22 using media::AudioInputController;
23 using media::AudioInputStream;
24 using media::AudioManager;
25 using media::AudioOutputStream;
26 using media::AudioParameters;
27 using media::TestAudioInputController;
28 using media::TestAudioInputControllerFactory;
30 namespace content {
32 class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
33 public testing::Test {
34 public:
35 SpeechRecognizerImplTest()
36 : io_thread_(BrowserThread::IO, &message_loop_),
37 recognition_started_(false),
38 recognition_ended_(false),
39 result_received_(false),
40 audio_started_(false),
41 audio_ended_(false),
42 sound_started_(false),
43 sound_ended_(false),
44 error_(SPEECH_RECOGNITION_ERROR_NONE),
45 volume_(-1.0f) {
46 // SpeechRecognizer takes ownership of sr_engine.
47 SpeechRecognitionEngine* sr_engine =
48 new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */);
49 SpeechRecognitionEngineConfig config;
50 config.audio_num_bits_per_sample =
51 SpeechRecognizerImpl::kNumBitsPerAudioSample;
52 config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate;
53 config.filter_profanities = false;
54 sr_engine->SetConfig(config);
56 const int kTestingSessionId = 1;
57 const bool kOneShotMode = true;
58 recognizer_ = new SpeechRecognizerImpl(
59 this, kTestingSessionId, kOneShotMode, sr_engine);
60 audio_manager_.reset(new media::MockAudioManager(
61 base::MessageLoop::current()->message_loop_proxy().get()));
62 recognizer_->SetAudioManagerForTesting(audio_manager_.get());
64 int audio_packet_length_bytes =
65 (SpeechRecognizerImpl::kAudioSampleRate *
66 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs *
67 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) *
68 SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000);
69 audio_packet_.resize(audio_packet_length_bytes);
72 void CheckEventsConsistency() {
73 // Note: "!x || y" == "x implies y".
74 EXPECT_TRUE(!recognition_ended_ || recognition_started_);
75 EXPECT_TRUE(!audio_ended_ || audio_started_);
76 EXPECT_TRUE(!sound_ended_ || sound_started_);
77 EXPECT_TRUE(!audio_started_ || recognition_started_);
78 EXPECT_TRUE(!sound_started_ || audio_started_);
79 EXPECT_TRUE(!audio_ended_ || (sound_ended_ || !sound_started_));
80 EXPECT_TRUE(!recognition_ended_ || (audio_ended_ || !audio_started_));
83 void CheckFinalEventsConsistency() {
84 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
85 EXPECT_FALSE(recognition_started_ ^ recognition_ended_);
86 EXPECT_FALSE(audio_started_ ^ audio_ended_);
87 EXPECT_FALSE(sound_started_ ^ sound_ended_);
90 // Overridden from SpeechRecognitionEventListener:
91 virtual void OnAudioStart(int session_id) OVERRIDE {
92 audio_started_ = true;
93 CheckEventsConsistency();
96 virtual void OnAudioEnd(int session_id) OVERRIDE {
97 audio_ended_ = true;
98 CheckEventsConsistency();
101 virtual void OnRecognitionResults(
102 int session_id, const SpeechRecognitionResults& results) OVERRIDE {
103 result_received_ = true;
106 virtual void OnRecognitionError(
107 int session_id, const SpeechRecognitionError& error) OVERRIDE {
108 EXPECT_TRUE(recognition_started_);
109 EXPECT_FALSE(recognition_ended_);
110 error_ = error.code;
113 virtual void OnAudioLevelsChange(int session_id, float volume,
114 float noise_volume) OVERRIDE {
115 volume_ = volume;
116 noise_volume_ = noise_volume;
119 virtual void OnRecognitionEnd(int session_id) OVERRIDE {
120 recognition_ended_ = true;
121 CheckEventsConsistency();
124 virtual void OnRecognitionStart(int session_id) OVERRIDE {
125 recognition_started_ = true;
126 CheckEventsConsistency();
129 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE {}
131 virtual void OnSoundStart(int session_id) OVERRIDE {
132 sound_started_ = true;
133 CheckEventsConsistency();
136 virtual void OnSoundEnd(int session_id) OVERRIDE {
137 sound_ended_ = true;
138 CheckEventsConsistency();
141 // testing::Test methods.
142 virtual void SetUp() OVERRIDE {
143 AudioInputController::set_factory_for_testing(
144 &audio_input_controller_factory_);
147 virtual void TearDown() OVERRIDE {
148 AudioInputController::set_factory_for_testing(NULL);
151 void FillPacketWithTestWaveform() {
152 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
153 for (size_t i = 0; i < audio_packet_.size(); ++i)
154 audio_packet_[i] = static_cast<uint8>(i);
157 void FillPacketWithNoise() {
158 int value = 0;
159 int factor = 175;
160 for (size_t i = 0; i < audio_packet_.size(); ++i) {
161 value += factor;
162 audio_packet_[i] = value % 100;
166 protected:
167 base::MessageLoopForIO message_loop_;
168 BrowserThreadImpl io_thread_;
169 scoped_refptr<SpeechRecognizerImpl> recognizer_;
170 scoped_ptr<AudioManager> audio_manager_;
171 bool recognition_started_;
172 bool recognition_ended_;
173 bool result_received_;
174 bool audio_started_;
175 bool audio_ended_;
176 bool sound_started_;
177 bool sound_ended_;
178 SpeechRecognitionErrorCode error_;
179 net::TestURLFetcherFactory url_fetcher_factory_;
180 TestAudioInputControllerFactory audio_input_controller_factory_;
181 std::vector<uint8> audio_packet_;
182 float volume_;
183 float noise_volume_;
186 TEST_F(SpeechRecognizerImplTest, StopNoData) {
187 // Check for callbacks when stopping record before any audio gets recorded.
188 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
189 recognizer_->StopAudioCapture();
190 base::MessageLoop::current()->RunUntilIdle();
191 EXPECT_TRUE(recognition_started_);
192 EXPECT_FALSE(audio_started_);
193 EXPECT_FALSE(result_received_);
194 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
195 CheckFinalEventsConsistency();
198 TEST_F(SpeechRecognizerImplTest, CancelNoData) {
199 // Check for callbacks when canceling recognition before any audio gets
200 // recorded.
201 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
202 recognizer_->AbortRecognition();
203 base::MessageLoop::current()->RunUntilIdle();
204 EXPECT_TRUE(recognition_started_);
205 EXPECT_FALSE(audio_started_);
206 EXPECT_FALSE(result_received_);
207 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
208 CheckFinalEventsConsistency();
211 TEST_F(SpeechRecognizerImplTest, StopWithData) {
212 // Start recording, give some data and then stop. This should wait for the
213 // network callback to arrive before completion.
214 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
215 base::MessageLoop::current()->RunUntilIdle();
216 TestAudioInputController* controller =
217 audio_input_controller_factory_.controller();
218 ASSERT_TRUE(controller);
220 // Try sending 5 chunks of mock audio data and verify that each of them
221 // resulted immediately in a packet sent out via the network. This verifies
222 // that we are streaming out encoded data as chunks without waiting for the
223 // full recording to complete.
224 const size_t kNumChunks = 5;
225 for (size_t i = 0; i < kNumChunks; ++i) {
226 controller->event_handler()->OnData(controller, &audio_packet_[0],
227 audio_packet_.size());
228 base::MessageLoop::current()->RunUntilIdle();
229 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
230 ASSERT_TRUE(fetcher);
231 EXPECT_EQ(i + 1, fetcher->upload_chunks().size());
234 recognizer_->StopAudioCapture();
235 base::MessageLoop::current()->RunUntilIdle();
236 EXPECT_TRUE(audio_started_);
237 EXPECT_TRUE(audio_ended_);
238 EXPECT_FALSE(recognition_ended_);
239 EXPECT_FALSE(result_received_);
240 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
242 // Issue the network callback to complete the process.
243 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
244 ASSERT_TRUE(fetcher);
246 fetcher->set_url(fetcher->GetOriginalURL());
247 net::URLRequestStatus status;
248 status.set_status(net::URLRequestStatus::SUCCESS);
249 fetcher->set_status(status);
250 fetcher->set_response_code(200);
251 fetcher->SetResponseString(
252 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
253 fetcher->delegate()->OnURLFetchComplete(fetcher);
254 base::MessageLoop::current()->RunUntilIdle();
255 EXPECT_TRUE(recognition_ended_);
256 EXPECT_TRUE(result_received_);
257 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
258 CheckFinalEventsConsistency();
261 TEST_F(SpeechRecognizerImplTest, CancelWithData) {
262 // Start recording, give some data and then cancel.
263 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
264 base::MessageLoop::current()->RunUntilIdle();
265 TestAudioInputController* controller =
266 audio_input_controller_factory_.controller();
267 ASSERT_TRUE(controller);
268 controller->event_handler()->OnData(controller, &audio_packet_[0],
269 audio_packet_.size());
270 base::MessageLoop::current()->RunUntilIdle();
271 recognizer_->AbortRecognition();
272 base::MessageLoop::current()->RunUntilIdle();
273 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
274 EXPECT_TRUE(recognition_started_);
275 EXPECT_TRUE(audio_started_);
276 EXPECT_FALSE(result_received_);
277 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
278 CheckFinalEventsConsistency();
281 TEST_F(SpeechRecognizerImplTest, ConnectionError) {
282 // Start recording, give some data and then stop. Issue the network callback
283 // with a connection error and verify that the recognizer bubbles the error up
284 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
285 base::MessageLoop::current()->RunUntilIdle();
286 TestAudioInputController* controller =
287 audio_input_controller_factory_.controller();
288 ASSERT_TRUE(controller);
289 controller->event_handler()->OnData(controller, &audio_packet_[0],
290 audio_packet_.size());
291 base::MessageLoop::current()->RunUntilIdle();
292 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
293 ASSERT_TRUE(fetcher);
295 recognizer_->StopAudioCapture();
296 base::MessageLoop::current()->RunUntilIdle();
297 EXPECT_TRUE(audio_started_);
298 EXPECT_TRUE(audio_ended_);
299 EXPECT_FALSE(recognition_ended_);
300 EXPECT_FALSE(result_received_);
301 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
303 // Issue the network callback to complete the process.
304 fetcher->set_url(fetcher->GetOriginalURL());
305 net::URLRequestStatus status;
306 status.set_status(net::URLRequestStatus::FAILED);
307 status.set_error(net::ERR_CONNECTION_REFUSED);
308 fetcher->set_status(status);
309 fetcher->set_response_code(0);
310 fetcher->SetResponseString(std::string());
311 fetcher->delegate()->OnURLFetchComplete(fetcher);
312 base::MessageLoop::current()->RunUntilIdle();
313 EXPECT_TRUE(recognition_ended_);
314 EXPECT_FALSE(result_received_);
315 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
316 CheckFinalEventsConsistency();
319 TEST_F(SpeechRecognizerImplTest, ServerError) {
320 // Start recording, give some data and then stop. Issue the network callback
321 // with a 500 error and verify that the recognizer bubbles the error up
322 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
323 base::MessageLoop::current()->RunUntilIdle();
324 TestAudioInputController* controller =
325 audio_input_controller_factory_.controller();
326 ASSERT_TRUE(controller);
327 controller->event_handler()->OnData(controller, &audio_packet_[0],
328 audio_packet_.size());
329 base::MessageLoop::current()->RunUntilIdle();
330 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
331 ASSERT_TRUE(fetcher);
333 recognizer_->StopAudioCapture();
334 base::MessageLoop::current()->RunUntilIdle();
335 EXPECT_TRUE(audio_started_);
336 EXPECT_TRUE(audio_ended_);
337 EXPECT_FALSE(recognition_ended_);
338 EXPECT_FALSE(result_received_);
339 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
341 // Issue the network callback to complete the process.
342 fetcher->set_url(fetcher->GetOriginalURL());
343 net::URLRequestStatus status;
344 status.set_status(net::URLRequestStatus::SUCCESS);
345 fetcher->set_status(status);
346 fetcher->set_response_code(500);
347 fetcher->SetResponseString("Internal Server Error");
348 fetcher->delegate()->OnURLFetchComplete(fetcher);
349 base::MessageLoop::current()->RunUntilIdle();
350 EXPECT_TRUE(recognition_ended_);
351 EXPECT_FALSE(result_received_);
352 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
353 CheckFinalEventsConsistency();
356 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorNoData) {
357 // Check if things tear down properly if AudioInputController threw an error.
358 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
359 base::MessageLoop::current()->RunUntilIdle();
360 TestAudioInputController* controller =
361 audio_input_controller_factory_.controller();
362 ASSERT_TRUE(controller);
363 controller->event_handler()->OnError(controller);
364 base::MessageLoop::current()->RunUntilIdle();
365 EXPECT_TRUE(recognition_started_);
366 EXPECT_FALSE(audio_started_);
367 EXPECT_FALSE(result_received_);
368 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
369 CheckFinalEventsConsistency();
372 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
373 // Check if things tear down properly if AudioInputController threw an error
374 // after giving some audio data.
375 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
376 base::MessageLoop::current()->RunUntilIdle();
377 TestAudioInputController* controller =
378 audio_input_controller_factory_.controller();
379 ASSERT_TRUE(controller);
380 controller->event_handler()->OnData(controller, &audio_packet_[0],
381 audio_packet_.size());
382 controller->event_handler()->OnError(controller);
383 base::MessageLoop::current()->RunUntilIdle();
384 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
385 EXPECT_TRUE(recognition_started_);
386 EXPECT_TRUE(audio_started_);
387 EXPECT_FALSE(result_received_);
388 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
389 CheckFinalEventsConsistency();
392 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
393 // Start recording and give a lot of packets with audio samples set to zero.
394 // This should trigger the no-speech detector and issue a callback.
395 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
396 base::MessageLoop::current()->RunUntilIdle();
397 TestAudioInputController* controller =
398 audio_input_controller_factory_.controller();
399 ASSERT_TRUE(controller);
401 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
402 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1;
403 // The vector is already filled with zero value samples on create.
404 for (int i = 0; i < num_packets; ++i) {
405 controller->event_handler()->OnData(controller, &audio_packet_[0],
406 audio_packet_.size());
408 base::MessageLoop::current()->RunUntilIdle();
409 EXPECT_TRUE(recognition_started_);
410 EXPECT_TRUE(audio_started_);
411 EXPECT_FALSE(result_received_);
412 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_);
413 CheckFinalEventsConsistency();
416 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
417 // Start recording and give a lot of packets with audio samples set to zero
418 // and then some more with reasonably loud audio samples. This should be
419 // treated as normal speech input and the no-speech detector should not get
420 // triggered.
421 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
422 base::MessageLoop::current()->RunUntilIdle();
423 TestAudioInputController* controller =
424 audio_input_controller_factory_.controller();
425 ASSERT_TRUE(controller);
426 controller = audio_input_controller_factory_.controller();
427 ASSERT_TRUE(controller);
429 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
430 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
432 // The vector is already filled with zero value samples on create.
433 for (int i = 0; i < num_packets / 2; ++i) {
434 controller->event_handler()->OnData(controller, &audio_packet_[0],
435 audio_packet_.size());
438 FillPacketWithTestWaveform();
439 for (int i = 0; i < num_packets / 2; ++i) {
440 controller->event_handler()->OnData(controller, &audio_packet_[0],
441 audio_packet_.size());
444 base::MessageLoop::current()->RunUntilIdle();
445 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
446 EXPECT_TRUE(audio_started_);
447 EXPECT_FALSE(audio_ended_);
448 EXPECT_FALSE(recognition_ended_);
449 recognizer_->AbortRecognition();
450 base::MessageLoop::current()->RunUntilIdle();
451 CheckFinalEventsConsistency();
454 TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
455 // Start recording and give a lot of packets with audio samples set to zero
456 // and then some more with reasonably loud audio samples. Check that we don't
457 // get the callback during estimation phase, then get zero for the silence
458 // samples and proper volume for the loud audio.
459 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
460 base::MessageLoop::current()->RunUntilIdle();
461 TestAudioInputController* controller =
462 audio_input_controller_factory_.controller();
463 ASSERT_TRUE(controller);
464 controller = audio_input_controller_factory_.controller();
465 ASSERT_TRUE(controller);
467 // Feed some samples to begin with for the endpointer to do noise estimation.
468 int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs /
469 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
470 FillPacketWithNoise();
471 for (int i = 0; i < num_packets; ++i) {
472 controller->event_handler()->OnData(controller, &audio_packet_[0],
473 audio_packet_.size());
475 base::MessageLoop::current()->RunUntilIdle();
476 EXPECT_EQ(-1.0f, volume_); // No audio volume set yet.
478 // The vector is already filled with zero value samples on create.
479 controller->event_handler()->OnData(controller, &audio_packet_[0],
480 audio_packet_.size());
481 base::MessageLoop::current()->RunUntilIdle();
482 EXPECT_FLOAT_EQ(0.74939233f, volume_);
484 FillPacketWithTestWaveform();
485 controller->event_handler()->OnData(controller, &audio_packet_[0],
486 audio_packet_.size());
487 base::MessageLoop::current()->RunUntilIdle();
488 EXPECT_NEAR(0.89926866f, volume_, 0.00001f);
489 EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
491 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
492 EXPECT_FALSE(audio_ended_);
493 EXPECT_FALSE(recognition_ended_);
494 recognizer_->AbortRecognition();
495 base::MessageLoop::current()->RunUntilIdle();
496 CheckFinalEventsConsistency();
499 } // namespace content