[Session restore] Rename group name Enabled to Restore.
[chromium-blink-merge.git] / content / browser / speech / speech_recognizer_impl_unittest.cc
blob7168fb649739612be1603264bf2adbaa773da19f
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <vector>
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "media/base/audio_bus.h"
17 #include "net/base/net_errors.h"
18 #include "net/url_request/test_url_fetcher_factory.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using base::MessageLoopProxy;
23 using media::AudioInputController;
24 using media::AudioInputStream;
25 using media::AudioManager;
26 using media::AudioOutputStream;
27 using media::AudioParameters;
28 using media::TestAudioInputController;
29 using media::TestAudioInputControllerFactory;
31 namespace content {
33 class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
34 public testing::Test {
35 public:
36 SpeechRecognizerImplTest()
37 : io_thread_(BrowserThread::IO, &message_loop_),
38 recognition_started_(false),
39 recognition_ended_(false),
40 result_received_(false),
41 audio_started_(false),
42 audio_ended_(false),
43 sound_started_(false),
44 sound_ended_(false),
45 error_(SPEECH_RECOGNITION_ERROR_NONE),
46 volume_(-1.0f) {
47 // SpeechRecognizer takes ownership of sr_engine.
48 SpeechRecognitionEngine* sr_engine =
49 new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */);
50 SpeechRecognitionEngineConfig config;
51 config.audio_num_bits_per_sample =
52 SpeechRecognizerImpl::kNumBitsPerAudioSample;
53 config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate;
54 config.filter_profanities = false;
55 sr_engine->SetConfig(config);
57 const int kTestingSessionId = 1;
58 recognizer_ = new SpeechRecognizerImpl(
59 this, kTestingSessionId, false, false, sr_engine);
60 audio_manager_.reset(new media::MockAudioManager(
61 base::MessageLoop::current()->message_loop_proxy().get()));
62 recognizer_->SetAudioManagerForTesting(audio_manager_.get());
64 int audio_packet_length_bytes =
65 (SpeechRecognizerImpl::kAudioSampleRate *
66 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs *
67 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) *
68 SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000);
69 audio_packet_.resize(audio_packet_length_bytes);
71 const int channels =
72 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout);
73 bytes_per_sample_ = SpeechRecognizerImpl::kNumBitsPerAudioSample / 8;
74 const int frames = audio_packet_length_bytes / channels / bytes_per_sample_;
75 audio_bus_ = media::AudioBus::Create(channels, frames);
76 audio_bus_->Zero();
79 void CheckEventsConsistency() {
80 // Note: "!x || y" == "x implies y".
81 EXPECT_TRUE(!recognition_ended_ || recognition_started_);
82 EXPECT_TRUE(!audio_ended_ || audio_started_);
83 EXPECT_TRUE(!sound_ended_ || sound_started_);
84 EXPECT_TRUE(!audio_started_ || recognition_started_);
85 EXPECT_TRUE(!sound_started_ || audio_started_);
86 EXPECT_TRUE(!audio_ended_ || (sound_ended_ || !sound_started_));
87 EXPECT_TRUE(!recognition_ended_ || (audio_ended_ || !audio_started_));
90 void CheckFinalEventsConsistency() {
91 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
92 EXPECT_FALSE(recognition_started_ ^ recognition_ended_);
93 EXPECT_FALSE(audio_started_ ^ audio_ended_);
94 EXPECT_FALSE(sound_started_ ^ sound_ended_);
97 // Overridden from SpeechRecognitionEventListener:
98 void OnAudioStart(int session_id) override {
99 audio_started_ = true;
100 CheckEventsConsistency();
103 void OnAudioEnd(int session_id) override {
104 audio_ended_ = true;
105 CheckEventsConsistency();
108 void OnRecognitionResults(int session_id,
109 const SpeechRecognitionResults& results) override {
110 result_received_ = true;
113 void OnRecognitionError(int session_id,
114 const SpeechRecognitionError& error) override {
115 EXPECT_TRUE(recognition_started_);
116 EXPECT_FALSE(recognition_ended_);
117 error_ = error.code;
120 void OnAudioLevelsChange(int session_id,
121 float volume,
122 float noise_volume) override {
123 volume_ = volume;
124 noise_volume_ = noise_volume;
127 void OnRecognitionEnd(int session_id) override {
128 recognition_ended_ = true;
129 CheckEventsConsistency();
132 void OnRecognitionStart(int session_id) override {
133 recognition_started_ = true;
134 CheckEventsConsistency();
137 void OnEnvironmentEstimationComplete(int session_id) override {}
139 void OnSoundStart(int session_id) override {
140 sound_started_ = true;
141 CheckEventsConsistency();
144 void OnSoundEnd(int session_id) override {
145 sound_ended_ = true;
146 CheckEventsConsistency();
149 // testing::Test methods.
150 void SetUp() override {
151 AudioInputController::set_factory_for_testing(
152 &audio_input_controller_factory_);
155 void TearDown() override {
156 AudioInputController::set_factory_for_testing(NULL);
159 void CopyPacketToAudioBus() {
160 // Copy the created signal into an audio bus in a deinterleaved format.
161 audio_bus_->FromInterleaved(
162 &audio_packet_[0], audio_bus_->frames(), bytes_per_sample_);
165 void FillPacketWithTestWaveform() {
166 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
167 for (size_t i = 0; i < audio_packet_.size(); ++i)
168 audio_packet_[i] = static_cast<uint8>(i);
169 CopyPacketToAudioBus();
172 void FillPacketWithNoise() {
173 int value = 0;
174 int factor = 175;
175 for (size_t i = 0; i < audio_packet_.size(); ++i) {
176 value += factor;
177 audio_packet_[i] = value % 100;
179 CopyPacketToAudioBus();
182 protected:
183 base::MessageLoopForIO message_loop_;
184 BrowserThreadImpl io_thread_;
185 scoped_refptr<SpeechRecognizerImpl> recognizer_;
186 scoped_ptr<AudioManager> audio_manager_;
187 bool recognition_started_;
188 bool recognition_ended_;
189 bool result_received_;
190 bool audio_started_;
191 bool audio_ended_;
192 bool sound_started_;
193 bool sound_ended_;
194 SpeechRecognitionErrorCode error_;
195 net::TestURLFetcherFactory url_fetcher_factory_;
196 TestAudioInputControllerFactory audio_input_controller_factory_;
197 std::vector<uint8> audio_packet_;
198 scoped_ptr<media::AudioBus> audio_bus_;
199 int bytes_per_sample_;
200 float volume_;
201 float noise_volume_;
204 TEST_F(SpeechRecognizerImplTest, StopNoData) {
205 // Check for callbacks when stopping record before any audio gets recorded.
206 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
207 recognizer_->StopAudioCapture();
208 base::MessageLoop::current()->RunUntilIdle();
209 EXPECT_TRUE(recognition_started_);
210 EXPECT_FALSE(audio_started_);
211 EXPECT_FALSE(result_received_);
212 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
213 CheckFinalEventsConsistency();
216 TEST_F(SpeechRecognizerImplTest, CancelNoData) {
217 // Check for callbacks when canceling recognition before any audio gets
218 // recorded.
219 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
220 recognizer_->AbortRecognition();
221 base::MessageLoop::current()->RunUntilIdle();
222 EXPECT_TRUE(recognition_started_);
223 EXPECT_FALSE(audio_started_);
224 EXPECT_FALSE(result_received_);
225 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
226 CheckFinalEventsConsistency();
229 TEST_F(SpeechRecognizerImplTest, StopWithData) {
230 // Start recording, give some data and then stop. This should wait for the
231 // network callback to arrive before completion.
232 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
233 base::MessageLoop::current()->RunUntilIdle();
234 TestAudioInputController* controller =
235 audio_input_controller_factory_.controller();
236 ASSERT_TRUE(controller);
238 // Try sending 5 chunks of mock audio data and verify that each of them
239 // resulted immediately in a packet sent out via the network. This verifies
240 // that we are streaming out encoded data as chunks without waiting for the
241 // full recording to complete.
242 const size_t kNumChunks = 5;
243 for (size_t i = 0; i < kNumChunks; ++i) {
244 controller->event_handler()->OnData(controller, audio_bus_.get());
245 base::MessageLoop::current()->RunUntilIdle();
246 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
247 ASSERT_TRUE(fetcher);
248 EXPECT_EQ(i + 1, fetcher->upload_chunks().size());
251 recognizer_->StopAudioCapture();
252 base::MessageLoop::current()->RunUntilIdle();
253 EXPECT_TRUE(audio_started_);
254 EXPECT_TRUE(audio_ended_);
255 EXPECT_FALSE(recognition_ended_);
256 EXPECT_FALSE(result_received_);
257 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
259 // Issue the network callback to complete the process.
260 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
261 ASSERT_TRUE(fetcher);
263 fetcher->set_url(fetcher->GetOriginalURL());
264 net::URLRequestStatus status;
265 status.set_status(net::URLRequestStatus::SUCCESS);
266 fetcher->set_status(status);
267 fetcher->set_response_code(200);
268 fetcher->SetResponseString(
269 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
270 fetcher->delegate()->OnURLFetchComplete(fetcher);
271 base::MessageLoop::current()->RunUntilIdle();
272 EXPECT_TRUE(recognition_ended_);
273 EXPECT_TRUE(result_received_);
274 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
275 CheckFinalEventsConsistency();
278 TEST_F(SpeechRecognizerImplTest, CancelWithData) {
279 // Start recording, give some data and then cancel.
280 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
281 base::MessageLoop::current()->RunUntilIdle();
282 TestAudioInputController* controller =
283 audio_input_controller_factory_.controller();
284 ASSERT_TRUE(controller);
285 controller->event_handler()->OnData(controller, audio_bus_.get());
286 base::MessageLoop::current()->RunUntilIdle();
287 recognizer_->AbortRecognition();
288 base::MessageLoop::current()->RunUntilIdle();
289 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
290 EXPECT_TRUE(recognition_started_);
291 EXPECT_TRUE(audio_started_);
292 EXPECT_FALSE(result_received_);
293 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
294 CheckFinalEventsConsistency();
297 TEST_F(SpeechRecognizerImplTest, ConnectionError) {
298 // Start recording, give some data and then stop. Issue the network callback
299 // with a connection error and verify that the recognizer bubbles the error up
300 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
301 base::MessageLoop::current()->RunUntilIdle();
302 TestAudioInputController* controller =
303 audio_input_controller_factory_.controller();
304 ASSERT_TRUE(controller);
305 controller->event_handler()->OnData(controller, audio_bus_.get());
306 base::MessageLoop::current()->RunUntilIdle();
307 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
308 ASSERT_TRUE(fetcher);
310 recognizer_->StopAudioCapture();
311 base::MessageLoop::current()->RunUntilIdle();
312 EXPECT_TRUE(audio_started_);
313 EXPECT_TRUE(audio_ended_);
314 EXPECT_FALSE(recognition_ended_);
315 EXPECT_FALSE(result_received_);
316 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
318 // Issue the network callback to complete the process.
319 fetcher->set_url(fetcher->GetOriginalURL());
320 net::URLRequestStatus status;
321 status.set_status(net::URLRequestStatus::FAILED);
322 status.set_error(net::ERR_CONNECTION_REFUSED);
323 fetcher->set_status(status);
324 fetcher->set_response_code(0);
325 fetcher->SetResponseString(std::string());
326 fetcher->delegate()->OnURLFetchComplete(fetcher);
327 base::MessageLoop::current()->RunUntilIdle();
328 EXPECT_TRUE(recognition_ended_);
329 EXPECT_FALSE(result_received_);
330 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
331 CheckFinalEventsConsistency();
334 TEST_F(SpeechRecognizerImplTest, ServerError) {
335 // Start recording, give some data and then stop. Issue the network callback
336 // with a 500 error and verify that the recognizer bubbles the error up
337 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
338 base::MessageLoop::current()->RunUntilIdle();
339 TestAudioInputController* controller =
340 audio_input_controller_factory_.controller();
341 ASSERT_TRUE(controller);
342 controller->event_handler()->OnData(controller, audio_bus_.get());
343 base::MessageLoop::current()->RunUntilIdle();
344 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
345 ASSERT_TRUE(fetcher);
347 recognizer_->StopAudioCapture();
348 base::MessageLoop::current()->RunUntilIdle();
349 EXPECT_TRUE(audio_started_);
350 EXPECT_TRUE(audio_ended_);
351 EXPECT_FALSE(recognition_ended_);
352 EXPECT_FALSE(result_received_);
353 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
355 // Issue the network callback to complete the process.
356 fetcher->set_url(fetcher->GetOriginalURL());
357 net::URLRequestStatus status;
358 status.set_status(net::URLRequestStatus::SUCCESS);
359 fetcher->set_status(status);
360 fetcher->set_response_code(500);
361 fetcher->SetResponseString("Internal Server Error");
362 fetcher->delegate()->OnURLFetchComplete(fetcher);
363 base::MessageLoop::current()->RunUntilIdle();
364 EXPECT_TRUE(recognition_ended_);
365 EXPECT_FALSE(result_received_);
366 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
367 CheckFinalEventsConsistency();
370 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorNoData) {
371 // Check if things tear down properly if AudioInputController threw an error.
372 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
373 base::MessageLoop::current()->RunUntilIdle();
374 TestAudioInputController* controller =
375 audio_input_controller_factory_.controller();
376 ASSERT_TRUE(controller);
377 controller->event_handler()->OnError(controller,
378 AudioInputController::UNKNOWN_ERROR);
379 base::MessageLoop::current()->RunUntilIdle();
380 EXPECT_TRUE(recognition_started_);
381 EXPECT_FALSE(audio_started_);
382 EXPECT_FALSE(result_received_);
383 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
384 CheckFinalEventsConsistency();
387 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
388 // Check if things tear down properly if AudioInputController threw an error
389 // after giving some audio data.
390 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
391 base::MessageLoop::current()->RunUntilIdle();
392 TestAudioInputController* controller =
393 audio_input_controller_factory_.controller();
394 ASSERT_TRUE(controller);
395 controller->event_handler()->OnData(controller, audio_bus_.get());
396 controller->event_handler()->OnError(controller,
397 AudioInputController::UNKNOWN_ERROR);
398 base::MessageLoop::current()->RunUntilIdle();
399 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
400 EXPECT_TRUE(recognition_started_);
401 EXPECT_TRUE(audio_started_);
402 EXPECT_FALSE(result_received_);
403 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
404 CheckFinalEventsConsistency();
407 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
408 // Start recording and give a lot of packets with audio samples set to zero.
409 // This should trigger the no-speech detector and issue a callback.
410 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
411 base::MessageLoop::current()->RunUntilIdle();
412 TestAudioInputController* controller =
413 audio_input_controller_factory_.controller();
414 ASSERT_TRUE(controller);
416 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
417 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1;
418 // The vector is already filled with zero value samples on create.
419 for (int i = 0; i < num_packets; ++i) {
420 controller->event_handler()->OnData(controller, audio_bus_.get());
422 base::MessageLoop::current()->RunUntilIdle();
423 EXPECT_TRUE(recognition_started_);
424 EXPECT_TRUE(audio_started_);
425 EXPECT_FALSE(result_received_);
426 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_);
427 CheckFinalEventsConsistency();
430 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
431 // Start recording and give a lot of packets with audio samples set to zero
432 // and then some more with reasonably loud audio samples. This should be
433 // treated as normal speech input and the no-speech detector should not get
434 // triggered.
435 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
436 base::MessageLoop::current()->RunUntilIdle();
437 TestAudioInputController* controller =
438 audio_input_controller_factory_.controller();
439 ASSERT_TRUE(controller);
440 controller = audio_input_controller_factory_.controller();
441 ASSERT_TRUE(controller);
443 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
444 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
446 // The vector is already filled with zero value samples on create.
447 for (int i = 0; i < num_packets / 2; ++i) {
448 controller->event_handler()->OnData(controller, audio_bus_.get());
451 FillPacketWithTestWaveform();
452 for (int i = 0; i < num_packets / 2; ++i) {
453 controller->event_handler()->OnData(controller, audio_bus_.get());
456 base::MessageLoop::current()->RunUntilIdle();
457 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
458 EXPECT_TRUE(audio_started_);
459 EXPECT_FALSE(audio_ended_);
460 EXPECT_FALSE(recognition_ended_);
461 recognizer_->AbortRecognition();
462 base::MessageLoop::current()->RunUntilIdle();
463 CheckFinalEventsConsistency();
466 TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
467 // Start recording and give a lot of packets with audio samples set to zero
468 // and then some more with reasonably loud audio samples. Check that we don't
469 // get the callback during estimation phase, then get zero for the silence
470 // samples and proper volume for the loud audio.
471 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
472 base::MessageLoop::current()->RunUntilIdle();
473 TestAudioInputController* controller =
474 audio_input_controller_factory_.controller();
475 ASSERT_TRUE(controller);
476 controller = audio_input_controller_factory_.controller();
477 ASSERT_TRUE(controller);
479 // Feed some samples to begin with for the endpointer to do noise estimation.
480 int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs /
481 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
482 FillPacketWithNoise();
483 for (int i = 0; i < num_packets; ++i) {
484 controller->event_handler()->OnData(controller, audio_bus_.get());
486 base::MessageLoop::current()->RunUntilIdle();
487 EXPECT_EQ(-1.0f, volume_); // No audio volume set yet.
489 // The vector is already filled with zero value samples on create.
490 controller->event_handler()->OnData(controller, audio_bus_.get());
491 base::MessageLoop::current()->RunUntilIdle();
492 EXPECT_FLOAT_EQ(0.74939233f, volume_);
494 FillPacketWithTestWaveform();
495 controller->event_handler()->OnData(controller, audio_bus_.get());
496 base::MessageLoop::current()->RunUntilIdle();
497 EXPECT_NEAR(0.89926866f, volume_, 0.00001f);
498 EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
500 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
501 EXPECT_FALSE(audio_ended_);
502 EXPECT_FALSE(recognition_ended_);
503 recognizer_->AbortRecognition();
504 base::MessageLoop::current()->RunUntilIdle();
505 CheckFinalEventsConsistency();
508 } // namespace content