Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / content / browser / speech / speech_recognizer_impl_unittest.cc
blob320ca8130ac99ee98f73bd0ea96f52d19013f6ce
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <vector>
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "media/base/audio_bus.h"
17 #include "net/base/net_errors.h"
18 #include "net/url_request/test_url_fetcher_factory.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using media::AudioInputController;
23 using media::AudioInputStream;
24 using media::AudioManager;
25 using media::AudioOutputStream;
26 using media::AudioParameters;
27 using media::TestAudioInputController;
28 using media::TestAudioInputControllerFactory;
30 namespace content {
32 class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
33 public testing::Test {
34 public:
35 SpeechRecognizerImplTest()
36 : io_thread_(BrowserThread::IO, &message_loop_),
37 recognition_started_(false),
38 recognition_ended_(false),
39 result_received_(false),
40 audio_started_(false),
41 audio_ended_(false),
42 sound_started_(false),
43 sound_ended_(false),
44 error_(SPEECH_RECOGNITION_ERROR_NONE),
45 volume_(-1.0f) {
46 // SpeechRecognizer takes ownership of sr_engine.
47 SpeechRecognitionEngine* sr_engine =
48 new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */);
49 SpeechRecognitionEngineConfig config;
50 config.audio_num_bits_per_sample =
51 SpeechRecognizerImpl::kNumBitsPerAudioSample;
52 config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate;
53 config.filter_profanities = false;
54 sr_engine->SetConfig(config);
56 const int kTestingSessionId = 1;
57 recognizer_ = new SpeechRecognizerImpl(
58 this, kTestingSessionId, false, false, sr_engine);
59 audio_manager_.reset(new media::MockAudioManager(
60 base::MessageLoop::current()->task_runner().get()));
61 recognizer_->SetAudioManagerForTesting(audio_manager_.get());
63 int audio_packet_length_bytes =
64 (SpeechRecognizerImpl::kAudioSampleRate *
65 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs *
66 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) *
67 SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000);
68 audio_packet_.resize(audio_packet_length_bytes);
70 const int channels =
71 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout);
72 bytes_per_sample_ = SpeechRecognizerImpl::kNumBitsPerAudioSample / 8;
73 const int frames = audio_packet_length_bytes / channels / bytes_per_sample_;
74 audio_bus_ = media::AudioBus::Create(channels, frames);
75 audio_bus_->Zero();
78 void CheckEventsConsistency() {
79 // Note: "!x || y" == "x implies y".
80 EXPECT_TRUE(!recognition_ended_ || recognition_started_);
81 EXPECT_TRUE(!audio_ended_ || audio_started_);
82 EXPECT_TRUE(!sound_ended_ || sound_started_);
83 EXPECT_TRUE(!audio_started_ || recognition_started_);
84 EXPECT_TRUE(!sound_started_ || audio_started_);
85 EXPECT_TRUE(!audio_ended_ || (sound_ended_ || !sound_started_));
86 EXPECT_TRUE(!recognition_ended_ || (audio_ended_ || !audio_started_));
89 void CheckFinalEventsConsistency() {
90 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
91 EXPECT_FALSE(recognition_started_ ^ recognition_ended_);
92 EXPECT_FALSE(audio_started_ ^ audio_ended_);
93 EXPECT_FALSE(sound_started_ ^ sound_ended_);
96 // Overridden from SpeechRecognitionEventListener:
97 void OnAudioStart(int session_id) override {
98 audio_started_ = true;
99 CheckEventsConsistency();
102 void OnAudioEnd(int session_id) override {
103 audio_ended_ = true;
104 CheckEventsConsistency();
107 void OnRecognitionResults(int session_id,
108 const SpeechRecognitionResults& results) override {
109 result_received_ = true;
112 void OnRecognitionError(int session_id,
113 const SpeechRecognitionError& error) override {
114 EXPECT_TRUE(recognition_started_);
115 EXPECT_FALSE(recognition_ended_);
116 error_ = error.code;
119 void OnAudioLevelsChange(int session_id,
120 float volume,
121 float noise_volume) override {
122 volume_ = volume;
123 noise_volume_ = noise_volume;
126 void OnRecognitionEnd(int session_id) override {
127 recognition_ended_ = true;
128 CheckEventsConsistency();
131 void OnRecognitionStart(int session_id) override {
132 recognition_started_ = true;
133 CheckEventsConsistency();
136 void OnEnvironmentEstimationComplete(int session_id) override {}
138 void OnSoundStart(int session_id) override {
139 sound_started_ = true;
140 CheckEventsConsistency();
143 void OnSoundEnd(int session_id) override {
144 sound_ended_ = true;
145 CheckEventsConsistency();
148 // testing::Test methods.
149 void SetUp() override {
150 AudioInputController::set_factory_for_testing(
151 &audio_input_controller_factory_);
154 void TearDown() override {
155 AudioInputController::set_factory_for_testing(NULL);
158 void CopyPacketToAudioBus() {
159 // Copy the created signal into an audio bus in a deinterleaved format.
160 audio_bus_->FromInterleaved(
161 &audio_packet_[0], audio_bus_->frames(), bytes_per_sample_);
164 void FillPacketWithTestWaveform() {
165 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
166 for (size_t i = 0; i < audio_packet_.size(); ++i)
167 audio_packet_[i] = static_cast<uint8>(i);
168 CopyPacketToAudioBus();
171 void FillPacketWithNoise() {
172 int value = 0;
173 int factor = 175;
174 for (size_t i = 0; i < audio_packet_.size(); ++i) {
175 value += factor;
176 audio_packet_[i] = value % 100;
178 CopyPacketToAudioBus();
181 protected:
182 base::MessageLoopForIO message_loop_;
183 BrowserThreadImpl io_thread_;
184 scoped_refptr<SpeechRecognizerImpl> recognizer_;
185 scoped_ptr<AudioManager> audio_manager_;
186 bool recognition_started_;
187 bool recognition_ended_;
188 bool result_received_;
189 bool audio_started_;
190 bool audio_ended_;
191 bool sound_started_;
192 bool sound_ended_;
193 SpeechRecognitionErrorCode error_;
194 net::TestURLFetcherFactory url_fetcher_factory_;
195 TestAudioInputControllerFactory audio_input_controller_factory_;
196 std::vector<uint8> audio_packet_;
197 scoped_ptr<media::AudioBus> audio_bus_;
198 int bytes_per_sample_;
199 float volume_;
200 float noise_volume_;
203 TEST_F(SpeechRecognizerImplTest, StopNoData) {
204 // Check for callbacks when stopping record before any audio gets recorded.
205 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
206 recognizer_->StopAudioCapture();
207 base::MessageLoop::current()->RunUntilIdle();
208 EXPECT_TRUE(recognition_started_);
209 EXPECT_FALSE(audio_started_);
210 EXPECT_FALSE(result_received_);
211 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
212 CheckFinalEventsConsistency();
215 TEST_F(SpeechRecognizerImplTest, CancelNoData) {
216 // Check for callbacks when canceling recognition before any audio gets
217 // recorded.
218 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
219 recognizer_->AbortRecognition();
220 base::MessageLoop::current()->RunUntilIdle();
221 EXPECT_TRUE(recognition_started_);
222 EXPECT_FALSE(audio_started_);
223 EXPECT_FALSE(result_received_);
224 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
225 CheckFinalEventsConsistency();
228 TEST_F(SpeechRecognizerImplTest, StopWithData) {
229 // Start recording, give some data and then stop. This should wait for the
230 // network callback to arrive before completion.
231 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
232 base::MessageLoop::current()->RunUntilIdle();
233 TestAudioInputController* controller =
234 audio_input_controller_factory_.controller();
235 ASSERT_TRUE(controller);
237 // Try sending 5 chunks of mock audio data and verify that each of them
238 // resulted immediately in a packet sent out via the network. This verifies
239 // that we are streaming out encoded data as chunks without waiting for the
240 // full recording to complete.
241 const size_t kNumChunks = 5;
242 for (size_t i = 0; i < kNumChunks; ++i) {
243 controller->event_handler()->OnData(controller, audio_bus_.get());
244 base::MessageLoop::current()->RunUntilIdle();
245 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
246 ASSERT_TRUE(fetcher);
247 EXPECT_EQ(i + 1, fetcher->upload_chunks().size());
250 recognizer_->StopAudioCapture();
251 base::MessageLoop::current()->RunUntilIdle();
252 EXPECT_TRUE(audio_started_);
253 EXPECT_TRUE(audio_ended_);
254 EXPECT_FALSE(recognition_ended_);
255 EXPECT_FALSE(result_received_);
256 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
258 // Issue the network callback to complete the process.
259 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
260 ASSERT_TRUE(fetcher);
262 fetcher->set_url(fetcher->GetOriginalURL());
263 fetcher->set_status(net::URLRequestStatus());
264 fetcher->set_response_code(200);
265 fetcher->SetResponseString(
266 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
267 fetcher->delegate()->OnURLFetchComplete(fetcher);
268 base::MessageLoop::current()->RunUntilIdle();
269 EXPECT_TRUE(recognition_ended_);
270 EXPECT_TRUE(result_received_);
271 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
272 CheckFinalEventsConsistency();
275 TEST_F(SpeechRecognizerImplTest, CancelWithData) {
276 // Start recording, give some data and then cancel.
277 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
278 base::MessageLoop::current()->RunUntilIdle();
279 TestAudioInputController* controller =
280 audio_input_controller_factory_.controller();
281 ASSERT_TRUE(controller);
282 controller->event_handler()->OnData(controller, audio_bus_.get());
283 base::MessageLoop::current()->RunUntilIdle();
284 recognizer_->AbortRecognition();
285 base::MessageLoop::current()->RunUntilIdle();
286 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
287 EXPECT_TRUE(recognition_started_);
288 EXPECT_TRUE(audio_started_);
289 EXPECT_FALSE(result_received_);
290 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
291 CheckFinalEventsConsistency();
294 TEST_F(SpeechRecognizerImplTest, ConnectionError) {
295 // Start recording, give some data and then stop. Issue the network callback
296 // with a connection error and verify that the recognizer bubbles the error up
297 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
298 base::MessageLoop::current()->RunUntilIdle();
299 TestAudioInputController* controller =
300 audio_input_controller_factory_.controller();
301 ASSERT_TRUE(controller);
302 controller->event_handler()->OnData(controller, audio_bus_.get());
303 base::MessageLoop::current()->RunUntilIdle();
304 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
305 ASSERT_TRUE(fetcher);
307 recognizer_->StopAudioCapture();
308 base::MessageLoop::current()->RunUntilIdle();
309 EXPECT_TRUE(audio_started_);
310 EXPECT_TRUE(audio_ended_);
311 EXPECT_FALSE(recognition_ended_);
312 EXPECT_FALSE(result_received_);
313 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
315 // Issue the network callback to complete the process.
316 fetcher->set_url(fetcher->GetOriginalURL());
317 fetcher->set_status(
318 net::URLRequestStatus::FromError(net::ERR_CONNECTION_REFUSED));
319 fetcher->set_response_code(0);
320 fetcher->SetResponseString(std::string());
321 fetcher->delegate()->OnURLFetchComplete(fetcher);
322 base::MessageLoop::current()->RunUntilIdle();
323 EXPECT_TRUE(recognition_ended_);
324 EXPECT_FALSE(result_received_);
325 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
326 CheckFinalEventsConsistency();
329 TEST_F(SpeechRecognizerImplTest, ServerError) {
330 // Start recording, give some data and then stop. Issue the network callback
331 // with a 500 error and verify that the recognizer bubbles the error up
332 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
333 base::MessageLoop::current()->RunUntilIdle();
334 TestAudioInputController* controller =
335 audio_input_controller_factory_.controller();
336 ASSERT_TRUE(controller);
337 controller->event_handler()->OnData(controller, audio_bus_.get());
338 base::MessageLoop::current()->RunUntilIdle();
339 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
340 ASSERT_TRUE(fetcher);
342 recognizer_->StopAudioCapture();
343 base::MessageLoop::current()->RunUntilIdle();
344 EXPECT_TRUE(audio_started_);
345 EXPECT_TRUE(audio_ended_);
346 EXPECT_FALSE(recognition_ended_);
347 EXPECT_FALSE(result_received_);
348 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
350 // Issue the network callback to complete the process.
351 fetcher->set_url(fetcher->GetOriginalURL());
352 fetcher->set_status(net::URLRequestStatus());
353 fetcher->set_response_code(500);
354 fetcher->SetResponseString("Internal Server Error");
355 fetcher->delegate()->OnURLFetchComplete(fetcher);
356 base::MessageLoop::current()->RunUntilIdle();
357 EXPECT_TRUE(recognition_ended_);
358 EXPECT_FALSE(result_received_);
359 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
360 CheckFinalEventsConsistency();
363 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorNoData) {
364 // Check if things tear down properly if AudioInputController threw an error.
365 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
366 base::MessageLoop::current()->RunUntilIdle();
367 TestAudioInputController* controller =
368 audio_input_controller_factory_.controller();
369 ASSERT_TRUE(controller);
370 controller->event_handler()->OnError(controller,
371 AudioInputController::UNKNOWN_ERROR);
372 base::MessageLoop::current()->RunUntilIdle();
373 EXPECT_TRUE(recognition_started_);
374 EXPECT_FALSE(audio_started_);
375 EXPECT_FALSE(result_received_);
376 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, error_);
377 CheckFinalEventsConsistency();
380 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
381 // Check if things tear down properly if AudioInputController threw an error
382 // after giving some audio data.
383 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
384 base::MessageLoop::current()->RunUntilIdle();
385 TestAudioInputController* controller =
386 audio_input_controller_factory_.controller();
387 ASSERT_TRUE(controller);
388 controller->event_handler()->OnData(controller, audio_bus_.get());
389 controller->event_handler()->OnError(controller,
390 AudioInputController::UNKNOWN_ERROR);
391 base::MessageLoop::current()->RunUntilIdle();
392 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
393 EXPECT_TRUE(recognition_started_);
394 EXPECT_TRUE(audio_started_);
395 EXPECT_FALSE(result_received_);
396 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, error_);
397 CheckFinalEventsConsistency();
400 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
401 // Start recording and give a lot of packets with audio samples set to zero.
402 // This should trigger the no-speech detector and issue a callback.
403 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
404 base::MessageLoop::current()->RunUntilIdle();
405 TestAudioInputController* controller =
406 audio_input_controller_factory_.controller();
407 ASSERT_TRUE(controller);
409 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
410 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1;
411 // The vector is already filled with zero value samples on create.
412 for (int i = 0; i < num_packets; ++i) {
413 controller->event_handler()->OnData(controller, audio_bus_.get());
415 base::MessageLoop::current()->RunUntilIdle();
416 EXPECT_TRUE(recognition_started_);
417 EXPECT_TRUE(audio_started_);
418 EXPECT_FALSE(result_received_);
419 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_);
420 CheckFinalEventsConsistency();
423 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
424 // Start recording and give a lot of packets with audio samples set to zero
425 // and then some more with reasonably loud audio samples. This should be
426 // treated as normal speech input and the no-speech detector should not get
427 // triggered.
428 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
429 base::MessageLoop::current()->RunUntilIdle();
430 TestAudioInputController* controller =
431 audio_input_controller_factory_.controller();
432 ASSERT_TRUE(controller);
433 controller = audio_input_controller_factory_.controller();
434 ASSERT_TRUE(controller);
436 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
437 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
439 // The vector is already filled with zero value samples on create.
440 for (int i = 0; i < num_packets / 2; ++i) {
441 controller->event_handler()->OnData(controller, audio_bus_.get());
444 FillPacketWithTestWaveform();
445 for (int i = 0; i < num_packets / 2; ++i) {
446 controller->event_handler()->OnData(controller, audio_bus_.get());
449 base::MessageLoop::current()->RunUntilIdle();
450 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
451 EXPECT_TRUE(audio_started_);
452 EXPECT_FALSE(audio_ended_);
453 EXPECT_FALSE(recognition_ended_);
454 recognizer_->AbortRecognition();
455 base::MessageLoop::current()->RunUntilIdle();
456 CheckFinalEventsConsistency();
459 TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
460 // Start recording and give a lot of packets with audio samples set to zero
461 // and then some more with reasonably loud audio samples. Check that we don't
462 // get the callback during estimation phase, then get zero for the silence
463 // samples and proper volume for the loud audio.
464 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
465 base::MessageLoop::current()->RunUntilIdle();
466 TestAudioInputController* controller =
467 audio_input_controller_factory_.controller();
468 ASSERT_TRUE(controller);
469 controller = audio_input_controller_factory_.controller();
470 ASSERT_TRUE(controller);
472 // Feed some samples to begin with for the endpointer to do noise estimation.
473 int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs /
474 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
475 FillPacketWithNoise();
476 for (int i = 0; i < num_packets; ++i) {
477 controller->event_handler()->OnData(controller, audio_bus_.get());
479 base::MessageLoop::current()->RunUntilIdle();
480 EXPECT_EQ(-1.0f, volume_); // No audio volume set yet.
482 // The vector is already filled with zero value samples on create.
483 controller->event_handler()->OnData(controller, audio_bus_.get());
484 base::MessageLoop::current()->RunUntilIdle();
485 EXPECT_FLOAT_EQ(0.74939233f, volume_);
487 FillPacketWithTestWaveform();
488 controller->event_handler()->OnData(controller, audio_bus_.get());
489 base::MessageLoop::current()->RunUntilIdle();
490 EXPECT_NEAR(0.89926866f, volume_, 0.00001f);
491 EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
493 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
494 EXPECT_FALSE(audio_ended_);
495 EXPECT_FALSE(recognition_ended_);
496 recognizer_->AbortRecognition();
497 base::MessageLoop::current()->RunUntilIdle();
498 CheckFinalEventsConsistency();
501 } // namespace content