Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / content / browser / speech / speech_recognizer_impl_unittest.cc
blob13d5eba7dfc14a32db8595f3eea57b6bb02e1329
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <vector>
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "media/base/audio_bus.h"
17 #include "net/base/net_errors.h"
18 #include "net/url_request/test_url_fetcher_factory.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using base::MessageLoopProxy;
23 using media::AudioInputController;
24 using media::AudioInputStream;
25 using media::AudioManager;
26 using media::AudioOutputStream;
27 using media::AudioParameters;
28 using media::TestAudioInputController;
29 using media::TestAudioInputControllerFactory;
31 namespace content {
33 class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
34 public testing::Test {
35 public:
36 SpeechRecognizerImplTest()
37 : io_thread_(BrowserThread::IO, &message_loop_),
38 recognition_started_(false),
39 recognition_ended_(false),
40 result_received_(false),
41 audio_started_(false),
42 audio_ended_(false),
43 sound_started_(false),
44 sound_ended_(false),
45 error_(SPEECH_RECOGNITION_ERROR_NONE),
46 volume_(-1.0f) {
47 // SpeechRecognizer takes ownership of sr_engine.
48 SpeechRecognitionEngine* sr_engine =
49 new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */);
50 SpeechRecognitionEngineConfig config;
51 config.audio_num_bits_per_sample =
52 SpeechRecognizerImpl::kNumBitsPerAudioSample;
53 config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate;
54 config.filter_profanities = false;
55 sr_engine->SetConfig(config);
57 const int kTestingSessionId = 1;
58 recognizer_ = new SpeechRecognizerImpl(
59 this, kTestingSessionId, false, false, sr_engine);
60 audio_manager_.reset(new media::MockAudioManager(
61 base::MessageLoop::current()->message_loop_proxy().get()));
62 recognizer_->SetAudioManagerForTesting(audio_manager_.get());
64 int audio_packet_length_bytes =
65 (SpeechRecognizerImpl::kAudioSampleRate *
66 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs *
67 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) *
68 SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000);
69 audio_packet_.resize(audio_packet_length_bytes);
71 const int channels =
72 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout);
73 bytes_per_sample_ = SpeechRecognizerImpl::kNumBitsPerAudioSample / 8;
74 const int frames = audio_packet_length_bytes / channels / bytes_per_sample_;
75 audio_bus_ = media::AudioBus::Create(channels, frames);
76 audio_bus_->Zero();
79 void CheckEventsConsistency() {
80 // Note: "!x || y" == "x implies y".
81 EXPECT_TRUE(!recognition_ended_ || recognition_started_);
82 EXPECT_TRUE(!audio_ended_ || audio_started_);
83 EXPECT_TRUE(!sound_ended_ || sound_started_);
84 EXPECT_TRUE(!audio_started_ || recognition_started_);
85 EXPECT_TRUE(!sound_started_ || audio_started_);
86 EXPECT_TRUE(!audio_ended_ || (sound_ended_ || !sound_started_));
87 EXPECT_TRUE(!recognition_ended_ || (audio_ended_ || !audio_started_));
90 void CheckFinalEventsConsistency() {
91 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
92 EXPECT_FALSE(recognition_started_ ^ recognition_ended_);
93 EXPECT_FALSE(audio_started_ ^ audio_ended_);
94 EXPECT_FALSE(sound_started_ ^ sound_ended_);
97 // Overridden from SpeechRecognitionEventListener:
98 virtual void OnAudioStart(int session_id) OVERRIDE {
99 audio_started_ = true;
100 CheckEventsConsistency();
103 virtual void OnAudioEnd(int session_id) OVERRIDE {
104 audio_ended_ = true;
105 CheckEventsConsistency();
108 virtual void OnRecognitionResults(
109 int session_id, const SpeechRecognitionResults& results) OVERRIDE {
110 result_received_ = true;
113 virtual void OnRecognitionError(
114 int session_id, const SpeechRecognitionError& error) OVERRIDE {
115 EXPECT_TRUE(recognition_started_);
116 EXPECT_FALSE(recognition_ended_);
117 error_ = error.code;
120 virtual void OnAudioLevelsChange(int session_id, float volume,
121 float noise_volume) OVERRIDE {
122 volume_ = volume;
123 noise_volume_ = noise_volume;
126 virtual void OnRecognitionEnd(int session_id) OVERRIDE {
127 recognition_ended_ = true;
128 CheckEventsConsistency();
131 virtual void OnRecognitionStart(int session_id) OVERRIDE {
132 recognition_started_ = true;
133 CheckEventsConsistency();
136 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE {}
138 virtual void OnSoundStart(int session_id) OVERRIDE {
139 sound_started_ = true;
140 CheckEventsConsistency();
143 virtual void OnSoundEnd(int session_id) OVERRIDE {
144 sound_ended_ = true;
145 CheckEventsConsistency();
148 // testing::Test methods.
149 virtual void SetUp() OVERRIDE {
150 AudioInputController::set_factory_for_testing(
151 &audio_input_controller_factory_);
154 virtual void TearDown() OVERRIDE {
155 AudioInputController::set_factory_for_testing(NULL);
158 void CopyPacketToAudioBus() {
159 // Copy the created signal into an audio bus in a deinterleaved format.
160 audio_bus_->FromInterleaved(
161 &audio_packet_[0], audio_bus_->frames(), bytes_per_sample_);
164 void FillPacketWithTestWaveform() {
165 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
166 for (size_t i = 0; i < audio_packet_.size(); ++i)
167 audio_packet_[i] = static_cast<uint8>(i);
168 CopyPacketToAudioBus();
171 void FillPacketWithNoise() {
172 int value = 0;
173 int factor = 175;
174 for (size_t i = 0; i < audio_packet_.size(); ++i) {
175 value += factor;
176 audio_packet_[i] = value % 100;
178 CopyPacketToAudioBus();
181 protected:
182 base::MessageLoopForIO message_loop_;
183 BrowserThreadImpl io_thread_;
184 scoped_refptr<SpeechRecognizerImpl> recognizer_;
185 scoped_ptr<AudioManager> audio_manager_;
186 bool recognition_started_;
187 bool recognition_ended_;
188 bool result_received_;
189 bool audio_started_;
190 bool audio_ended_;
191 bool sound_started_;
192 bool sound_ended_;
193 SpeechRecognitionErrorCode error_;
194 net::TestURLFetcherFactory url_fetcher_factory_;
195 TestAudioInputControllerFactory audio_input_controller_factory_;
196 std::vector<uint8> audio_packet_;
197 scoped_ptr<media::AudioBus> audio_bus_;
198 int bytes_per_sample_;
199 float volume_;
200 float noise_volume_;
203 TEST_F(SpeechRecognizerImplTest, StopNoData) {
204 // Check for callbacks when stopping record before any audio gets recorded.
205 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
206 recognizer_->StopAudioCapture();
207 base::MessageLoop::current()->RunUntilIdle();
208 EXPECT_TRUE(recognition_started_);
209 EXPECT_FALSE(audio_started_);
210 EXPECT_FALSE(result_received_);
211 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
212 CheckFinalEventsConsistency();
215 TEST_F(SpeechRecognizerImplTest, CancelNoData) {
216 // Check for callbacks when canceling recognition before any audio gets
217 // recorded.
218 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
219 recognizer_->AbortRecognition();
220 base::MessageLoop::current()->RunUntilIdle();
221 EXPECT_TRUE(recognition_started_);
222 EXPECT_FALSE(audio_started_);
223 EXPECT_FALSE(result_received_);
224 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
225 CheckFinalEventsConsistency();
228 TEST_F(SpeechRecognizerImplTest, StopWithData) {
229 // Start recording, give some data and then stop. This should wait for the
230 // network callback to arrive before completion.
231 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
232 base::MessageLoop::current()->RunUntilIdle();
233 TestAudioInputController* controller =
234 audio_input_controller_factory_.controller();
235 ASSERT_TRUE(controller);
237 // Try sending 5 chunks of mock audio data and verify that each of them
238 // resulted immediately in a packet sent out via the network. This verifies
239 // that we are streaming out encoded data as chunks without waiting for the
240 // full recording to complete.
241 const size_t kNumChunks = 5;
242 for (size_t i = 0; i < kNumChunks; ++i) {
243 controller->event_handler()->OnData(controller, audio_bus_.get());
244 base::MessageLoop::current()->RunUntilIdle();
245 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
246 ASSERT_TRUE(fetcher);
247 EXPECT_EQ(i + 1, fetcher->upload_chunks().size());
250 recognizer_->StopAudioCapture();
251 base::MessageLoop::current()->RunUntilIdle();
252 EXPECT_TRUE(audio_started_);
253 EXPECT_TRUE(audio_ended_);
254 EXPECT_FALSE(recognition_ended_);
255 EXPECT_FALSE(result_received_);
256 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
258 // Issue the network callback to complete the process.
259 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
260 ASSERT_TRUE(fetcher);
262 fetcher->set_url(fetcher->GetOriginalURL());
263 net::URLRequestStatus status;
264 status.set_status(net::URLRequestStatus::SUCCESS);
265 fetcher->set_status(status);
266 fetcher->set_response_code(200);
267 fetcher->SetResponseString(
268 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
269 fetcher->delegate()->OnURLFetchComplete(fetcher);
270 base::MessageLoop::current()->RunUntilIdle();
271 EXPECT_TRUE(recognition_ended_);
272 EXPECT_TRUE(result_received_);
273 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
274 CheckFinalEventsConsistency();
277 TEST_F(SpeechRecognizerImplTest, CancelWithData) {
278 // Start recording, give some data and then cancel.
279 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
280 base::MessageLoop::current()->RunUntilIdle();
281 TestAudioInputController* controller =
282 audio_input_controller_factory_.controller();
283 ASSERT_TRUE(controller);
284 controller->event_handler()->OnData(controller, audio_bus_.get());
285 base::MessageLoop::current()->RunUntilIdle();
286 recognizer_->AbortRecognition();
287 base::MessageLoop::current()->RunUntilIdle();
288 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
289 EXPECT_TRUE(recognition_started_);
290 EXPECT_TRUE(audio_started_);
291 EXPECT_FALSE(result_received_);
292 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
293 CheckFinalEventsConsistency();
296 TEST_F(SpeechRecognizerImplTest, ConnectionError) {
297 // Start recording, give some data and then stop. Issue the network callback
298 // with a connection error and verify that the recognizer bubbles the error up
299 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
300 base::MessageLoop::current()->RunUntilIdle();
301 TestAudioInputController* controller =
302 audio_input_controller_factory_.controller();
303 ASSERT_TRUE(controller);
304 controller->event_handler()->OnData(controller, audio_bus_.get());
305 base::MessageLoop::current()->RunUntilIdle();
306 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
307 ASSERT_TRUE(fetcher);
309 recognizer_->StopAudioCapture();
310 base::MessageLoop::current()->RunUntilIdle();
311 EXPECT_TRUE(audio_started_);
312 EXPECT_TRUE(audio_ended_);
313 EXPECT_FALSE(recognition_ended_);
314 EXPECT_FALSE(result_received_);
315 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
317 // Issue the network callback to complete the process.
318 fetcher->set_url(fetcher->GetOriginalURL());
319 net::URLRequestStatus status;
320 status.set_status(net::URLRequestStatus::FAILED);
321 status.set_error(net::ERR_CONNECTION_REFUSED);
322 fetcher->set_status(status);
323 fetcher->set_response_code(0);
324 fetcher->SetResponseString(std::string());
325 fetcher->delegate()->OnURLFetchComplete(fetcher);
326 base::MessageLoop::current()->RunUntilIdle();
327 EXPECT_TRUE(recognition_ended_);
328 EXPECT_FALSE(result_received_);
329 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
330 CheckFinalEventsConsistency();
333 TEST_F(SpeechRecognizerImplTest, ServerError) {
334 // Start recording, give some data and then stop. Issue the network callback
335 // with a 500 error and verify that the recognizer bubbles the error up
336 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
337 base::MessageLoop::current()->RunUntilIdle();
338 TestAudioInputController* controller =
339 audio_input_controller_factory_.controller();
340 ASSERT_TRUE(controller);
341 controller->event_handler()->OnData(controller, audio_bus_.get());
342 base::MessageLoop::current()->RunUntilIdle();
343 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
344 ASSERT_TRUE(fetcher);
346 recognizer_->StopAudioCapture();
347 base::MessageLoop::current()->RunUntilIdle();
348 EXPECT_TRUE(audio_started_);
349 EXPECT_TRUE(audio_ended_);
350 EXPECT_FALSE(recognition_ended_);
351 EXPECT_FALSE(result_received_);
352 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
354 // Issue the network callback to complete the process.
355 fetcher->set_url(fetcher->GetOriginalURL());
356 net::URLRequestStatus status;
357 status.set_status(net::URLRequestStatus::SUCCESS);
358 fetcher->set_status(status);
359 fetcher->set_response_code(500);
360 fetcher->SetResponseString("Internal Server Error");
361 fetcher->delegate()->OnURLFetchComplete(fetcher);
362 base::MessageLoop::current()->RunUntilIdle();
363 EXPECT_TRUE(recognition_ended_);
364 EXPECT_FALSE(result_received_);
365 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
366 CheckFinalEventsConsistency();
369 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorNoData) {
370 // Check if things tear down properly if AudioInputController threw an error.
371 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
372 base::MessageLoop::current()->RunUntilIdle();
373 TestAudioInputController* controller =
374 audio_input_controller_factory_.controller();
375 ASSERT_TRUE(controller);
376 controller->event_handler()->OnError(controller,
377 AudioInputController::UNKNOWN_ERROR);
378 base::MessageLoop::current()->RunUntilIdle();
379 EXPECT_TRUE(recognition_started_);
380 EXPECT_FALSE(audio_started_);
381 EXPECT_FALSE(result_received_);
382 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
383 CheckFinalEventsConsistency();
386 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
387 // Check if things tear down properly if AudioInputController threw an error
388 // after giving some audio data.
389 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
390 base::MessageLoop::current()->RunUntilIdle();
391 TestAudioInputController* controller =
392 audio_input_controller_factory_.controller();
393 ASSERT_TRUE(controller);
394 controller->event_handler()->OnData(controller, audio_bus_.get());
395 controller->event_handler()->OnError(controller,
396 AudioInputController::UNKNOWN_ERROR);
397 base::MessageLoop::current()->RunUntilIdle();
398 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
399 EXPECT_TRUE(recognition_started_);
400 EXPECT_TRUE(audio_started_);
401 EXPECT_FALSE(result_received_);
402 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
403 CheckFinalEventsConsistency();
406 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
407 // Start recording and give a lot of packets with audio samples set to zero.
408 // This should trigger the no-speech detector and issue a callback.
409 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
410 base::MessageLoop::current()->RunUntilIdle();
411 TestAudioInputController* controller =
412 audio_input_controller_factory_.controller();
413 ASSERT_TRUE(controller);
415 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
416 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1;
417 // The vector is already filled with zero value samples on create.
418 for (int i = 0; i < num_packets; ++i) {
419 controller->event_handler()->OnData(controller, audio_bus_.get());
421 base::MessageLoop::current()->RunUntilIdle();
422 EXPECT_TRUE(recognition_started_);
423 EXPECT_TRUE(audio_started_);
424 EXPECT_FALSE(result_received_);
425 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_);
426 CheckFinalEventsConsistency();
429 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
430 // Start recording and give a lot of packets with audio samples set to zero
431 // and then some more with reasonably loud audio samples. This should be
432 // treated as normal speech input and the no-speech detector should not get
433 // triggered.
434 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
435 base::MessageLoop::current()->RunUntilIdle();
436 TestAudioInputController* controller =
437 audio_input_controller_factory_.controller();
438 ASSERT_TRUE(controller);
439 controller = audio_input_controller_factory_.controller();
440 ASSERT_TRUE(controller);
442 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
443 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
445 // The vector is already filled with zero value samples on create.
446 for (int i = 0; i < num_packets / 2; ++i) {
447 controller->event_handler()->OnData(controller, audio_bus_.get());
450 FillPacketWithTestWaveform();
451 for (int i = 0; i < num_packets / 2; ++i) {
452 controller->event_handler()->OnData(controller, audio_bus_.get());
455 base::MessageLoop::current()->RunUntilIdle();
456 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
457 EXPECT_TRUE(audio_started_);
458 EXPECT_FALSE(audio_ended_);
459 EXPECT_FALSE(recognition_ended_);
460 recognizer_->AbortRecognition();
461 base::MessageLoop::current()->RunUntilIdle();
462 CheckFinalEventsConsistency();
465 TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
466 // Start recording and give a lot of packets with audio samples set to zero
467 // and then some more with reasonably loud audio samples. Check that we don't
468 // get the callback during estimation phase, then get zero for the silence
469 // samples and proper volume for the loud audio.
470 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
471 base::MessageLoop::current()->RunUntilIdle();
472 TestAudioInputController* controller =
473 audio_input_controller_factory_.controller();
474 ASSERT_TRUE(controller);
475 controller = audio_input_controller_factory_.controller();
476 ASSERT_TRUE(controller);
478 // Feed some samples to begin with for the endpointer to do noise estimation.
479 int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs /
480 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
481 FillPacketWithNoise();
482 for (int i = 0; i < num_packets; ++i) {
483 controller->event_handler()->OnData(controller, audio_bus_.get());
485 base::MessageLoop::current()->RunUntilIdle();
486 EXPECT_EQ(-1.0f, volume_); // No audio volume set yet.
488 // The vector is already filled with zero value samples on create.
489 controller->event_handler()->OnData(controller, audio_bus_.get());
490 base::MessageLoop::current()->RunUntilIdle();
491 EXPECT_FLOAT_EQ(0.74939233f, volume_);
493 FillPacketWithTestWaveform();
494 controller->event_handler()->OnData(controller, audio_bus_.get());
495 base::MessageLoop::current()->RunUntilIdle();
496 EXPECT_NEAR(0.89926866f, volume_, 0.00001f);
497 EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
499 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
500 EXPECT_FALSE(audio_ended_);
501 EXPECT_FALSE(recognition_ended_);
502 recognizer_->AbortRecognition();
503 base::MessageLoop::current()->RunUntilIdle();
504 CheckFinalEventsConsistency();
507 } // namespace content