Update ASan/Android runtime and setup script to LLVM r200682.
[chromium-blink-merge.git] / content / browser / speech / speech_recognizer_impl_unittest.cc
blob95da469a4350f0b9700450731994313d26cdf830
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <vector>
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "net/base/net_errors.h"
17 #include "net/url_request/test_url_fetcher_factory.h"
18 #include "net/url_request/url_request_status.h"
19 #include "testing/gtest/include/gtest/gtest.h"
21 using base::MessageLoopProxy;
22 using media::AudioInputController;
23 using media::AudioInputStream;
24 using media::AudioManager;
25 using media::AudioOutputStream;
26 using media::AudioParameters;
27 using media::TestAudioInputController;
28 using media::TestAudioInputControllerFactory;
30 namespace content {
32 class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
33 public testing::Test {
34 public:
35 SpeechRecognizerImplTest()
36 : io_thread_(BrowserThread::IO, &message_loop_),
37 recognition_started_(false),
38 recognition_ended_(false),
39 result_received_(false),
40 audio_started_(false),
41 audio_ended_(false),
42 sound_started_(false),
43 sound_ended_(false),
44 error_(SPEECH_RECOGNITION_ERROR_NONE),
45 volume_(-1.0f) {
46 // SpeechRecognizer takes ownership of sr_engine.
47 SpeechRecognitionEngine* sr_engine =
48 new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */);
49 SpeechRecognitionEngineConfig config;
50 config.audio_num_bits_per_sample =
51 SpeechRecognizerImpl::kNumBitsPerAudioSample;
52 config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate;
53 config.filter_profanities = false;
54 sr_engine->SetConfig(config);
56 const int kTestingSessionId = 1;
57 recognizer_ = new SpeechRecognizerImpl(
58 this, kTestingSessionId, false, false, sr_engine);
59 audio_manager_.reset(new media::MockAudioManager(
60 base::MessageLoop::current()->message_loop_proxy().get()));
61 recognizer_->SetAudioManagerForTesting(audio_manager_.get());
63 int audio_packet_length_bytes =
64 (SpeechRecognizerImpl::kAudioSampleRate *
65 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs *
66 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) *
67 SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000);
68 audio_packet_.resize(audio_packet_length_bytes);
71 void CheckEventsConsistency() {
72 // Note: "!x || y" == "x implies y".
73 EXPECT_TRUE(!recognition_ended_ || recognition_started_);
74 EXPECT_TRUE(!audio_ended_ || audio_started_);
75 EXPECT_TRUE(!sound_ended_ || sound_started_);
76 EXPECT_TRUE(!audio_started_ || recognition_started_);
77 EXPECT_TRUE(!sound_started_ || audio_started_);
78 EXPECT_TRUE(!audio_ended_ || (sound_ended_ || !sound_started_));
79 EXPECT_TRUE(!recognition_ended_ || (audio_ended_ || !audio_started_));
82 void CheckFinalEventsConsistency() {
83 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
84 EXPECT_FALSE(recognition_started_ ^ recognition_ended_);
85 EXPECT_FALSE(audio_started_ ^ audio_ended_);
86 EXPECT_FALSE(sound_started_ ^ sound_ended_);
89 // Overridden from SpeechRecognitionEventListener:
90 virtual void OnAudioStart(int session_id) OVERRIDE {
91 audio_started_ = true;
92 CheckEventsConsistency();
95 virtual void OnAudioEnd(int session_id) OVERRIDE {
96 audio_ended_ = true;
97 CheckEventsConsistency();
100 virtual void OnRecognitionResults(
101 int session_id, const SpeechRecognitionResults& results) OVERRIDE {
102 result_received_ = true;
105 virtual void OnRecognitionError(
106 int session_id, const SpeechRecognitionError& error) OVERRIDE {
107 EXPECT_TRUE(recognition_started_);
108 EXPECT_FALSE(recognition_ended_);
109 error_ = error.code;
112 virtual void OnAudioLevelsChange(int session_id, float volume,
113 float noise_volume) OVERRIDE {
114 volume_ = volume;
115 noise_volume_ = noise_volume;
118 virtual void OnRecognitionEnd(int session_id) OVERRIDE {
119 recognition_ended_ = true;
120 CheckEventsConsistency();
123 virtual void OnRecognitionStart(int session_id) OVERRIDE {
124 recognition_started_ = true;
125 CheckEventsConsistency();
128 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE {}
130 virtual void OnSoundStart(int session_id) OVERRIDE {
131 sound_started_ = true;
132 CheckEventsConsistency();
135 virtual void OnSoundEnd(int session_id) OVERRIDE {
136 sound_ended_ = true;
137 CheckEventsConsistency();
140 // testing::Test methods.
141 virtual void SetUp() OVERRIDE {
142 AudioInputController::set_factory_for_testing(
143 &audio_input_controller_factory_);
146 virtual void TearDown() OVERRIDE {
147 AudioInputController::set_factory_for_testing(NULL);
150 void FillPacketWithTestWaveform() {
151 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
152 for (size_t i = 0; i < audio_packet_.size(); ++i)
153 audio_packet_[i] = static_cast<uint8>(i);
156 void FillPacketWithNoise() {
157 int value = 0;
158 int factor = 175;
159 for (size_t i = 0; i < audio_packet_.size(); ++i) {
160 value += factor;
161 audio_packet_[i] = value % 100;
165 protected:
166 base::MessageLoopForIO message_loop_;
167 BrowserThreadImpl io_thread_;
168 scoped_refptr<SpeechRecognizerImpl> recognizer_;
169 scoped_ptr<AudioManager> audio_manager_;
170 bool recognition_started_;
171 bool recognition_ended_;
172 bool result_received_;
173 bool audio_started_;
174 bool audio_ended_;
175 bool sound_started_;
176 bool sound_ended_;
177 SpeechRecognitionErrorCode error_;
178 net::TestURLFetcherFactory url_fetcher_factory_;
179 TestAudioInputControllerFactory audio_input_controller_factory_;
180 std::vector<uint8> audio_packet_;
181 float volume_;
182 float noise_volume_;
185 TEST_F(SpeechRecognizerImplTest, StopNoData) {
186 // Check for callbacks when stopping record before any audio gets recorded.
187 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
188 recognizer_->StopAudioCapture();
189 base::MessageLoop::current()->RunUntilIdle();
190 EXPECT_TRUE(recognition_started_);
191 EXPECT_FALSE(audio_started_);
192 EXPECT_FALSE(result_received_);
193 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
194 CheckFinalEventsConsistency();
197 TEST_F(SpeechRecognizerImplTest, CancelNoData) {
198 // Check for callbacks when canceling recognition before any audio gets
199 // recorded.
200 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
201 recognizer_->AbortRecognition();
202 base::MessageLoop::current()->RunUntilIdle();
203 EXPECT_TRUE(recognition_started_);
204 EXPECT_FALSE(audio_started_);
205 EXPECT_FALSE(result_received_);
206 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
207 CheckFinalEventsConsistency();
210 TEST_F(SpeechRecognizerImplTest, StopWithData) {
211 // Start recording, give some data and then stop. This should wait for the
212 // network callback to arrive before completion.
213 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
214 base::MessageLoop::current()->RunUntilIdle();
215 TestAudioInputController* controller =
216 audio_input_controller_factory_.controller();
217 ASSERT_TRUE(controller);
219 // Try sending 5 chunks of mock audio data and verify that each of them
220 // resulted immediately in a packet sent out via the network. This verifies
221 // that we are streaming out encoded data as chunks without waiting for the
222 // full recording to complete.
223 const size_t kNumChunks = 5;
224 for (size_t i = 0; i < kNumChunks; ++i) {
225 controller->event_handler()->OnData(controller, &audio_packet_[0],
226 audio_packet_.size());
227 base::MessageLoop::current()->RunUntilIdle();
228 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
229 ASSERT_TRUE(fetcher);
230 EXPECT_EQ(i + 1, fetcher->upload_chunks().size());
233 recognizer_->StopAudioCapture();
234 base::MessageLoop::current()->RunUntilIdle();
235 EXPECT_TRUE(audio_started_);
236 EXPECT_TRUE(audio_ended_);
237 EXPECT_FALSE(recognition_ended_);
238 EXPECT_FALSE(result_received_);
239 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
241 // Issue the network callback to complete the process.
242 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
243 ASSERT_TRUE(fetcher);
245 fetcher->set_url(fetcher->GetOriginalURL());
246 net::URLRequestStatus status;
247 status.set_status(net::URLRequestStatus::SUCCESS);
248 fetcher->set_status(status);
249 fetcher->set_response_code(200);
250 fetcher->SetResponseString(
251 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
252 fetcher->delegate()->OnURLFetchComplete(fetcher);
253 base::MessageLoop::current()->RunUntilIdle();
254 EXPECT_TRUE(recognition_ended_);
255 EXPECT_TRUE(result_received_);
256 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
257 CheckFinalEventsConsistency();
260 TEST_F(SpeechRecognizerImplTest, CancelWithData) {
261 // Start recording, give some data and then cancel.
262 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
263 base::MessageLoop::current()->RunUntilIdle();
264 TestAudioInputController* controller =
265 audio_input_controller_factory_.controller();
266 ASSERT_TRUE(controller);
267 controller->event_handler()->OnData(controller, &audio_packet_[0],
268 audio_packet_.size());
269 base::MessageLoop::current()->RunUntilIdle();
270 recognizer_->AbortRecognition();
271 base::MessageLoop::current()->RunUntilIdle();
272 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
273 EXPECT_TRUE(recognition_started_);
274 EXPECT_TRUE(audio_started_);
275 EXPECT_FALSE(result_received_);
276 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED, error_);
277 CheckFinalEventsConsistency();
280 TEST_F(SpeechRecognizerImplTest, ConnectionError) {
281 // Start recording, give some data and then stop. Issue the network callback
282 // with a connection error and verify that the recognizer bubbles the error up
283 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
284 base::MessageLoop::current()->RunUntilIdle();
285 TestAudioInputController* controller =
286 audio_input_controller_factory_.controller();
287 ASSERT_TRUE(controller);
288 controller->event_handler()->OnData(controller, &audio_packet_[0],
289 audio_packet_.size());
290 base::MessageLoop::current()->RunUntilIdle();
291 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
292 ASSERT_TRUE(fetcher);
294 recognizer_->StopAudioCapture();
295 base::MessageLoop::current()->RunUntilIdle();
296 EXPECT_TRUE(audio_started_);
297 EXPECT_TRUE(audio_ended_);
298 EXPECT_FALSE(recognition_ended_);
299 EXPECT_FALSE(result_received_);
300 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
302 // Issue the network callback to complete the process.
303 fetcher->set_url(fetcher->GetOriginalURL());
304 net::URLRequestStatus status;
305 status.set_status(net::URLRequestStatus::FAILED);
306 status.set_error(net::ERR_CONNECTION_REFUSED);
307 fetcher->set_status(status);
308 fetcher->set_response_code(0);
309 fetcher->SetResponseString(std::string());
310 fetcher->delegate()->OnURLFetchComplete(fetcher);
311 base::MessageLoop::current()->RunUntilIdle();
312 EXPECT_TRUE(recognition_ended_);
313 EXPECT_FALSE(result_received_);
314 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
315 CheckFinalEventsConsistency();
318 TEST_F(SpeechRecognizerImplTest, ServerError) {
319 // Start recording, give some data and then stop. Issue the network callback
320 // with a 500 error and verify that the recognizer bubbles the error up
321 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
322 base::MessageLoop::current()->RunUntilIdle();
323 TestAudioInputController* controller =
324 audio_input_controller_factory_.controller();
325 ASSERT_TRUE(controller);
326 controller->event_handler()->OnData(controller, &audio_packet_[0],
327 audio_packet_.size());
328 base::MessageLoop::current()->RunUntilIdle();
329 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);
330 ASSERT_TRUE(fetcher);
332 recognizer_->StopAudioCapture();
333 base::MessageLoop::current()->RunUntilIdle();
334 EXPECT_TRUE(audio_started_);
335 EXPECT_TRUE(audio_ended_);
336 EXPECT_FALSE(recognition_ended_);
337 EXPECT_FALSE(result_received_);
338 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
340 // Issue the network callback to complete the process.
341 fetcher->set_url(fetcher->GetOriginalURL());
342 net::URLRequestStatus status;
343 status.set_status(net::URLRequestStatus::SUCCESS);
344 fetcher->set_status(status);
345 fetcher->set_response_code(500);
346 fetcher->SetResponseString("Internal Server Error");
347 fetcher->delegate()->OnURLFetchComplete(fetcher);
348 base::MessageLoop::current()->RunUntilIdle();
349 EXPECT_TRUE(recognition_ended_);
350 EXPECT_FALSE(result_received_);
351 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
352 CheckFinalEventsConsistency();
355 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorNoData) {
356 // Check if things tear down properly if AudioInputController threw an error.
357 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
358 base::MessageLoop::current()->RunUntilIdle();
359 TestAudioInputController* controller =
360 audio_input_controller_factory_.controller();
361 ASSERT_TRUE(controller);
362 controller->event_handler()->OnError(controller);
363 base::MessageLoop::current()->RunUntilIdle();
364 EXPECT_TRUE(recognition_started_);
365 EXPECT_FALSE(audio_started_);
366 EXPECT_FALSE(result_received_);
367 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
368 CheckFinalEventsConsistency();
371 TEST_F(SpeechRecognizerImplTest, AudioControllerErrorWithData) {
372 // Check if things tear down properly if AudioInputController threw an error
373 // after giving some audio data.
374 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
375 base::MessageLoop::current()->RunUntilIdle();
376 TestAudioInputController* controller =
377 audio_input_controller_factory_.controller();
378 ASSERT_TRUE(controller);
379 controller->event_handler()->OnData(controller, &audio_packet_[0],
380 audio_packet_.size());
381 controller->event_handler()->OnError(controller);
382 base::MessageLoop::current()->RunUntilIdle();
383 ASSERT_TRUE(url_fetcher_factory_.GetFetcherByID(0));
384 EXPECT_TRUE(recognition_started_);
385 EXPECT_TRUE(audio_started_);
386 EXPECT_FALSE(result_received_);
387 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO, error_);
388 CheckFinalEventsConsistency();
391 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
392 // Start recording and give a lot of packets with audio samples set to zero.
393 // This should trigger the no-speech detector and issue a callback.
394 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
395 base::MessageLoop::current()->RunUntilIdle();
396 TestAudioInputController* controller =
397 audio_input_controller_factory_.controller();
398 ASSERT_TRUE(controller);
400 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
401 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1;
402 // The vector is already filled with zero value samples on create.
403 for (int i = 0; i < num_packets; ++i) {
404 controller->event_handler()->OnData(controller, &audio_packet_[0],
405 audio_packet_.size());
407 base::MessageLoop::current()->RunUntilIdle();
408 EXPECT_TRUE(recognition_started_);
409 EXPECT_TRUE(audio_started_);
410 EXPECT_FALSE(result_received_);
411 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_);
412 CheckFinalEventsConsistency();
415 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
416 // Start recording and give a lot of packets with audio samples set to zero
417 // and then some more with reasonably loud audio samples. This should be
418 // treated as normal speech input and the no-speech detector should not get
419 // triggered.
420 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
421 base::MessageLoop::current()->RunUntilIdle();
422 TestAudioInputController* controller =
423 audio_input_controller_factory_.controller();
424 ASSERT_TRUE(controller);
425 controller = audio_input_controller_factory_.controller();
426 ASSERT_TRUE(controller);
428 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) /
429 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
431 // The vector is already filled with zero value samples on create.
432 for (int i = 0; i < num_packets / 2; ++i) {
433 controller->event_handler()->OnData(controller, &audio_packet_[0],
434 audio_packet_.size());
437 FillPacketWithTestWaveform();
438 for (int i = 0; i < num_packets / 2; ++i) {
439 controller->event_handler()->OnData(controller, &audio_packet_[0],
440 audio_packet_.size());
443 base::MessageLoop::current()->RunUntilIdle();
444 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
445 EXPECT_TRUE(audio_started_);
446 EXPECT_FALSE(audio_ended_);
447 EXPECT_FALSE(recognition_ended_);
448 recognizer_->AbortRecognition();
449 base::MessageLoop::current()->RunUntilIdle();
450 CheckFinalEventsConsistency();
453 TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
454 // Start recording and give a lot of packets with audio samples set to zero
455 // and then some more with reasonably loud audio samples. Check that we don't
456 // get the callback during estimation phase, then get zero for the silence
457 // samples and proper volume for the loud audio.
458 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId);
459 base::MessageLoop::current()->RunUntilIdle();
460 TestAudioInputController* controller =
461 audio_input_controller_factory_.controller();
462 ASSERT_TRUE(controller);
463 controller = audio_input_controller_factory_.controller();
464 ASSERT_TRUE(controller);
466 // Feed some samples to begin with for the endpointer to do noise estimation.
467 int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs /
468 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs;
469 FillPacketWithNoise();
470 for (int i = 0; i < num_packets; ++i) {
471 controller->event_handler()->OnData(controller, &audio_packet_[0],
472 audio_packet_.size());
474 base::MessageLoop::current()->RunUntilIdle();
475 EXPECT_EQ(-1.0f, volume_); // No audio volume set yet.
477 // The vector is already filled with zero value samples on create.
478 controller->event_handler()->OnData(controller, &audio_packet_[0],
479 audio_packet_.size());
480 base::MessageLoop::current()->RunUntilIdle();
481 EXPECT_FLOAT_EQ(0.74939233f, volume_);
483 FillPacketWithTestWaveform();
484 controller->event_handler()->OnData(controller, &audio_packet_[0],
485 audio_packet_.size());
486 base::MessageLoop::current()->RunUntilIdle();
487 EXPECT_NEAR(0.89926866f, volume_, 0.00001f);
488 EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
490 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
491 EXPECT_FALSE(audio_ended_);
492 EXPECT_FALSE(recognition_ended_);
493 recognizer_->AbortRecognition();
494 base::MessageLoop::current()->RunUntilIdle();
495 CheckFinalEventsConsistency();
498 } // namespace content