1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "net/base/net_errors.h"
17 #include "net/url_request/test_url_fetcher_factory.h"
18 #include "net/url_request/url_request_status.h"
19 #include "testing/gtest/include/gtest/gtest.h"
21 using base::MessageLoopProxy
;
22 using media::AudioInputController
;
23 using media::AudioInputStream
;
24 using media::AudioManager
;
25 using media::AudioOutputStream
;
26 using media::AudioParameters
;
27 using media::TestAudioInputController
;
28 using media::TestAudioInputControllerFactory
;
// Test fixture for SpeechRecognizerImpl. The fixture is also the recognizer's
// SpeechRecognitionEventListener: the overridden callbacks below record what
// was received in member flags so that tests can assert on them.
32 class SpeechRecognizerImplTest
: public SpeechRecognitionEventListener
,
33 public testing::Test
{
// Builds the recognizer under test around a GoogleOneShotRemoteEngine,
// hands it a MockAudioManager, and sizes audio_packet_.
35 SpeechRecognizerImplTest()
36 : io_thread_(BrowserThread::IO
, &message_loop_
),
37 recognition_started_(false),
38 recognition_ended_(false),
39 result_received_(false),
40 audio_started_(false),
42 sound_started_(false),
44 error_(SPEECH_RECOGNITION_ERROR_NONE
),
46 // SpeechRecognizer takes ownership of sr_engine.
47 SpeechRecognitionEngine
* sr_engine
=
48 new GoogleOneShotRemoteEngine(NULL
/* URLRequestContextGetter */);
49 SpeechRecognitionEngineConfig config
;
50 config
.audio_num_bits_per_sample
=
51 SpeechRecognizerImpl::kNumBitsPerAudioSample
;
52 config
.audio_sample_rate
= SpeechRecognizerImpl::kAudioSampleRate
;
53 config
.filter_profanities
= false;
54 sr_engine
->SetConfig(config
);
56 const int kTestingSessionId
= 1;
57 const bool kOneShotMode
= true;
58 recognizer_
= new SpeechRecognizerImpl(
59 this, kTestingSessionId
, kOneShotMode
, sr_engine
);
60 audio_manager_
.reset(new media::MockAudioManager(
61 base::MessageLoop::current()->message_loop_proxy().get()));
62 recognizer_
->SetAudioManagerForTesting(audio_manager_
.get());
// Bytes in one audio packet: sample rate * packet interval (ms) * channel
// count * bits per sample, converted from bits-per-second-millis to bytes
// by dividing by (8 * 1000).
64 int audio_packet_length_bytes
=
65 (SpeechRecognizerImpl::kAudioSampleRate
*
66 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
*
67 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
) *
68 SpeechRecognizerImpl::kNumBitsPerAudioSample
) / (8 * 1000);
69 audio_packet_
.resize(audio_packet_length_bytes
);
// Checks that the start/end flags recorded so far are mutually consistent
// (no "end" without its "start", and inner phases nested inside outer ones).
// Called from the event callbacks below.
72 void CheckEventsConsistency() {
73 // Note: "!x || y" == "x implies y".
74 EXPECT_TRUE(!recognition_ended_
|| recognition_started_
);
75 EXPECT_TRUE(!audio_ended_
|| audio_started_
);
76 EXPECT_TRUE(!sound_ended_
|| sound_started_
);
77 EXPECT_TRUE(!audio_started_
|| recognition_started_
);
78 EXPECT_TRUE(!sound_started_
|| audio_started_
);
79 EXPECT_TRUE(!audio_ended_
|| (sound_ended_
|| !sound_started_
));
80 EXPECT_TRUE(!recognition_ended_
|| (audio_ended_
|| !audio_started_
));
// Checks that each phase (recognition, audio, sound) was either both started
// and ended, or neither. Tests call this as their last step.
83 void CheckFinalEventsConsistency() {
84 // Note: "!(x ^ y)" == "(x && y) || (!x && !y)".
85 EXPECT_FALSE(recognition_started_
^ recognition_ended_
);
86 EXPECT_FALSE(audio_started_
^ audio_ended_
);
87 EXPECT_FALSE(sound_started_
^ sound_ended_
);
90 // Overridden from SpeechRecognitionEventListener:
// Records that audio capture started, then re-checks event ordering.
91 virtual void OnAudioStart(int session_id
) OVERRIDE
{
92 audio_started_
= true;
93 CheckEventsConsistency();
// Re-checks event ordering when audio capture ends.
// NOTE(review): no flag assignment is visible in this view of the method,
// although tests read audio_ended_ -- confirm against the full file.
96 virtual void OnAudioEnd(int session_id
) OVERRIDE
{
98 CheckEventsConsistency();
// Records that a recognition result was delivered.
101 virtual void OnRecognitionResults(
102 int session_id
, const SpeechRecognitionResults
& results
) OVERRIDE
{
103 result_received_
= true;
// An error is only acceptable after recognition started and before it ended.
106 virtual void OnRecognitionError(
107 int session_id
, const SpeechRecognitionError
& error
) OVERRIDE
{
108 EXPECT_TRUE(recognition_started_
);
109 EXPECT_FALSE(recognition_ended_
);
// Stores the reported noise volume for later inspection by tests.
113 virtual void OnAudioLevelsChange(int session_id
, float volume
,
114 float noise_volume
) OVERRIDE
{
116 noise_volume_
= noise_volume
;
// Records that recognition ended, then re-checks event ordering.
119 virtual void OnRecognitionEnd(int session_id
) OVERRIDE
{
120 recognition_ended_
= true;
121 CheckEventsConsistency();
// Records that recognition started, then re-checks event ordering.
124 virtual void OnRecognitionStart(int session_id
) OVERRIDE
{
125 recognition_started_
= true;
126 CheckEventsConsistency();
// Intentionally a no-op: the tests do not track this event.
129 virtual void OnEnvironmentEstimationComplete(int session_id
) OVERRIDE
{}
// Records that sound was detected, then re-checks event ordering.
131 virtual void OnSoundStart(int session_id
) OVERRIDE
{
132 sound_started_
= true;
133 CheckEventsConsistency();
// Re-checks event ordering when sound ends.
// NOTE(review): no flag assignment is visible in this view of the method,
// although sound_ended_ is read by the consistency checks -- confirm.
136 virtual void OnSoundEnd(int session_id
) OVERRIDE
{
138 CheckEventsConsistency();
141 // testing::Test methods.
// Installs the fixture's controller factory so that AudioInputController
// creation is routed through audio_input_controller_factory_.
142 virtual void SetUp() OVERRIDE
{
143 AudioInputController::set_factory_for_testing(
144 &audio_input_controller_factory_
);
// Uninstalls the testing factory again.
147 virtual void TearDown() OVERRIDE
{
148 AudioInputController::set_factory_for_testing(NULL
);
// Overwrites audio_packet_ so that byte i holds the low 8 bits of i.
151 void FillPacketWithTestWaveform() {
152 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
153 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
)
154 audio_packet_
[i
] = static_cast<uint8
>(i
);
// Overwrites every byte of audio_packet_ with `value % 100`.
// NOTE(review): the declaration and update of `value` are not visible in
// this view -- confirm against the full file.
157 void FillPacketWithNoise() {
160 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
) {
162 audio_packet_
[i
] = value
% 100;
// Message loop that io_thread_ is bound to as BrowserThread::IO.
167 base::MessageLoopForIO message_loop_
;
168 BrowserThreadImpl io_thread_
;
// The recognizer under test; created in the constructor.
169 scoped_refptr
<SpeechRecognizerImpl
> recognizer_
;
// Mock audio manager handed to the recognizer via SetAudioManagerForTesting.
170 scoped_ptr
<AudioManager
> audio_manager_
;
// Event flags set by the listener callbacks above; all start out false.
171 bool recognition_started_
;
172 bool recognition_ended_
;
173 bool result_received_
;
// Error code the tests compare against; starts as
// SPEECH_RECOGNITION_ERROR_NONE.
178 SpeechRecognitionErrorCode error_
;
// Test factories used to look up the fetcher / audio controller in tests.
179 net::TestURLFetcherFactory url_fetcher_factory_
;
180 TestAudioInputControllerFactory audio_input_controller_factory_
;
// Raw audio bytes fed to the controller's event handler; sized in the
// constructor to one packet.
181 std::vector
<uint8
> audio_packet_
;
186 TEST_F(SpeechRecognizerImplTest
, StopNoData
) {
187 // Check for callbacks when stopping record before any audio gets recorded.
188 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
189 recognizer_
->StopAudioCapture();
190 base::MessageLoop::current()->RunUntilIdle();
191 EXPECT_TRUE(recognition_started_
);
192 EXPECT_FALSE(audio_started_
);
193 EXPECT_FALSE(result_received_
);
194 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
195 CheckFinalEventsConsistency();
198 TEST_F(SpeechRecognizerImplTest
, CancelNoData
) {
199 // Check for callbacks when canceling recognition before any audio gets
201 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
202 recognizer_
->AbortRecognition();
203 base::MessageLoop::current()->RunUntilIdle();
204 EXPECT_TRUE(recognition_started_
);
205 EXPECT_FALSE(audio_started_
);
206 EXPECT_FALSE(result_received_
);
207 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
208 CheckFinalEventsConsistency();
211 TEST_F(SpeechRecognizerImplTest
, StopWithData
) {
212 // Start recording, give some data and then stop. This should wait for the
213 // network callback to arrive before completion.
214 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
215 base::MessageLoop::current()->RunUntilIdle();
216 TestAudioInputController
* controller
=
217 audio_input_controller_factory_
.controller();
218 ASSERT_TRUE(controller
);
220 // Try sending 5 chunks of mock audio data and verify that each of them
221 // resulted immediately in a packet sent out via the network. This verifies
222 // that we are streaming out encoded data as chunks without waiting for the
223 // full recording to complete.
224 const size_t kNumChunks
= 5;
225 for (size_t i
= 0; i
< kNumChunks
; ++i
) {
226 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
227 audio_packet_
.size());
228 base::MessageLoop::current()->RunUntilIdle();
229 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
230 ASSERT_TRUE(fetcher
);
231 EXPECT_EQ(i
+ 1, fetcher
->upload_chunks().size());
234 recognizer_
->StopAudioCapture();
235 base::MessageLoop::current()->RunUntilIdle();
236 EXPECT_TRUE(audio_started_
);
237 EXPECT_TRUE(audio_ended_
);
238 EXPECT_FALSE(recognition_ended_
);
239 EXPECT_FALSE(result_received_
);
240 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
242 // Issue the network callback to complete the process.
243 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
244 ASSERT_TRUE(fetcher
);
246 fetcher
->set_url(fetcher
->GetOriginalURL());
247 net::URLRequestStatus status
;
248 status
.set_status(net::URLRequestStatus::SUCCESS
);
249 fetcher
->set_status(status
);
250 fetcher
->set_response_code(200);
251 fetcher
->SetResponseString(
252 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
253 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
254 base::MessageLoop::current()->RunUntilIdle();
255 EXPECT_TRUE(recognition_ended_
);
256 EXPECT_TRUE(result_received_
);
257 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
258 CheckFinalEventsConsistency();
261 TEST_F(SpeechRecognizerImplTest
, CancelWithData
) {
262 // Start recording, give some data and then cancel.
263 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
264 base::MessageLoop::current()->RunUntilIdle();
265 TestAudioInputController
* controller
=
266 audio_input_controller_factory_
.controller();
267 ASSERT_TRUE(controller
);
268 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
269 audio_packet_
.size());
270 base::MessageLoop::current()->RunUntilIdle();
271 recognizer_
->AbortRecognition();
272 base::MessageLoop::current()->RunUntilIdle();
273 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
274 EXPECT_TRUE(recognition_started_
);
275 EXPECT_TRUE(audio_started_
);
276 EXPECT_FALSE(result_received_
);
277 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
278 CheckFinalEventsConsistency();
281 TEST_F(SpeechRecognizerImplTest
, ConnectionError
) {
282 // Start recording, give some data and then stop. Issue the network callback
283 // with a connection error and verify that the recognizer bubbles the error up
284 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
285 base::MessageLoop::current()->RunUntilIdle();
286 TestAudioInputController
* controller
=
287 audio_input_controller_factory_
.controller();
288 ASSERT_TRUE(controller
);
289 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
290 audio_packet_
.size());
291 base::MessageLoop::current()->RunUntilIdle();
292 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
293 ASSERT_TRUE(fetcher
);
295 recognizer_
->StopAudioCapture();
296 base::MessageLoop::current()->RunUntilIdle();
297 EXPECT_TRUE(audio_started_
);
298 EXPECT_TRUE(audio_ended_
);
299 EXPECT_FALSE(recognition_ended_
);
300 EXPECT_FALSE(result_received_
);
301 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
303 // Issue the network callback to complete the process.
304 fetcher
->set_url(fetcher
->GetOriginalURL());
305 net::URLRequestStatus status
;
306 status
.set_status(net::URLRequestStatus::FAILED
);
307 status
.set_error(net::ERR_CONNECTION_REFUSED
);
308 fetcher
->set_status(status
);
309 fetcher
->set_response_code(0);
310 fetcher
->SetResponseString(std::string());
311 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
312 base::MessageLoop::current()->RunUntilIdle();
313 EXPECT_TRUE(recognition_ended_
);
314 EXPECT_FALSE(result_received_
);
315 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
316 CheckFinalEventsConsistency();
319 TEST_F(SpeechRecognizerImplTest
, ServerError
) {
320 // Start recording, give some data and then stop. Issue the network callback
321 // with a 500 error and verify that the recognizer bubbles the error up
322 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
323 base::MessageLoop::current()->RunUntilIdle();
324 TestAudioInputController
* controller
=
325 audio_input_controller_factory_
.controller();
326 ASSERT_TRUE(controller
);
327 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
328 audio_packet_
.size());
329 base::MessageLoop::current()->RunUntilIdle();
330 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
331 ASSERT_TRUE(fetcher
);
333 recognizer_
->StopAudioCapture();
334 base::MessageLoop::current()->RunUntilIdle();
335 EXPECT_TRUE(audio_started_
);
336 EXPECT_TRUE(audio_ended_
);
337 EXPECT_FALSE(recognition_ended_
);
338 EXPECT_FALSE(result_received_
);
339 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
341 // Issue the network callback to complete the process.
342 fetcher
->set_url(fetcher
->GetOriginalURL());
343 net::URLRequestStatus status
;
344 status
.set_status(net::URLRequestStatus::SUCCESS
);
345 fetcher
->set_status(status
);
346 fetcher
->set_response_code(500);
347 fetcher
->SetResponseString("Internal Server Error");
348 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
349 base::MessageLoop::current()->RunUntilIdle();
350 EXPECT_TRUE(recognition_ended_
);
351 EXPECT_FALSE(result_received_
);
352 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
353 CheckFinalEventsConsistency();
356 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorNoData
) {
357 // Check if things tear down properly if AudioInputController threw an error.
358 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
359 base::MessageLoop::current()->RunUntilIdle();
360 TestAudioInputController
* controller
=
361 audio_input_controller_factory_
.controller();
362 ASSERT_TRUE(controller
);
363 controller
->event_handler()->OnError(controller
);
364 base::MessageLoop::current()->RunUntilIdle();
365 EXPECT_TRUE(recognition_started_
);
366 EXPECT_FALSE(audio_started_
);
367 EXPECT_FALSE(result_received_
);
368 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO
, error_
);
369 CheckFinalEventsConsistency();
372 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorWithData
) {
373 // Check if things tear down properly if AudioInputController threw an error
374 // after giving some audio data.
375 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
376 base::MessageLoop::current()->RunUntilIdle();
377 TestAudioInputController
* controller
=
378 audio_input_controller_factory_
.controller();
379 ASSERT_TRUE(controller
);
380 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
381 audio_packet_
.size());
382 controller
->event_handler()->OnError(controller
);
383 base::MessageLoop::current()->RunUntilIdle();
384 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
385 EXPECT_TRUE(recognition_started_
);
386 EXPECT_TRUE(audio_started_
);
387 EXPECT_FALSE(result_received_
);
388 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO
, error_
);
389 CheckFinalEventsConsistency();
392 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackIssued
) {
393 // Start recording and give a lot of packets with audio samples set to zero.
394 // This should trigger the no-speech detector and issue a callback.
395 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
396 base::MessageLoop::current()->RunUntilIdle();
397 TestAudioInputController
* controller
=
398 audio_input_controller_factory_
.controller();
399 ASSERT_TRUE(controller
);
401 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
402 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
+ 1;
403 // The vector is already filled with zero value samples on create.
404 for (int i
= 0; i
< num_packets
; ++i
) {
405 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
406 audio_packet_
.size());
408 base::MessageLoop::current()->RunUntilIdle();
409 EXPECT_TRUE(recognition_started_
);
410 EXPECT_TRUE(audio_started_
);
411 EXPECT_FALSE(result_received_
);
412 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH
, error_
);
413 CheckFinalEventsConsistency();
416 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackNotIssued
) {
417 // Start recording and give a lot of packets with audio samples set to zero
418 // and then some more with reasonably loud audio samples. This should be
419 // treated as normal speech input and the no-speech detector should not get
421 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
422 base::MessageLoop::current()->RunUntilIdle();
423 TestAudioInputController
* controller
=
424 audio_input_controller_factory_
.controller();
425 ASSERT_TRUE(controller
);
426 controller
= audio_input_controller_factory_
.controller();
427 ASSERT_TRUE(controller
);
429 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
430 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
432 // The vector is already filled with zero value samples on create.
433 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
434 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
435 audio_packet_
.size());
438 FillPacketWithTestWaveform();
439 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
440 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
441 audio_packet_
.size());
444 base::MessageLoop::current()->RunUntilIdle();
445 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
446 EXPECT_TRUE(audio_started_
);
447 EXPECT_FALSE(audio_ended_
);
448 EXPECT_FALSE(recognition_ended_
);
449 recognizer_
->AbortRecognition();
450 base::MessageLoop::current()->RunUntilIdle();
451 CheckFinalEventsConsistency();
454 TEST_F(SpeechRecognizerImplTest
, SetInputVolumeCallback
) {
455 // Start recording and give a lot of packets with audio samples set to zero
456 // and then some more with reasonably loud audio samples. Check that we don't
457 // get the callback during estimation phase, then get zero for the silence
458 // samples and proper volume for the loud audio.
459 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
460 base::MessageLoop::current()->RunUntilIdle();
461 TestAudioInputController
* controller
=
462 audio_input_controller_factory_
.controller();
463 ASSERT_TRUE(controller
);
464 controller
= audio_input_controller_factory_
.controller();
465 ASSERT_TRUE(controller
);
467 // Feed some samples to begin with for the endpointer to do noise estimation.
468 int num_packets
= SpeechRecognizerImpl::kEndpointerEstimationTimeMs
/
469 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
470 FillPacketWithNoise();
471 for (int i
= 0; i
< num_packets
; ++i
) {
472 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
473 audio_packet_
.size());
475 base::MessageLoop::current()->RunUntilIdle();
476 EXPECT_EQ(-1.0f
, volume_
); // No audio volume set yet.
478 // The vector is already filled with zero value samples on create.
479 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
480 audio_packet_
.size());
481 base::MessageLoop::current()->RunUntilIdle();
482 EXPECT_FLOAT_EQ(0.74939233f
, volume_
);
484 FillPacketWithTestWaveform();
485 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
486 audio_packet_
.size());
487 base::MessageLoop::current()->RunUntilIdle();
488 EXPECT_NEAR(0.89926866f
, volume_
, 0.00001f
);
489 EXPECT_FLOAT_EQ(0.75071919f
, noise_volume_
);
491 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
492 EXPECT_FALSE(audio_ended_
);
493 EXPECT_FALSE(recognition_ended_
);
494 recognizer_
->AbortRecognition();
495 base::MessageLoop::current()->RunUntilIdle();
496 CheckFinalEventsConsistency();
499 } // namespace content