1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "net/base/net_errors.h"
17 #include "net/url_request/test_url_fetcher_factory.h"
18 #include "net/url_request/url_request_status.h"
19 #include "testing/gtest/include/gtest/gtest.h"
21 using base::MessageLoopProxy
;
22 using media::AudioInputController
;
23 using media::AudioInputStream
;
24 using media::AudioManager
;
25 using media::AudioOutputStream
;
26 using media::AudioParameters
;
27 using media::TestAudioInputController
;
28 using media::TestAudioInputControllerFactory
;
32 class SpeechRecognizerImplTest
: public SpeechRecognitionEventListener
,
33 public testing::Test
{
35 SpeechRecognizerImplTest()
36 : io_thread_(BrowserThread::IO
, &message_loop_
),
37 recognition_started_(false),
38 recognition_ended_(false),
39 result_received_(false),
40 audio_started_(false),
42 sound_started_(false),
44 error_(SPEECH_RECOGNITION_ERROR_NONE
),
46 // SpeechRecognizer takes ownership of sr_engine.
47 SpeechRecognitionEngine
* sr_engine
=
48 new GoogleOneShotRemoteEngine(NULL
/* URLRequestContextGetter */);
49 SpeechRecognitionEngineConfig config
;
50 config
.audio_num_bits_per_sample
=
51 SpeechRecognizerImpl::kNumBitsPerAudioSample
;
52 config
.audio_sample_rate
= SpeechRecognizerImpl::kAudioSampleRate
;
53 config
.filter_profanities
= false;
54 sr_engine
->SetConfig(config
);
56 const int kTestingSessionId
= 1;
57 recognizer_
= new SpeechRecognizerImpl(
58 this, kTestingSessionId
, false, false, sr_engine
);
59 audio_manager_
.reset(new media::MockAudioManager(
60 base::MessageLoop::current()->message_loop_proxy().get()));
61 recognizer_
->SetAudioManagerForTesting(audio_manager_
.get());
63 int audio_packet_length_bytes
=
64 (SpeechRecognizerImpl::kAudioSampleRate
*
65 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
*
66 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
) *
67 SpeechRecognizerImpl::kNumBitsPerAudioSample
) / (8 * 1000);
68 audio_packet_
.resize(audio_packet_length_bytes
);
71 void CheckEventsConsistency() {
72 // Note: "!x || y" == "x implies y".
73 EXPECT_TRUE(!recognition_ended_
|| recognition_started_
);
74 EXPECT_TRUE(!audio_ended_
|| audio_started_
);
75 EXPECT_TRUE(!sound_ended_
|| sound_started_
);
76 EXPECT_TRUE(!audio_started_
|| recognition_started_
);
77 EXPECT_TRUE(!sound_started_
|| audio_started_
);
78 EXPECT_TRUE(!audio_ended_
|| (sound_ended_
|| !sound_started_
));
79 EXPECT_TRUE(!recognition_ended_
|| (audio_ended_
|| !audio_started_
));
82 void CheckFinalEventsConsistency() {
83 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
84 EXPECT_FALSE(recognition_started_
^ recognition_ended_
);
85 EXPECT_FALSE(audio_started_
^ audio_ended_
);
86 EXPECT_FALSE(sound_started_
^ sound_ended_
);
89 // Overridden from SpeechRecognitionEventListener:
90 virtual void OnAudioStart(int session_id
) OVERRIDE
{
91 audio_started_
= true;
92 CheckEventsConsistency();
95 virtual void OnAudioEnd(int session_id
) OVERRIDE
{
97 CheckEventsConsistency();
100 virtual void OnRecognitionResults(
101 int session_id
, const SpeechRecognitionResults
& results
) OVERRIDE
{
102 result_received_
= true;
105 virtual void OnRecognitionError(
106 int session_id
, const SpeechRecognitionError
& error
) OVERRIDE
{
107 EXPECT_TRUE(recognition_started_
);
108 EXPECT_FALSE(recognition_ended_
);
112 virtual void OnAudioLevelsChange(int session_id
, float volume
,
113 float noise_volume
) OVERRIDE
{
115 noise_volume_
= noise_volume
;
118 virtual void OnRecognitionEnd(int session_id
) OVERRIDE
{
119 recognition_ended_
= true;
120 CheckEventsConsistency();
123 virtual void OnRecognitionStart(int session_id
) OVERRIDE
{
124 recognition_started_
= true;
125 CheckEventsConsistency();
128 virtual void OnEnvironmentEstimationComplete(int session_id
) OVERRIDE
{}
130 virtual void OnSoundStart(int session_id
) OVERRIDE
{
131 sound_started_
= true;
132 CheckEventsConsistency();
135 virtual void OnSoundEnd(int session_id
) OVERRIDE
{
137 CheckEventsConsistency();
140 // testing::Test methods.
141 virtual void SetUp() OVERRIDE
{
142 AudioInputController::set_factory_for_testing(
143 &audio_input_controller_factory_
);
146 virtual void TearDown() OVERRIDE
{
147 AudioInputController::set_factory_for_testing(NULL
);
150 void FillPacketWithTestWaveform() {
151 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
152 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
)
153 audio_packet_
[i
] = static_cast<uint8
>(i
);
156 void FillPacketWithNoise() {
159 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
) {
161 audio_packet_
[i
] = value
% 100;
166 base::MessageLoopForIO message_loop_
;
167 BrowserThreadImpl io_thread_
;
168 scoped_refptr
<SpeechRecognizerImpl
> recognizer_
;
169 scoped_ptr
<AudioManager
> audio_manager_
;
170 bool recognition_started_
;
171 bool recognition_ended_
;
172 bool result_received_
;
177 SpeechRecognitionErrorCode error_
;
178 net::TestURLFetcherFactory url_fetcher_factory_
;
179 TestAudioInputControllerFactory audio_input_controller_factory_
;
180 std::vector
<uint8
> audio_packet_
;
185 TEST_F(SpeechRecognizerImplTest
, StopNoData
) {
186 // Check for callbacks when stopping record before any audio gets recorded.
187 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
188 recognizer_
->StopAudioCapture();
189 base::MessageLoop::current()->RunUntilIdle();
190 EXPECT_TRUE(recognition_started_
);
191 EXPECT_FALSE(audio_started_
);
192 EXPECT_FALSE(result_received_
);
193 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
194 CheckFinalEventsConsistency();
197 TEST_F(SpeechRecognizerImplTest
, CancelNoData
) {
198 // Check for callbacks when canceling recognition before any audio gets
200 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
201 recognizer_
->AbortRecognition();
202 base::MessageLoop::current()->RunUntilIdle();
203 EXPECT_TRUE(recognition_started_
);
204 EXPECT_FALSE(audio_started_
);
205 EXPECT_FALSE(result_received_
);
206 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
207 CheckFinalEventsConsistency();
210 TEST_F(SpeechRecognizerImplTest
, StopWithData
) {
211 // Start recording, give some data and then stop. This should wait for the
212 // network callback to arrive before completion.
213 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
214 base::MessageLoop::current()->RunUntilIdle();
215 TestAudioInputController
* controller
=
216 audio_input_controller_factory_
.controller();
217 ASSERT_TRUE(controller
);
219 // Try sending 5 chunks of mock audio data and verify that each of them
220 // resulted immediately in a packet sent out via the network. This verifies
221 // that we are streaming out encoded data as chunks without waiting for the
222 // full recording to complete.
223 const size_t kNumChunks
= 5;
224 for (size_t i
= 0; i
< kNumChunks
; ++i
) {
225 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
226 audio_packet_
.size());
227 base::MessageLoop::current()->RunUntilIdle();
228 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
229 ASSERT_TRUE(fetcher
);
230 EXPECT_EQ(i
+ 1, fetcher
->upload_chunks().size());
233 recognizer_
->StopAudioCapture();
234 base::MessageLoop::current()->RunUntilIdle();
235 EXPECT_TRUE(audio_started_
);
236 EXPECT_TRUE(audio_ended_
);
237 EXPECT_FALSE(recognition_ended_
);
238 EXPECT_FALSE(result_received_
);
239 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
241 // Issue the network callback to complete the process.
242 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
243 ASSERT_TRUE(fetcher
);
245 fetcher
->set_url(fetcher
->GetOriginalURL());
246 net::URLRequestStatus status
;
247 status
.set_status(net::URLRequestStatus::SUCCESS
);
248 fetcher
->set_status(status
);
249 fetcher
->set_response_code(200);
250 fetcher
->SetResponseString(
251 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
252 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
253 base::MessageLoop::current()->RunUntilIdle();
254 EXPECT_TRUE(recognition_ended_
);
255 EXPECT_TRUE(result_received_
);
256 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
257 CheckFinalEventsConsistency();
260 TEST_F(SpeechRecognizerImplTest
, CancelWithData
) {
261 // Start recording, give some data and then cancel.
262 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
263 base::MessageLoop::current()->RunUntilIdle();
264 TestAudioInputController
* controller
=
265 audio_input_controller_factory_
.controller();
266 ASSERT_TRUE(controller
);
267 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
268 audio_packet_
.size());
269 base::MessageLoop::current()->RunUntilIdle();
270 recognizer_
->AbortRecognition();
271 base::MessageLoop::current()->RunUntilIdle();
272 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
273 EXPECT_TRUE(recognition_started_
);
274 EXPECT_TRUE(audio_started_
);
275 EXPECT_FALSE(result_received_
);
276 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
277 CheckFinalEventsConsistency();
280 TEST_F(SpeechRecognizerImplTest
, ConnectionError
) {
281 // Start recording, give some data and then stop. Issue the network callback
282 // with a connection error and verify that the recognizer bubbles the error up
283 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
284 base::MessageLoop::current()->RunUntilIdle();
285 TestAudioInputController
* controller
=
286 audio_input_controller_factory_
.controller();
287 ASSERT_TRUE(controller
);
288 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
289 audio_packet_
.size());
290 base::MessageLoop::current()->RunUntilIdle();
291 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
292 ASSERT_TRUE(fetcher
);
294 recognizer_
->StopAudioCapture();
295 base::MessageLoop::current()->RunUntilIdle();
296 EXPECT_TRUE(audio_started_
);
297 EXPECT_TRUE(audio_ended_
);
298 EXPECT_FALSE(recognition_ended_
);
299 EXPECT_FALSE(result_received_
);
300 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
302 // Issue the network callback to complete the process.
303 fetcher
->set_url(fetcher
->GetOriginalURL());
304 net::URLRequestStatus status
;
305 status
.set_status(net::URLRequestStatus::FAILED
);
306 status
.set_error(net::ERR_CONNECTION_REFUSED
);
307 fetcher
->set_status(status
);
308 fetcher
->set_response_code(0);
309 fetcher
->SetResponseString(std::string());
310 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
311 base::MessageLoop::current()->RunUntilIdle();
312 EXPECT_TRUE(recognition_ended_
);
313 EXPECT_FALSE(result_received_
);
314 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
315 CheckFinalEventsConsistency();
318 TEST_F(SpeechRecognizerImplTest
, ServerError
) {
319 // Start recording, give some data and then stop. Issue the network callback
320 // with a 500 error and verify that the recognizer bubbles the error up
321 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
322 base::MessageLoop::current()->RunUntilIdle();
323 TestAudioInputController
* controller
=
324 audio_input_controller_factory_
.controller();
325 ASSERT_TRUE(controller
);
326 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
327 audio_packet_
.size());
328 base::MessageLoop::current()->RunUntilIdle();
329 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
330 ASSERT_TRUE(fetcher
);
332 recognizer_
->StopAudioCapture();
333 base::MessageLoop::current()->RunUntilIdle();
334 EXPECT_TRUE(audio_started_
);
335 EXPECT_TRUE(audio_ended_
);
336 EXPECT_FALSE(recognition_ended_
);
337 EXPECT_FALSE(result_received_
);
338 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
340 // Issue the network callback to complete the process.
341 fetcher
->set_url(fetcher
->GetOriginalURL());
342 net::URLRequestStatus status
;
343 status
.set_status(net::URLRequestStatus::SUCCESS
);
344 fetcher
->set_status(status
);
345 fetcher
->set_response_code(500);
346 fetcher
->SetResponseString("Internal Server Error");
347 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
348 base::MessageLoop::current()->RunUntilIdle();
349 EXPECT_TRUE(recognition_ended_
);
350 EXPECT_FALSE(result_received_
);
351 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
352 CheckFinalEventsConsistency();
355 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorNoData
) {
356 // Check if things tear down properly if AudioInputController threw an error.
357 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
358 base::MessageLoop::current()->RunUntilIdle();
359 TestAudioInputController
* controller
=
360 audio_input_controller_factory_
.controller();
361 ASSERT_TRUE(controller
);
362 controller
->event_handler()->OnError(controller
);
363 base::MessageLoop::current()->RunUntilIdle();
364 EXPECT_TRUE(recognition_started_
);
365 EXPECT_FALSE(audio_started_
);
366 EXPECT_FALSE(result_received_
);
367 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO
, error_
);
368 CheckFinalEventsConsistency();
371 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorWithData
) {
372 // Check if things tear down properly if AudioInputController threw an error
373 // after giving some audio data.
374 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
375 base::MessageLoop::current()->RunUntilIdle();
376 TestAudioInputController
* controller
=
377 audio_input_controller_factory_
.controller();
378 ASSERT_TRUE(controller
);
379 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
380 audio_packet_
.size());
381 controller
->event_handler()->OnError(controller
);
382 base::MessageLoop::current()->RunUntilIdle();
383 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
384 EXPECT_TRUE(recognition_started_
);
385 EXPECT_TRUE(audio_started_
);
386 EXPECT_FALSE(result_received_
);
387 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO
, error_
);
388 CheckFinalEventsConsistency();
391 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackIssued
) {
392 // Start recording and give a lot of packets with audio samples set to zero.
393 // This should trigger the no-speech detector and issue a callback.
394 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
395 base::MessageLoop::current()->RunUntilIdle();
396 TestAudioInputController
* controller
=
397 audio_input_controller_factory_
.controller();
398 ASSERT_TRUE(controller
);
400 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
401 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
+ 1;
402 // The vector is already filled with zero value samples on create.
403 for (int i
= 0; i
< num_packets
; ++i
) {
404 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
405 audio_packet_
.size());
407 base::MessageLoop::current()->RunUntilIdle();
408 EXPECT_TRUE(recognition_started_
);
409 EXPECT_TRUE(audio_started_
);
410 EXPECT_FALSE(result_received_
);
411 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH
, error_
);
412 CheckFinalEventsConsistency();
415 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackNotIssued
) {
416 // Start recording and give a lot of packets with audio samples set to zero
417 // and then some more with reasonably loud audio samples. This should be
418 // treated as normal speech input and the no-speech detector should not get
420 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
421 base::MessageLoop::current()->RunUntilIdle();
422 TestAudioInputController
* controller
=
423 audio_input_controller_factory_
.controller();
424 ASSERT_TRUE(controller
);
425 controller
= audio_input_controller_factory_
.controller();
426 ASSERT_TRUE(controller
);
428 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
429 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
431 // The vector is already filled with zero value samples on create.
432 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
433 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
434 audio_packet_
.size());
437 FillPacketWithTestWaveform();
438 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
439 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
440 audio_packet_
.size());
443 base::MessageLoop::current()->RunUntilIdle();
444 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
445 EXPECT_TRUE(audio_started_
);
446 EXPECT_FALSE(audio_ended_
);
447 EXPECT_FALSE(recognition_ended_
);
448 recognizer_
->AbortRecognition();
449 base::MessageLoop::current()->RunUntilIdle();
450 CheckFinalEventsConsistency();
453 TEST_F(SpeechRecognizerImplTest
, SetInputVolumeCallback
) {
454 // Start recording and give a lot of packets with audio samples set to zero
455 // and then some more with reasonably loud audio samples. Check that we don't
456 // get the callback during estimation phase, then get zero for the silence
457 // samples and proper volume for the loud audio.
458 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
459 base::MessageLoop::current()->RunUntilIdle();
460 TestAudioInputController
* controller
=
461 audio_input_controller_factory_
.controller();
462 ASSERT_TRUE(controller
);
463 controller
= audio_input_controller_factory_
.controller();
464 ASSERT_TRUE(controller
);
466 // Feed some samples to begin with for the endpointer to do noise estimation.
467 int num_packets
= SpeechRecognizerImpl::kEndpointerEstimationTimeMs
/
468 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
469 FillPacketWithNoise();
470 for (int i
= 0; i
< num_packets
; ++i
) {
471 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
472 audio_packet_
.size());
474 base::MessageLoop::current()->RunUntilIdle();
475 EXPECT_EQ(-1.0f
, volume_
); // No audio volume set yet.
477 // The vector is already filled with zero value samples on create.
478 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
479 audio_packet_
.size());
480 base::MessageLoop::current()->RunUntilIdle();
481 EXPECT_FLOAT_EQ(0.74939233f
, volume_
);
483 FillPacketWithTestWaveform();
484 controller
->event_handler()->OnData(controller
, &audio_packet_
[0],
485 audio_packet_
.size());
486 base::MessageLoop::current()->RunUntilIdle();
487 EXPECT_NEAR(0.89926866f
, volume_
, 0.00001f
);
488 EXPECT_FLOAT_EQ(0.75071919f
, noise_volume_
);
490 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
491 EXPECT_FALSE(audio_ended_
);
492 EXPECT_FALSE(recognition_ended_
);
493 recognizer_
->AbortRecognition();
494 base::MessageLoop::current()->RunUntilIdle();
495 CheckFinalEventsConsistency();
498 } // namespace content