1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "media/base/audio_bus.h"
17 #include "net/base/net_errors.h"
18 #include "net/url_request/test_url_fetcher_factory.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using media::AudioInputController
;
23 using media::AudioInputStream
;
24 using media::AudioManager
;
25 using media::AudioOutputStream
;
26 using media::AudioParameters
;
27 using media::TestAudioInputController
;
28 using media::TestAudioInputControllerFactory
;
32 class SpeechRecognizerImplTest
: public SpeechRecognitionEventListener
,
33 public testing::Test
{
35 SpeechRecognizerImplTest()
36 : io_thread_(BrowserThread::IO
, &message_loop_
),
37 recognition_started_(false),
38 recognition_ended_(false),
39 result_received_(false),
40 audio_started_(false),
42 sound_started_(false),
44 error_(SPEECH_RECOGNITION_ERROR_NONE
),
46 // SpeechRecognizer takes ownership of sr_engine.
47 SpeechRecognitionEngine
* sr_engine
=
48 new GoogleOneShotRemoteEngine(NULL
/* URLRequestContextGetter */);
49 SpeechRecognitionEngineConfig config
;
50 config
.audio_num_bits_per_sample
=
51 SpeechRecognizerImpl::kNumBitsPerAudioSample
;
52 config
.audio_sample_rate
= SpeechRecognizerImpl::kAudioSampleRate
;
53 config
.filter_profanities
= false;
54 sr_engine
->SetConfig(config
);
56 const int kTestingSessionId
= 1;
57 recognizer_
= new SpeechRecognizerImpl(
58 this, kTestingSessionId
, false, false, sr_engine
);
59 audio_manager_
.reset(new media::MockAudioManager(
60 base::MessageLoop::current()->task_runner().get()));
61 recognizer_
->SetAudioManagerForTesting(audio_manager_
.get());
63 int audio_packet_length_bytes
=
64 (SpeechRecognizerImpl::kAudioSampleRate
*
65 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
*
66 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
) *
67 SpeechRecognizerImpl::kNumBitsPerAudioSample
) / (8 * 1000);
68 audio_packet_
.resize(audio_packet_length_bytes
);
71 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
);
72 bytes_per_sample_
= SpeechRecognizerImpl::kNumBitsPerAudioSample
/ 8;
73 const int frames
= audio_packet_length_bytes
/ channels
/ bytes_per_sample_
;
74 audio_bus_
= media::AudioBus::Create(channels
, frames
);
78 void CheckEventsConsistency() {
79 // Note: "!x || y" == "x implies y".
80 EXPECT_TRUE(!recognition_ended_
|| recognition_started_
);
81 EXPECT_TRUE(!audio_ended_
|| audio_started_
);
82 EXPECT_TRUE(!sound_ended_
|| sound_started_
);
83 EXPECT_TRUE(!audio_started_
|| recognition_started_
);
84 EXPECT_TRUE(!sound_started_
|| audio_started_
);
85 EXPECT_TRUE(!audio_ended_
|| (sound_ended_
|| !sound_started_
));
86 EXPECT_TRUE(!recognition_ended_
|| (audio_ended_
|| !audio_started_
));
89 void CheckFinalEventsConsistency() {
90 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
91 EXPECT_FALSE(recognition_started_
^ recognition_ended_
);
92 EXPECT_FALSE(audio_started_
^ audio_ended_
);
93 EXPECT_FALSE(sound_started_
^ sound_ended_
);
96 // Overridden from SpeechRecognitionEventListener:
97 void OnAudioStart(int session_id
) override
{
98 audio_started_
= true;
99 CheckEventsConsistency();
102 void OnAudioEnd(int session_id
) override
{
104 CheckEventsConsistency();
107 void OnRecognitionResults(int session_id
,
108 const SpeechRecognitionResults
& results
) override
{
109 result_received_
= true;
112 void OnRecognitionError(int session_id
,
113 const SpeechRecognitionError
& error
) override
{
114 EXPECT_TRUE(recognition_started_
);
115 EXPECT_FALSE(recognition_ended_
);
119 void OnAudioLevelsChange(int session_id
,
121 float noise_volume
) override
{
123 noise_volume_
= noise_volume
;
126 void OnRecognitionEnd(int session_id
) override
{
127 recognition_ended_
= true;
128 CheckEventsConsistency();
131 void OnRecognitionStart(int session_id
) override
{
132 recognition_started_
= true;
133 CheckEventsConsistency();
136 void OnEnvironmentEstimationComplete(int session_id
) override
{}
138 void OnSoundStart(int session_id
) override
{
139 sound_started_
= true;
140 CheckEventsConsistency();
143 void OnSoundEnd(int session_id
) override
{
145 CheckEventsConsistency();
148 // testing::Test methods.
149 void SetUp() override
{
150 AudioInputController::set_factory_for_testing(
151 &audio_input_controller_factory_
);
154 void TearDown() override
{
155 AudioInputController::set_factory_for_testing(NULL
);
158 void CopyPacketToAudioBus() {
159 // Copy the created signal into an audio bus in a deinterleaved format.
160 audio_bus_
->FromInterleaved(
161 &audio_packet_
[0], audio_bus_
->frames(), bytes_per_sample_
);
164 void FillPacketWithTestWaveform() {
165 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
166 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
)
167 audio_packet_
[i
] = static_cast<uint8
>(i
);
168 CopyPacketToAudioBus();
171 void FillPacketWithNoise() {
174 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
) {
176 audio_packet_
[i
] = value
% 100;
178 CopyPacketToAudioBus();
182 base::MessageLoopForIO message_loop_
;
183 BrowserThreadImpl io_thread_
;
184 scoped_refptr
<SpeechRecognizerImpl
> recognizer_
;
185 scoped_ptr
<AudioManager
> audio_manager_
;
186 bool recognition_started_
;
187 bool recognition_ended_
;
188 bool result_received_
;
193 SpeechRecognitionErrorCode error_
;
194 net::TestURLFetcherFactory url_fetcher_factory_
;
195 TestAudioInputControllerFactory audio_input_controller_factory_
;
196 std::vector
<uint8
> audio_packet_
;
197 scoped_ptr
<media::AudioBus
> audio_bus_
;
198 int bytes_per_sample_
;
203 TEST_F(SpeechRecognizerImplTest
, StopNoData
) {
204 // Check for callbacks when stopping record before any audio gets recorded.
205 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
206 recognizer_
->StopAudioCapture();
207 base::MessageLoop::current()->RunUntilIdle();
208 EXPECT_TRUE(recognition_started_
);
209 EXPECT_FALSE(audio_started_
);
210 EXPECT_FALSE(result_received_
);
211 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
212 CheckFinalEventsConsistency();
215 TEST_F(SpeechRecognizerImplTest
, CancelNoData
) {
216 // Check for callbacks when canceling recognition before any audio gets
218 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
219 recognizer_
->AbortRecognition();
220 base::MessageLoop::current()->RunUntilIdle();
221 EXPECT_TRUE(recognition_started_
);
222 EXPECT_FALSE(audio_started_
);
223 EXPECT_FALSE(result_received_
);
224 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
225 CheckFinalEventsConsistency();
228 TEST_F(SpeechRecognizerImplTest
, StopWithData
) {
229 // Start recording, give some data and then stop. This should wait for the
230 // network callback to arrive before completion.
231 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
232 base::MessageLoop::current()->RunUntilIdle();
233 TestAudioInputController
* controller
=
234 audio_input_controller_factory_
.controller();
235 ASSERT_TRUE(controller
);
237 // Try sending 5 chunks of mock audio data and verify that each of them
238 // resulted immediately in a packet sent out via the network. This verifies
239 // that we are streaming out encoded data as chunks without waiting for the
240 // full recording to complete.
241 const size_t kNumChunks
= 5;
242 for (size_t i
= 0; i
< kNumChunks
; ++i
) {
243 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
244 base::MessageLoop::current()->RunUntilIdle();
245 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
246 ASSERT_TRUE(fetcher
);
247 EXPECT_EQ(i
+ 1, fetcher
->upload_chunks().size());
250 recognizer_
->StopAudioCapture();
251 base::MessageLoop::current()->RunUntilIdle();
252 EXPECT_TRUE(audio_started_
);
253 EXPECT_TRUE(audio_ended_
);
254 EXPECT_FALSE(recognition_ended_
);
255 EXPECT_FALSE(result_received_
);
256 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
258 // Issue the network callback to complete the process.
259 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
260 ASSERT_TRUE(fetcher
);
262 fetcher
->set_url(fetcher
->GetOriginalURL());
263 fetcher
->set_status(net::URLRequestStatus());
264 fetcher
->set_response_code(200);
265 fetcher
->SetResponseString(
266 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
267 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
268 base::MessageLoop::current()->RunUntilIdle();
269 EXPECT_TRUE(recognition_ended_
);
270 EXPECT_TRUE(result_received_
);
271 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
272 CheckFinalEventsConsistency();
275 TEST_F(SpeechRecognizerImplTest
, CancelWithData
) {
276 // Start recording, give some data and then cancel.
277 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
278 base::MessageLoop::current()->RunUntilIdle();
279 TestAudioInputController
* controller
=
280 audio_input_controller_factory_
.controller();
281 ASSERT_TRUE(controller
);
282 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
283 base::MessageLoop::current()->RunUntilIdle();
284 recognizer_
->AbortRecognition();
285 base::MessageLoop::current()->RunUntilIdle();
286 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
287 EXPECT_TRUE(recognition_started_
);
288 EXPECT_TRUE(audio_started_
);
289 EXPECT_FALSE(result_received_
);
290 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
291 CheckFinalEventsConsistency();
294 TEST_F(SpeechRecognizerImplTest
, ConnectionError
) {
295 // Start recording, give some data and then stop. Issue the network callback
296 // with a connection error and verify that the recognizer bubbles the error up
297 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
298 base::MessageLoop::current()->RunUntilIdle();
299 TestAudioInputController
* controller
=
300 audio_input_controller_factory_
.controller();
301 ASSERT_TRUE(controller
);
302 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
303 base::MessageLoop::current()->RunUntilIdle();
304 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
305 ASSERT_TRUE(fetcher
);
307 recognizer_
->StopAudioCapture();
308 base::MessageLoop::current()->RunUntilIdle();
309 EXPECT_TRUE(audio_started_
);
310 EXPECT_TRUE(audio_ended_
);
311 EXPECT_FALSE(recognition_ended_
);
312 EXPECT_FALSE(result_received_
);
313 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
315 // Issue the network callback to complete the process.
316 fetcher
->set_url(fetcher
->GetOriginalURL());
318 net::URLRequestStatus::FromError(net::ERR_CONNECTION_REFUSED
));
319 fetcher
->set_response_code(0);
320 fetcher
->SetResponseString(std::string());
321 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
322 base::MessageLoop::current()->RunUntilIdle();
323 EXPECT_TRUE(recognition_ended_
);
324 EXPECT_FALSE(result_received_
);
325 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
326 CheckFinalEventsConsistency();
329 TEST_F(SpeechRecognizerImplTest
, ServerError
) {
330 // Start recording, give some data and then stop. Issue the network callback
331 // with a 500 error and verify that the recognizer bubbles the error up
332 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
333 base::MessageLoop::current()->RunUntilIdle();
334 TestAudioInputController
* controller
=
335 audio_input_controller_factory_
.controller();
336 ASSERT_TRUE(controller
);
337 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
338 base::MessageLoop::current()->RunUntilIdle();
339 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
340 ASSERT_TRUE(fetcher
);
342 recognizer_
->StopAudioCapture();
343 base::MessageLoop::current()->RunUntilIdle();
344 EXPECT_TRUE(audio_started_
);
345 EXPECT_TRUE(audio_ended_
);
346 EXPECT_FALSE(recognition_ended_
);
347 EXPECT_FALSE(result_received_
);
348 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
350 // Issue the network callback to complete the process.
351 fetcher
->set_url(fetcher
->GetOriginalURL());
352 fetcher
->set_status(net::URLRequestStatus());
353 fetcher
->set_response_code(500);
354 fetcher
->SetResponseString("Internal Server Error");
355 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
356 base::MessageLoop::current()->RunUntilIdle();
357 EXPECT_TRUE(recognition_ended_
);
358 EXPECT_FALSE(result_received_
);
359 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
360 CheckFinalEventsConsistency();
363 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorNoData
) {
364 // Check if things tear down properly if AudioInputController threw an error.
365 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
366 base::MessageLoop::current()->RunUntilIdle();
367 TestAudioInputController
* controller
=
368 audio_input_controller_factory_
.controller();
369 ASSERT_TRUE(controller
);
370 controller
->event_handler()->OnError(controller
,
371 AudioInputController::UNKNOWN_ERROR
);
372 base::MessageLoop::current()->RunUntilIdle();
373 EXPECT_TRUE(recognition_started_
);
374 EXPECT_FALSE(audio_started_
);
375 EXPECT_FALSE(result_received_
);
376 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE
, error_
);
377 CheckFinalEventsConsistency();
380 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorWithData
) {
381 // Check if things tear down properly if AudioInputController threw an error
382 // after giving some audio data.
383 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
384 base::MessageLoop::current()->RunUntilIdle();
385 TestAudioInputController
* controller
=
386 audio_input_controller_factory_
.controller();
387 ASSERT_TRUE(controller
);
388 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
389 controller
->event_handler()->OnError(controller
,
390 AudioInputController::UNKNOWN_ERROR
);
391 base::MessageLoop::current()->RunUntilIdle();
392 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
393 EXPECT_TRUE(recognition_started_
);
394 EXPECT_TRUE(audio_started_
);
395 EXPECT_FALSE(result_received_
);
396 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE
, error_
);
397 CheckFinalEventsConsistency();
400 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackIssued
) {
401 // Start recording and give a lot of packets with audio samples set to zero.
402 // This should trigger the no-speech detector and issue a callback.
403 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
404 base::MessageLoop::current()->RunUntilIdle();
405 TestAudioInputController
* controller
=
406 audio_input_controller_factory_
.controller();
407 ASSERT_TRUE(controller
);
409 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
410 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
+ 1;
411 // The vector is already filled with zero value samples on create.
412 for (int i
= 0; i
< num_packets
; ++i
) {
413 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
415 base::MessageLoop::current()->RunUntilIdle();
416 EXPECT_TRUE(recognition_started_
);
417 EXPECT_TRUE(audio_started_
);
418 EXPECT_FALSE(result_received_
);
419 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH
, error_
);
420 CheckFinalEventsConsistency();
423 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackNotIssued
) {
424 // Start recording and give a lot of packets with audio samples set to zero
425 // and then some more with reasonably loud audio samples. This should be
426 // treated as normal speech input and the no-speech detector should not get
428 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
429 base::MessageLoop::current()->RunUntilIdle();
430 TestAudioInputController
* controller
=
431 audio_input_controller_factory_
.controller();
432 ASSERT_TRUE(controller
);
433 controller
= audio_input_controller_factory_
.controller();
434 ASSERT_TRUE(controller
);
436 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
437 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
439 // The vector is already filled with zero value samples on create.
440 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
441 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
444 FillPacketWithTestWaveform();
445 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
446 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
449 base::MessageLoop::current()->RunUntilIdle();
450 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
451 EXPECT_TRUE(audio_started_
);
452 EXPECT_FALSE(audio_ended_
);
453 EXPECT_FALSE(recognition_ended_
);
454 recognizer_
->AbortRecognition();
455 base::MessageLoop::current()->RunUntilIdle();
456 CheckFinalEventsConsistency();
459 TEST_F(SpeechRecognizerImplTest
, SetInputVolumeCallback
) {
460 // Start recording and give a lot of packets with audio samples set to zero
461 // and then some more with reasonably loud audio samples. Check that we don't
462 // get the callback during estimation phase, then get zero for the silence
463 // samples and proper volume for the loud audio.
464 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
465 base::MessageLoop::current()->RunUntilIdle();
466 TestAudioInputController
* controller
=
467 audio_input_controller_factory_
.controller();
468 ASSERT_TRUE(controller
);
469 controller
= audio_input_controller_factory_
.controller();
470 ASSERT_TRUE(controller
);
472 // Feed some samples to begin with for the endpointer to do noise estimation.
473 int num_packets
= SpeechRecognizerImpl::kEndpointerEstimationTimeMs
/
474 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
475 FillPacketWithNoise();
476 for (int i
= 0; i
< num_packets
; ++i
) {
477 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
479 base::MessageLoop::current()->RunUntilIdle();
480 EXPECT_EQ(-1.0f
, volume_
); // No audio volume set yet.
482 // The vector is already filled with zero value samples on create.
483 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
484 base::MessageLoop::current()->RunUntilIdle();
485 EXPECT_FLOAT_EQ(0.74939233f
, volume_
);
487 FillPacketWithTestWaveform();
488 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
489 base::MessageLoop::current()->RunUntilIdle();
490 EXPECT_NEAR(0.89926866f
, volume_
, 0.00001f
);
491 EXPECT_FLOAT_EQ(0.75071919f
, noise_volume_
);
493 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
494 EXPECT_FALSE(audio_ended_
);
495 EXPECT_FALSE(recognition_ended_
);
496 recognizer_
->AbortRecognition();
497 base::MessageLoop::current()->RunUntilIdle();
498 CheckFinalEventsConsistency();
501 } // namespace content