1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "media/base/audio_bus.h"
17 #include "net/base/net_errors.h"
18 #include "net/url_request/test_url_fetcher_factory.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using base::MessageLoopProxy
;
23 using media::AudioInputController
;
24 using media::AudioInputStream
;
25 using media::AudioManager
;
26 using media::AudioOutputStream
;
27 using media::AudioParameters
;
28 using media::TestAudioInputController
;
29 using media::TestAudioInputControllerFactory
;
33 class SpeechRecognizerImplTest
: public SpeechRecognitionEventListener
,
34 public testing::Test
{
36 SpeechRecognizerImplTest()
37 : io_thread_(BrowserThread::IO
, &message_loop_
),
38 recognition_started_(false),
39 recognition_ended_(false),
40 result_received_(false),
41 audio_started_(false),
43 sound_started_(false),
45 error_(SPEECH_RECOGNITION_ERROR_NONE
),
47 // SpeechRecognizer takes ownership of sr_engine.
48 SpeechRecognitionEngine
* sr_engine
=
49 new GoogleOneShotRemoteEngine(NULL
/* URLRequestContextGetter */);
50 SpeechRecognitionEngineConfig config
;
51 config
.audio_num_bits_per_sample
=
52 SpeechRecognizerImpl::kNumBitsPerAudioSample
;
53 config
.audio_sample_rate
= SpeechRecognizerImpl::kAudioSampleRate
;
54 config
.filter_profanities
= false;
55 sr_engine
->SetConfig(config
);
57 const int kTestingSessionId
= 1;
58 recognizer_
= new SpeechRecognizerImpl(
59 this, kTestingSessionId
, false, false, sr_engine
);
60 audio_manager_
.reset(new media::MockAudioManager(
61 base::MessageLoop::current()->message_loop_proxy().get()));
62 recognizer_
->SetAudioManagerForTesting(audio_manager_
.get());
64 int audio_packet_length_bytes
=
65 (SpeechRecognizerImpl::kAudioSampleRate
*
66 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
*
67 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
) *
68 SpeechRecognizerImpl::kNumBitsPerAudioSample
) / (8 * 1000);
69 audio_packet_
.resize(audio_packet_length_bytes
);
72 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
);
73 bytes_per_sample_
= SpeechRecognizerImpl::kNumBitsPerAudioSample
/ 8;
74 const int frames
= audio_packet_length_bytes
/ channels
/ bytes_per_sample_
;
75 audio_bus_
= media::AudioBus::Create(channels
, frames
);
79 void CheckEventsConsistency() {
80 // Note: "!x || y" == "x implies y".
81 EXPECT_TRUE(!recognition_ended_
|| recognition_started_
);
82 EXPECT_TRUE(!audio_ended_
|| audio_started_
);
83 EXPECT_TRUE(!sound_ended_
|| sound_started_
);
84 EXPECT_TRUE(!audio_started_
|| recognition_started_
);
85 EXPECT_TRUE(!sound_started_
|| audio_started_
);
86 EXPECT_TRUE(!audio_ended_
|| (sound_ended_
|| !sound_started_
));
87 EXPECT_TRUE(!recognition_ended_
|| (audio_ended_
|| !audio_started_
));
90 void CheckFinalEventsConsistency() {
91 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
92 EXPECT_FALSE(recognition_started_
^ recognition_ended_
);
93 EXPECT_FALSE(audio_started_
^ audio_ended_
);
94 EXPECT_FALSE(sound_started_
^ sound_ended_
);
97 // Overridden from SpeechRecognitionEventListener:
98 virtual void OnAudioStart(int session_id
) OVERRIDE
{
99 audio_started_
= true;
100 CheckEventsConsistency();
103 virtual void OnAudioEnd(int session_id
) OVERRIDE
{
105 CheckEventsConsistency();
108 virtual void OnRecognitionResults(
109 int session_id
, const SpeechRecognitionResults
& results
) OVERRIDE
{
110 result_received_
= true;
113 virtual void OnRecognitionError(
114 int session_id
, const SpeechRecognitionError
& error
) OVERRIDE
{
115 EXPECT_TRUE(recognition_started_
);
116 EXPECT_FALSE(recognition_ended_
);
120 virtual void OnAudioLevelsChange(int session_id
, float volume
,
121 float noise_volume
) OVERRIDE
{
123 noise_volume_
= noise_volume
;
126 virtual void OnRecognitionEnd(int session_id
) OVERRIDE
{
127 recognition_ended_
= true;
128 CheckEventsConsistency();
131 virtual void OnRecognitionStart(int session_id
) OVERRIDE
{
132 recognition_started_
= true;
133 CheckEventsConsistency();
136 virtual void OnEnvironmentEstimationComplete(int session_id
) OVERRIDE
{}
138 virtual void OnSoundStart(int session_id
) OVERRIDE
{
139 sound_started_
= true;
140 CheckEventsConsistency();
143 virtual void OnSoundEnd(int session_id
) OVERRIDE
{
145 CheckEventsConsistency();
148 // testing::Test methods.
149 virtual void SetUp() OVERRIDE
{
150 AudioInputController::set_factory_for_testing(
151 &audio_input_controller_factory_
);
154 virtual void TearDown() OVERRIDE
{
155 AudioInputController::set_factory_for_testing(NULL
);
158 void CopyPacketToAudioBus() {
159 // Copy the created signal into an audio bus in a deinterleaved format.
160 audio_bus_
->FromInterleaved(
161 &audio_packet_
[0], audio_bus_
->frames(), bytes_per_sample_
);
164 void FillPacketWithTestWaveform() {
165 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
166 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
)
167 audio_packet_
[i
] = static_cast<uint8
>(i
);
168 CopyPacketToAudioBus();
171 void FillPacketWithNoise() {
174 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
) {
176 audio_packet_
[i
] = value
% 100;
178 CopyPacketToAudioBus();
182 base::MessageLoopForIO message_loop_
;
183 BrowserThreadImpl io_thread_
;
184 scoped_refptr
<SpeechRecognizerImpl
> recognizer_
;
185 scoped_ptr
<AudioManager
> audio_manager_
;
186 bool recognition_started_
;
187 bool recognition_ended_
;
188 bool result_received_
;
193 SpeechRecognitionErrorCode error_
;
194 net::TestURLFetcherFactory url_fetcher_factory_
;
195 TestAudioInputControllerFactory audio_input_controller_factory_
;
196 std::vector
<uint8
> audio_packet_
;
197 scoped_ptr
<media::AudioBus
> audio_bus_
;
198 int bytes_per_sample_
;
203 TEST_F(SpeechRecognizerImplTest
, StopNoData
) {
204 // Check for callbacks when stopping record before any audio gets recorded.
205 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
206 recognizer_
->StopAudioCapture();
207 base::MessageLoop::current()->RunUntilIdle();
208 EXPECT_TRUE(recognition_started_
);
209 EXPECT_FALSE(audio_started_
);
210 EXPECT_FALSE(result_received_
);
211 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
212 CheckFinalEventsConsistency();
215 TEST_F(SpeechRecognizerImplTest
, CancelNoData
) {
216 // Check for callbacks when canceling recognition before any audio gets
218 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
219 recognizer_
->AbortRecognition();
220 base::MessageLoop::current()->RunUntilIdle();
221 EXPECT_TRUE(recognition_started_
);
222 EXPECT_FALSE(audio_started_
);
223 EXPECT_FALSE(result_received_
);
224 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
225 CheckFinalEventsConsistency();
228 TEST_F(SpeechRecognizerImplTest
, StopWithData
) {
229 // Start recording, give some data and then stop. This should wait for the
230 // network callback to arrive before completion.
231 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
232 base::MessageLoop::current()->RunUntilIdle();
233 TestAudioInputController
* controller
=
234 audio_input_controller_factory_
.controller();
235 ASSERT_TRUE(controller
);
237 // Try sending 5 chunks of mock audio data and verify that each of them
238 // resulted immediately in a packet sent out via the network. This verifies
239 // that we are streaming out encoded data as chunks without waiting for the
240 // full recording to complete.
241 const size_t kNumChunks
= 5;
242 for (size_t i
= 0; i
< kNumChunks
; ++i
) {
243 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
244 base::MessageLoop::current()->RunUntilIdle();
245 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
246 ASSERT_TRUE(fetcher
);
247 EXPECT_EQ(i
+ 1, fetcher
->upload_chunks().size());
250 recognizer_
->StopAudioCapture();
251 base::MessageLoop::current()->RunUntilIdle();
252 EXPECT_TRUE(audio_started_
);
253 EXPECT_TRUE(audio_ended_
);
254 EXPECT_FALSE(recognition_ended_
);
255 EXPECT_FALSE(result_received_
);
256 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
258 // Issue the network callback to complete the process.
259 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
260 ASSERT_TRUE(fetcher
);
262 fetcher
->set_url(fetcher
->GetOriginalURL());
263 net::URLRequestStatus status
;
264 status
.set_status(net::URLRequestStatus::SUCCESS
);
265 fetcher
->set_status(status
);
266 fetcher
->set_response_code(200);
267 fetcher
->SetResponseString(
268 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
269 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
270 base::MessageLoop::current()->RunUntilIdle();
271 EXPECT_TRUE(recognition_ended_
);
272 EXPECT_TRUE(result_received_
);
273 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
274 CheckFinalEventsConsistency();
277 TEST_F(SpeechRecognizerImplTest
, CancelWithData
) {
278 // Start recording, give some data and then cancel.
279 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
280 base::MessageLoop::current()->RunUntilIdle();
281 TestAudioInputController
* controller
=
282 audio_input_controller_factory_
.controller();
283 ASSERT_TRUE(controller
);
284 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
285 base::MessageLoop::current()->RunUntilIdle();
286 recognizer_
->AbortRecognition();
287 base::MessageLoop::current()->RunUntilIdle();
288 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
289 EXPECT_TRUE(recognition_started_
);
290 EXPECT_TRUE(audio_started_
);
291 EXPECT_FALSE(result_received_
);
292 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
293 CheckFinalEventsConsistency();
296 TEST_F(SpeechRecognizerImplTest
, ConnectionError
) {
297 // Start recording, give some data and then stop. Issue the network callback
298 // with a connection error and verify that the recognizer bubbles the error up
299 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
300 base::MessageLoop::current()->RunUntilIdle();
301 TestAudioInputController
* controller
=
302 audio_input_controller_factory_
.controller();
303 ASSERT_TRUE(controller
);
304 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
305 base::MessageLoop::current()->RunUntilIdle();
306 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
307 ASSERT_TRUE(fetcher
);
309 recognizer_
->StopAudioCapture();
310 base::MessageLoop::current()->RunUntilIdle();
311 EXPECT_TRUE(audio_started_
);
312 EXPECT_TRUE(audio_ended_
);
313 EXPECT_FALSE(recognition_ended_
);
314 EXPECT_FALSE(result_received_
);
315 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
317 // Issue the network callback to complete the process.
318 fetcher
->set_url(fetcher
->GetOriginalURL());
319 net::URLRequestStatus status
;
320 status
.set_status(net::URLRequestStatus::FAILED
);
321 status
.set_error(net::ERR_CONNECTION_REFUSED
);
322 fetcher
->set_status(status
);
323 fetcher
->set_response_code(0);
324 fetcher
->SetResponseString(std::string());
325 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
326 base::MessageLoop::current()->RunUntilIdle();
327 EXPECT_TRUE(recognition_ended_
);
328 EXPECT_FALSE(result_received_
);
329 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
330 CheckFinalEventsConsistency();
333 TEST_F(SpeechRecognizerImplTest
, ServerError
) {
334 // Start recording, give some data and then stop. Issue the network callback
335 // with a 500 error and verify that the recognizer bubbles the error up
336 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
337 base::MessageLoop::current()->RunUntilIdle();
338 TestAudioInputController
* controller
=
339 audio_input_controller_factory_
.controller();
340 ASSERT_TRUE(controller
);
341 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
342 base::MessageLoop::current()->RunUntilIdle();
343 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
344 ASSERT_TRUE(fetcher
);
346 recognizer_
->StopAudioCapture();
347 base::MessageLoop::current()->RunUntilIdle();
348 EXPECT_TRUE(audio_started_
);
349 EXPECT_TRUE(audio_ended_
);
350 EXPECT_FALSE(recognition_ended_
);
351 EXPECT_FALSE(result_received_
);
352 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
354 // Issue the network callback to complete the process.
355 fetcher
->set_url(fetcher
->GetOriginalURL());
356 net::URLRequestStatus status
;
357 status
.set_status(net::URLRequestStatus::SUCCESS
);
358 fetcher
->set_status(status
);
359 fetcher
->set_response_code(500);
360 fetcher
->SetResponseString("Internal Server Error");
361 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
362 base::MessageLoop::current()->RunUntilIdle();
363 EXPECT_TRUE(recognition_ended_
);
364 EXPECT_FALSE(result_received_
);
365 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
366 CheckFinalEventsConsistency();
369 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorNoData
) {
370 // Check if things tear down properly if AudioInputController threw an error.
371 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
372 base::MessageLoop::current()->RunUntilIdle();
373 TestAudioInputController
* controller
=
374 audio_input_controller_factory_
.controller();
375 ASSERT_TRUE(controller
);
376 controller
->event_handler()->OnError(controller
,
377 AudioInputController::UNKNOWN_ERROR
);
378 base::MessageLoop::current()->RunUntilIdle();
379 EXPECT_TRUE(recognition_started_
);
380 EXPECT_FALSE(audio_started_
);
381 EXPECT_FALSE(result_received_
);
382 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO
, error_
);
383 CheckFinalEventsConsistency();
386 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorWithData
) {
387 // Check if things tear down properly if AudioInputController threw an error
388 // after giving some audio data.
389 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
390 base::MessageLoop::current()->RunUntilIdle();
391 TestAudioInputController
* controller
=
392 audio_input_controller_factory_
.controller();
393 ASSERT_TRUE(controller
);
394 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
395 controller
->event_handler()->OnError(controller
,
396 AudioInputController::UNKNOWN_ERROR
);
397 base::MessageLoop::current()->RunUntilIdle();
398 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
399 EXPECT_TRUE(recognition_started_
);
400 EXPECT_TRUE(audio_started_
);
401 EXPECT_FALSE(result_received_
);
402 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO
, error_
);
403 CheckFinalEventsConsistency();
406 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackIssued
) {
407 // Start recording and give a lot of packets with audio samples set to zero.
408 // This should trigger the no-speech detector and issue a callback.
409 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
410 base::MessageLoop::current()->RunUntilIdle();
411 TestAudioInputController
* controller
=
412 audio_input_controller_factory_
.controller();
413 ASSERT_TRUE(controller
);
415 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
416 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
+ 1;
417 // The vector is already filled with zero value samples on create.
418 for (int i
= 0; i
< num_packets
; ++i
) {
419 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
421 base::MessageLoop::current()->RunUntilIdle();
422 EXPECT_TRUE(recognition_started_
);
423 EXPECT_TRUE(audio_started_
);
424 EXPECT_FALSE(result_received_
);
425 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH
, error_
);
426 CheckFinalEventsConsistency();
429 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackNotIssued
) {
430 // Start recording and give a lot of packets with audio samples set to zero
431 // and then some more with reasonably loud audio samples. This should be
432 // treated as normal speech input and the no-speech detector should not get
434 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
435 base::MessageLoop::current()->RunUntilIdle();
436 TestAudioInputController
* controller
=
437 audio_input_controller_factory_
.controller();
438 ASSERT_TRUE(controller
);
439 controller
= audio_input_controller_factory_
.controller();
440 ASSERT_TRUE(controller
);
442 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
443 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
445 // The vector is already filled with zero value samples on create.
446 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
447 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
450 FillPacketWithTestWaveform();
451 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
452 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
455 base::MessageLoop::current()->RunUntilIdle();
456 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
457 EXPECT_TRUE(audio_started_
);
458 EXPECT_FALSE(audio_ended_
);
459 EXPECT_FALSE(recognition_ended_
);
460 recognizer_
->AbortRecognition();
461 base::MessageLoop::current()->RunUntilIdle();
462 CheckFinalEventsConsistency();
465 TEST_F(SpeechRecognizerImplTest
, SetInputVolumeCallback
) {
466 // Start recording and give a lot of packets with audio samples set to zero
467 // and then some more with reasonably loud audio samples. Check that we don't
468 // get the callback during estimation phase, then get zero for the silence
469 // samples and proper volume for the loud audio.
470 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
471 base::MessageLoop::current()->RunUntilIdle();
472 TestAudioInputController
* controller
=
473 audio_input_controller_factory_
.controller();
474 ASSERT_TRUE(controller
);
475 controller
= audio_input_controller_factory_
.controller();
476 ASSERT_TRUE(controller
);
478 // Feed some samples to begin with for the endpointer to do noise estimation.
479 int num_packets
= SpeechRecognizerImpl::kEndpointerEstimationTimeMs
/
480 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
481 FillPacketWithNoise();
482 for (int i
= 0; i
< num_packets
; ++i
) {
483 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
485 base::MessageLoop::current()->RunUntilIdle();
486 EXPECT_EQ(-1.0f
, volume_
); // No audio volume set yet.
488 // The vector is already filled with zero value samples on create.
489 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
490 base::MessageLoop::current()->RunUntilIdle();
491 EXPECT_FLOAT_EQ(0.74939233f
, volume_
);
493 FillPacketWithTestWaveform();
494 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
495 base::MessageLoop::current()->RunUntilIdle();
496 EXPECT_NEAR(0.89926866f
, volume_
, 0.00001f
);
497 EXPECT_FLOAT_EQ(0.75071919f
, noise_volume_
);
499 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
500 EXPECT_FALSE(audio_ended_
);
501 EXPECT_FALSE(recognition_ended_
);
502 recognizer_
->AbortRecognition();
503 base::MessageLoop::current()->RunUntilIdle();
504 CheckFinalEventsConsistency();
507 } // namespace content