1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "content/browser/browser_thread_impl.h"
8 #include "content/browser/speech/google_one_shot_remote_engine.h"
9 #include "content/browser/speech/speech_recognizer_impl.h"
10 #include "content/public/browser/speech_recognition_event_listener.h"
11 #include "media/audio/audio_manager_base.h"
12 #include "media/audio/fake_audio_input_stream.h"
13 #include "media/audio/fake_audio_output_stream.h"
14 #include "media/audio/mock_audio_manager.h"
15 #include "media/audio/test_audio_input_controller_factory.h"
16 #include "media/base/audio_bus.h"
17 #include "net/base/net_errors.h"
18 #include "net/url_request/test_url_fetcher_factory.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using base::MessageLoopProxy
;
23 using media::AudioInputController
;
24 using media::AudioInputStream
;
25 using media::AudioManager
;
26 using media::AudioOutputStream
;
27 using media::AudioParameters
;
28 using media::TestAudioInputController
;
29 using media::TestAudioInputControllerFactory
;
33 class SpeechRecognizerImplTest
: public SpeechRecognitionEventListener
,
34 public testing::Test
{
36 SpeechRecognizerImplTest()
37 : io_thread_(BrowserThread::IO
, &message_loop_
),
38 recognition_started_(false),
39 recognition_ended_(false),
40 result_received_(false),
41 audio_started_(false),
43 sound_started_(false),
45 error_(SPEECH_RECOGNITION_ERROR_NONE
),
47 // SpeechRecognizer takes ownership of sr_engine.
48 SpeechRecognitionEngine
* sr_engine
=
49 new GoogleOneShotRemoteEngine(NULL
/* URLRequestContextGetter */);
50 SpeechRecognitionEngineConfig config
;
51 config
.audio_num_bits_per_sample
=
52 SpeechRecognizerImpl::kNumBitsPerAudioSample
;
53 config
.audio_sample_rate
= SpeechRecognizerImpl::kAudioSampleRate
;
54 config
.filter_profanities
= false;
55 sr_engine
->SetConfig(config
);
57 const int kTestingSessionId
= 1;
58 recognizer_
= new SpeechRecognizerImpl(
59 this, kTestingSessionId
, false, false, sr_engine
);
60 audio_manager_
.reset(new media::MockAudioManager(
61 base::MessageLoop::current()->message_loop_proxy().get()));
62 recognizer_
->SetAudioManagerForTesting(audio_manager_
.get());
64 int audio_packet_length_bytes
=
65 (SpeechRecognizerImpl::kAudioSampleRate
*
66 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
*
67 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
) *
68 SpeechRecognizerImpl::kNumBitsPerAudioSample
) / (8 * 1000);
69 audio_packet_
.resize(audio_packet_length_bytes
);
72 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout
);
73 bytes_per_sample_
= SpeechRecognizerImpl::kNumBitsPerAudioSample
/ 8;
74 const int frames
= audio_packet_length_bytes
/ channels
/ bytes_per_sample_
;
75 audio_bus_
= media::AudioBus::Create(channels
, frames
);
79 void CheckEventsConsistency() {
80 // Note: "!x || y" == "x implies y".
81 EXPECT_TRUE(!recognition_ended_
|| recognition_started_
);
82 EXPECT_TRUE(!audio_ended_
|| audio_started_
);
83 EXPECT_TRUE(!sound_ended_
|| sound_started_
);
84 EXPECT_TRUE(!audio_started_
|| recognition_started_
);
85 EXPECT_TRUE(!sound_started_
|| audio_started_
);
86 EXPECT_TRUE(!audio_ended_
|| (sound_ended_
|| !sound_started_
));
87 EXPECT_TRUE(!recognition_ended_
|| (audio_ended_
|| !audio_started_
));
90 void CheckFinalEventsConsistency() {
91 // Note: "!(x ^ y)" == "(x && y) || (!x && !x)".
92 EXPECT_FALSE(recognition_started_
^ recognition_ended_
);
93 EXPECT_FALSE(audio_started_
^ audio_ended_
);
94 EXPECT_FALSE(sound_started_
^ sound_ended_
);
97 // Overridden from SpeechRecognitionEventListener:
98 void OnAudioStart(int session_id
) override
{
99 audio_started_
= true;
100 CheckEventsConsistency();
103 void OnAudioEnd(int session_id
) override
{
105 CheckEventsConsistency();
108 void OnRecognitionResults(int session_id
,
109 const SpeechRecognitionResults
& results
) override
{
110 result_received_
= true;
113 void OnRecognitionError(int session_id
,
114 const SpeechRecognitionError
& error
) override
{
115 EXPECT_TRUE(recognition_started_
);
116 EXPECT_FALSE(recognition_ended_
);
120 void OnAudioLevelsChange(int session_id
,
122 float noise_volume
) override
{
124 noise_volume_
= noise_volume
;
127 void OnRecognitionEnd(int session_id
) override
{
128 recognition_ended_
= true;
129 CheckEventsConsistency();
132 void OnRecognitionStart(int session_id
) override
{
133 recognition_started_
= true;
134 CheckEventsConsistency();
137 void OnEnvironmentEstimationComplete(int session_id
) override
{}
139 void OnSoundStart(int session_id
) override
{
140 sound_started_
= true;
141 CheckEventsConsistency();
144 void OnSoundEnd(int session_id
) override
{
146 CheckEventsConsistency();
149 // testing::Test methods.
150 void SetUp() override
{
151 AudioInputController::set_factory_for_testing(
152 &audio_input_controller_factory_
);
155 void TearDown() override
{
156 AudioInputController::set_factory_for_testing(NULL
);
159 void CopyPacketToAudioBus() {
160 // Copy the created signal into an audio bus in a deinterleaved format.
161 audio_bus_
->FromInterleaved(
162 &audio_packet_
[0], audio_bus_
->frames(), bytes_per_sample_
);
165 void FillPacketWithTestWaveform() {
166 // Fill the input with a simple pattern, a 125Hz sawtooth waveform.
167 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
)
168 audio_packet_
[i
] = static_cast<uint8
>(i
);
169 CopyPacketToAudioBus();
172 void FillPacketWithNoise() {
175 for (size_t i
= 0; i
< audio_packet_
.size(); ++i
) {
177 audio_packet_
[i
] = value
% 100;
179 CopyPacketToAudioBus();
183 base::MessageLoopForIO message_loop_
;
184 BrowserThreadImpl io_thread_
;
185 scoped_refptr
<SpeechRecognizerImpl
> recognizer_
;
186 scoped_ptr
<AudioManager
> audio_manager_
;
187 bool recognition_started_
;
188 bool recognition_ended_
;
189 bool result_received_
;
194 SpeechRecognitionErrorCode error_
;
195 net::TestURLFetcherFactory url_fetcher_factory_
;
196 TestAudioInputControllerFactory audio_input_controller_factory_
;
197 std::vector
<uint8
> audio_packet_
;
198 scoped_ptr
<media::AudioBus
> audio_bus_
;
199 int bytes_per_sample_
;
204 TEST_F(SpeechRecognizerImplTest
, StopNoData
) {
205 // Check for callbacks when stopping record before any audio gets recorded.
206 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
207 recognizer_
->StopAudioCapture();
208 base::MessageLoop::current()->RunUntilIdle();
209 EXPECT_TRUE(recognition_started_
);
210 EXPECT_FALSE(audio_started_
);
211 EXPECT_FALSE(result_received_
);
212 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
213 CheckFinalEventsConsistency();
216 TEST_F(SpeechRecognizerImplTest
, CancelNoData
) {
217 // Check for callbacks when canceling recognition before any audio gets
219 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
220 recognizer_
->AbortRecognition();
221 base::MessageLoop::current()->RunUntilIdle();
222 EXPECT_TRUE(recognition_started_
);
223 EXPECT_FALSE(audio_started_
);
224 EXPECT_FALSE(result_received_
);
225 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
226 CheckFinalEventsConsistency();
229 TEST_F(SpeechRecognizerImplTest
, StopWithData
) {
230 // Start recording, give some data and then stop. This should wait for the
231 // network callback to arrive before completion.
232 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
233 base::MessageLoop::current()->RunUntilIdle();
234 TestAudioInputController
* controller
=
235 audio_input_controller_factory_
.controller();
236 ASSERT_TRUE(controller
);
238 // Try sending 5 chunks of mock audio data and verify that each of them
239 // resulted immediately in a packet sent out via the network. This verifies
240 // that we are streaming out encoded data as chunks without waiting for the
241 // full recording to complete.
242 const size_t kNumChunks
= 5;
243 for (size_t i
= 0; i
< kNumChunks
; ++i
) {
244 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
245 base::MessageLoop::current()->RunUntilIdle();
246 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
247 ASSERT_TRUE(fetcher
);
248 EXPECT_EQ(i
+ 1, fetcher
->upload_chunks().size());
251 recognizer_
->StopAudioCapture();
252 base::MessageLoop::current()->RunUntilIdle();
253 EXPECT_TRUE(audio_started_
);
254 EXPECT_TRUE(audio_ended_
);
255 EXPECT_FALSE(recognition_ended_
);
256 EXPECT_FALSE(result_received_
);
257 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
259 // Issue the network callback to complete the process.
260 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
261 ASSERT_TRUE(fetcher
);
263 fetcher
->set_url(fetcher
->GetOriginalURL());
264 net::URLRequestStatus status
;
265 status
.set_status(net::URLRequestStatus::SUCCESS
);
266 fetcher
->set_status(status
);
267 fetcher
->set_response_code(200);
268 fetcher
->SetResponseString(
269 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}");
270 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
271 base::MessageLoop::current()->RunUntilIdle();
272 EXPECT_TRUE(recognition_ended_
);
273 EXPECT_TRUE(result_received_
);
274 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
275 CheckFinalEventsConsistency();
278 TEST_F(SpeechRecognizerImplTest
, CancelWithData
) {
279 // Start recording, give some data and then cancel.
280 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
281 base::MessageLoop::current()->RunUntilIdle();
282 TestAudioInputController
* controller
=
283 audio_input_controller_factory_
.controller();
284 ASSERT_TRUE(controller
);
285 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
286 base::MessageLoop::current()->RunUntilIdle();
287 recognizer_
->AbortRecognition();
288 base::MessageLoop::current()->RunUntilIdle();
289 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
290 EXPECT_TRUE(recognition_started_
);
291 EXPECT_TRUE(audio_started_
);
292 EXPECT_FALSE(result_received_
);
293 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_ABORTED
, error_
);
294 CheckFinalEventsConsistency();
297 TEST_F(SpeechRecognizerImplTest
, ConnectionError
) {
298 // Start recording, give some data and then stop. Issue the network callback
299 // with a connection error and verify that the recognizer bubbles the error up
300 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
301 base::MessageLoop::current()->RunUntilIdle();
302 TestAudioInputController
* controller
=
303 audio_input_controller_factory_
.controller();
304 ASSERT_TRUE(controller
);
305 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
306 base::MessageLoop::current()->RunUntilIdle();
307 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
308 ASSERT_TRUE(fetcher
);
310 recognizer_
->StopAudioCapture();
311 base::MessageLoop::current()->RunUntilIdle();
312 EXPECT_TRUE(audio_started_
);
313 EXPECT_TRUE(audio_ended_
);
314 EXPECT_FALSE(recognition_ended_
);
315 EXPECT_FALSE(result_received_
);
316 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
318 // Issue the network callback to complete the process.
319 fetcher
->set_url(fetcher
->GetOriginalURL());
320 net::URLRequestStatus status
;
321 status
.set_status(net::URLRequestStatus::FAILED
);
322 status
.set_error(net::ERR_CONNECTION_REFUSED
);
323 fetcher
->set_status(status
);
324 fetcher
->set_response_code(0);
325 fetcher
->SetResponseString(std::string());
326 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
327 base::MessageLoop::current()->RunUntilIdle();
328 EXPECT_TRUE(recognition_ended_
);
329 EXPECT_FALSE(result_received_
);
330 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
331 CheckFinalEventsConsistency();
334 TEST_F(SpeechRecognizerImplTest
, ServerError
) {
335 // Start recording, give some data and then stop. Issue the network callback
336 // with a 500 error and verify that the recognizer bubbles the error up
337 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
338 base::MessageLoop::current()->RunUntilIdle();
339 TestAudioInputController
* controller
=
340 audio_input_controller_factory_
.controller();
341 ASSERT_TRUE(controller
);
342 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
343 base::MessageLoop::current()->RunUntilIdle();
344 net::TestURLFetcher
* fetcher
= url_fetcher_factory_
.GetFetcherByID(0);
345 ASSERT_TRUE(fetcher
);
347 recognizer_
->StopAudioCapture();
348 base::MessageLoop::current()->RunUntilIdle();
349 EXPECT_TRUE(audio_started_
);
350 EXPECT_TRUE(audio_ended_
);
351 EXPECT_FALSE(recognition_ended_
);
352 EXPECT_FALSE(result_received_
);
353 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
355 // Issue the network callback to complete the process.
356 fetcher
->set_url(fetcher
->GetOriginalURL());
357 net::URLRequestStatus status
;
358 status
.set_status(net::URLRequestStatus::SUCCESS
);
359 fetcher
->set_status(status
);
360 fetcher
->set_response_code(500);
361 fetcher
->SetResponseString("Internal Server Error");
362 fetcher
->delegate()->OnURLFetchComplete(fetcher
);
363 base::MessageLoop::current()->RunUntilIdle();
364 EXPECT_TRUE(recognition_ended_
);
365 EXPECT_FALSE(result_received_
);
366 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
367 CheckFinalEventsConsistency();
370 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorNoData
) {
371 // Check if things tear down properly if AudioInputController threw an error.
372 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
373 base::MessageLoop::current()->RunUntilIdle();
374 TestAudioInputController
* controller
=
375 audio_input_controller_factory_
.controller();
376 ASSERT_TRUE(controller
);
377 controller
->event_handler()->OnError(controller
,
378 AudioInputController::UNKNOWN_ERROR
);
379 base::MessageLoop::current()->RunUntilIdle();
380 EXPECT_TRUE(recognition_started_
);
381 EXPECT_FALSE(audio_started_
);
382 EXPECT_FALSE(result_received_
);
383 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE
, error_
);
384 CheckFinalEventsConsistency();
387 TEST_F(SpeechRecognizerImplTest
, AudioControllerErrorWithData
) {
388 // Check if things tear down properly if AudioInputController threw an error
389 // after giving some audio data.
390 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
391 base::MessageLoop::current()->RunUntilIdle();
392 TestAudioInputController
* controller
=
393 audio_input_controller_factory_
.controller();
394 ASSERT_TRUE(controller
);
395 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
396 controller
->event_handler()->OnError(controller
,
397 AudioInputController::UNKNOWN_ERROR
);
398 base::MessageLoop::current()->RunUntilIdle();
399 ASSERT_TRUE(url_fetcher_factory_
.GetFetcherByID(0));
400 EXPECT_TRUE(recognition_started_
);
401 EXPECT_TRUE(audio_started_
);
402 EXPECT_FALSE(result_received_
);
403 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE
, error_
);
404 CheckFinalEventsConsistency();
407 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackIssued
) {
408 // Start recording and give a lot of packets with audio samples set to zero.
409 // This should trigger the no-speech detector and issue a callback.
410 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
411 base::MessageLoop::current()->RunUntilIdle();
412 TestAudioInputController
* controller
=
413 audio_input_controller_factory_
.controller();
414 ASSERT_TRUE(controller
);
416 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
417 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
+ 1;
418 // The vector is already filled with zero value samples on create.
419 for (int i
= 0; i
< num_packets
; ++i
) {
420 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
422 base::MessageLoop::current()->RunUntilIdle();
423 EXPECT_TRUE(recognition_started_
);
424 EXPECT_TRUE(audio_started_
);
425 EXPECT_FALSE(result_received_
);
426 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH
, error_
);
427 CheckFinalEventsConsistency();
430 TEST_F(SpeechRecognizerImplTest
, NoSpeechCallbackNotIssued
) {
431 // Start recording and give a lot of packets with audio samples set to zero
432 // and then some more with reasonably loud audio samples. This should be
433 // treated as normal speech input and the no-speech detector should not get
435 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
436 base::MessageLoop::current()->RunUntilIdle();
437 TestAudioInputController
* controller
=
438 audio_input_controller_factory_
.controller();
439 ASSERT_TRUE(controller
);
440 controller
= audio_input_controller_factory_
.controller();
441 ASSERT_TRUE(controller
);
443 int num_packets
= (SpeechRecognizerImpl::kNoSpeechTimeoutMs
) /
444 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
446 // The vector is already filled with zero value samples on create.
447 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
448 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
451 FillPacketWithTestWaveform();
452 for (int i
= 0; i
< num_packets
/ 2; ++i
) {
453 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
456 base::MessageLoop::current()->RunUntilIdle();
457 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
458 EXPECT_TRUE(audio_started_
);
459 EXPECT_FALSE(audio_ended_
);
460 EXPECT_FALSE(recognition_ended_
);
461 recognizer_
->AbortRecognition();
462 base::MessageLoop::current()->RunUntilIdle();
463 CheckFinalEventsConsistency();
466 TEST_F(SpeechRecognizerImplTest
, SetInputVolumeCallback
) {
467 // Start recording and give a lot of packets with audio samples set to zero
468 // and then some more with reasonably loud audio samples. Check that we don't
469 // get the callback during estimation phase, then get zero for the silence
470 // samples and proper volume for the loud audio.
471 recognizer_
->StartRecognition(media::AudioManagerBase::kDefaultDeviceId
);
472 base::MessageLoop::current()->RunUntilIdle();
473 TestAudioInputController
* controller
=
474 audio_input_controller_factory_
.controller();
475 ASSERT_TRUE(controller
);
476 controller
= audio_input_controller_factory_
.controller();
477 ASSERT_TRUE(controller
);
479 // Feed some samples to begin with for the endpointer to do noise estimation.
480 int num_packets
= SpeechRecognizerImpl::kEndpointerEstimationTimeMs
/
481 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs
;
482 FillPacketWithNoise();
483 for (int i
= 0; i
< num_packets
; ++i
) {
484 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
486 base::MessageLoop::current()->RunUntilIdle();
487 EXPECT_EQ(-1.0f
, volume_
); // No audio volume set yet.
489 // The vector is already filled with zero value samples on create.
490 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
491 base::MessageLoop::current()->RunUntilIdle();
492 EXPECT_FLOAT_EQ(0.74939233f
, volume_
);
494 FillPacketWithTestWaveform();
495 controller
->event_handler()->OnData(controller
, audio_bus_
.get());
496 base::MessageLoop::current()->RunUntilIdle();
497 EXPECT_NEAR(0.89926866f
, volume_
, 0.00001f
);
498 EXPECT_FLOAT_EQ(0.75071919f
, noise_volume_
);
500 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
501 EXPECT_FALSE(audio_ended_
);
502 EXPECT_FALSE(recognition_ended_
);
503 recognizer_
->AbortRecognition();
504 base::MessageLoop::current()->RunUntilIdle();
505 CheckFinalEventsConsistency();
508 } // namespace content