1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "base/memory/scoped_ptr.h"
8 #include "base/message_loop/message_loop.h"
9 #include "base/safe_numerics.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/sys_byteorder.h"
12 #include "content/browser/speech/audio_buffer.h"
13 #include "content/browser/speech/google_streaming_remote_engine.h"
14 #include "content/browser/speech/proto/google_streaming_api.pb.h"
15 #include "content/public/common/speech_recognition_error.h"
16 #include "content/public/common/speech_recognition_result.h"
17 #include "net/url_request/test_url_fetcher_factory.h"
18 #include "net/url_request/url_request_context_getter.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using base::HostToNet32
;
23 using base::checked_numeric_cast
;
24 using net::URLRequestStatus
;
25 using net::TestURLFetcher
;
26 using net::TestURLFetcherFactory
;
30 // Note: the terms upstream and downstream are from the point-of-view of the
31 // client (engine_under_test_).
33 class GoogleStreamingRemoteEngineTest
: public SpeechRecognitionEngineDelegate
,
34 public testing::Test
{
36 GoogleStreamingRemoteEngineTest()
37 : last_number_of_upstream_chunks_seen_(0U),
38 error_(SPEECH_RECOGNITION_ERROR_NONE
) { }
40 // Creates a speech recognition request and invokes its URL fetcher delegate
41 // with the given test data.
42 void CreateAndTestRequest(bool success
, const std::string
& http_response
);
44 // SpeechRecognitionRequestDelegate methods.
45 virtual void OnSpeechRecognitionEngineResults(
46 const SpeechRecognitionResults
& results
) OVERRIDE
{
47 results_
.push(results
);
49 virtual void OnSpeechRecognitionEngineError(
50 const SpeechRecognitionError
& error
) OVERRIDE
{
54 // testing::Test methods.
55 virtual void SetUp() OVERRIDE
;
56 virtual void TearDown() OVERRIDE
;
59 enum DownstreamError
{
60 DOWNSTREAM_ERROR_NONE
,
61 DOWNSTREAM_ERROR_HTTP500
,
62 DOWNSTREAM_ERROR_NETWORK
,
63 DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
65 static bool ResultsAreEqual(const SpeechRecognitionResults
& a
,
66 const SpeechRecognitionResults
& b
);
67 static std::string
SerializeProtobufResponse(
68 const proto::SpeechRecognitionEvent
& msg
);
70 TestURLFetcher
* GetUpstreamFetcher();
71 TestURLFetcher
* GetDownstreamFetcher();
72 void StartMockRecognition();
73 void EndMockRecognition();
74 void InjectDummyAudioChunk();
75 size_t UpstreamChunksUploadedFromLastCall();
76 void ProvideMockProtoResultDownstream(
77 const proto::SpeechRecognitionEvent
& result
);
78 void ProvideMockResultDownstream(const SpeechRecognitionResult
& result
);
79 void ExpectResultsReceived(const SpeechRecognitionResults
& result
);
80 void CloseMockDownstream(DownstreamError error
);
82 scoped_ptr
<GoogleStreamingRemoteEngine
> engine_under_test_
;
83 TestURLFetcherFactory url_fetcher_factory_
;
84 size_t last_number_of_upstream_chunks_seen_
;
85 base::MessageLoop message_loop_
;
86 std::string response_buffer_
;
87 SpeechRecognitionErrorCode error_
;
88 std::queue
<SpeechRecognitionResults
> results_
;
91 TEST_F(GoogleStreamingRemoteEngineTest
, SingleDefinitiveResult
) {
92 StartMockRecognition();
93 ASSERT_TRUE(GetUpstreamFetcher());
94 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
96 // Inject some dummy audio chunks and check a corresponding chunked upload
97 // is performed every time on the server.
98 for (int i
= 0; i
< 3; ++i
) {
99 InjectDummyAudioChunk();
100 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
103 // Ensure that a final (empty) audio chunk is uploaded on chunks end.
104 engine_under_test_
->AudioChunksEnded();
105 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
106 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
108 // Simulate a protobuf message streamed from the server containing a single
109 // result with two hypotheses.
110 SpeechRecognitionResults results
;
111 results
.push_back(SpeechRecognitionResult());
112 SpeechRecognitionResult
& result
= results
.back();
113 result
.is_provisional
= false;
114 result
.hypotheses
.push_back(
115 SpeechRecognitionHypothesis(base::UTF8ToUTF16("hypothesis 1"), 0.1F
));
116 result
.hypotheses
.push_back(
117 SpeechRecognitionHypothesis(base::UTF8ToUTF16("hypothesis 2"), 0.2F
));
119 ProvideMockResultDownstream(result
);
120 ExpectResultsReceived(results
);
121 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
123 // Ensure everything is closed cleanly after the downstream is closed.
124 CloseMockDownstream(DOWNSTREAM_ERROR_NONE
);
125 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
126 EndMockRecognition();
127 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
128 ASSERT_EQ(0U, results_
.size());
131 TEST_F(GoogleStreamingRemoteEngineTest
, SeveralStreamingResults
) {
132 StartMockRecognition();
133 ASSERT_TRUE(GetUpstreamFetcher());
134 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
136 for (int i
= 0; i
< 4; ++i
) {
137 InjectDummyAudioChunk();
138 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
140 SpeechRecognitionResults results
;
141 results
.push_back(SpeechRecognitionResult());
142 SpeechRecognitionResult
& result
= results
.back();
143 result
.is_provisional
= (i
% 2 == 0); // Alternate result types.
144 float confidence
= result
.is_provisional
? 0.0F
: (i
* 0.1F
);
145 result
.hypotheses
.push_back(SpeechRecognitionHypothesis(
146 base::UTF8ToUTF16("hypothesis"), confidence
));
148 ProvideMockResultDownstream(result
);
149 ExpectResultsReceived(results
);
150 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
153 // Ensure that a final (empty) audio chunk is uploaded on chunks end.
154 engine_under_test_
->AudioChunksEnded();
155 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
156 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
158 // Simulate a final definitive result.
159 SpeechRecognitionResults results
;
160 results
.push_back(SpeechRecognitionResult());
161 SpeechRecognitionResult
& result
= results
.back();
162 result
.is_provisional
= false;
163 result
.hypotheses
.push_back(
164 SpeechRecognitionHypothesis(base::UTF8ToUTF16("The final result"), 1.0F
));
165 ProvideMockResultDownstream(result
);
166 ExpectResultsReceived(results
);
167 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
169 // Ensure everything is closed cleanly after the downstream is closed.
170 CloseMockDownstream(DOWNSTREAM_ERROR_NONE
);
171 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
172 EndMockRecognition();
173 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
174 ASSERT_EQ(0U, results_
.size());
177 TEST_F(GoogleStreamingRemoteEngineTest
, NoFinalResultAfterAudioChunksEnded
) {
178 StartMockRecognition();
179 ASSERT_TRUE(GetUpstreamFetcher());
180 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
182 // Simulate one pushed audio chunk.
183 InjectDummyAudioChunk();
184 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
186 // Simulate the corresponding definitive result.
187 SpeechRecognitionResults results
;
188 results
.push_back(SpeechRecognitionResult());
189 SpeechRecognitionResult
& result
= results
.back();
190 result
.hypotheses
.push_back(
191 SpeechRecognitionHypothesis(base::UTF8ToUTF16("hypothesis"), 1.0F
));
192 ProvideMockResultDownstream(result
);
193 ExpectResultsReceived(results
);
194 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
196 // Simulate a silent downstream closure after |AudioChunksEnded|.
197 engine_under_test_
->AudioChunksEnded();
198 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
199 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
200 CloseMockDownstream(DOWNSTREAM_ERROR_NONE
);
202 // Expect an empty result, aimed at notifying recognition ended with no
203 // actual results nor errors.
204 SpeechRecognitionResults empty_results
;
205 ExpectResultsReceived(empty_results
);
207 // Ensure everything is closed cleanly after the downstream is closed.
208 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
209 EndMockRecognition();
210 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
211 ASSERT_EQ(0U, results_
.size());
214 TEST_F(GoogleStreamingRemoteEngineTest
, NoMatchError
) {
215 StartMockRecognition();
216 ASSERT_TRUE(GetUpstreamFetcher());
217 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
219 for (int i
= 0; i
< 3; ++i
)
220 InjectDummyAudioChunk();
221 engine_under_test_
->AudioChunksEnded();
222 ASSERT_EQ(4U, UpstreamChunksUploadedFromLastCall());
223 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
225 // Simulate only a provisional result.
226 SpeechRecognitionResults results
;
227 results
.push_back(SpeechRecognitionResult());
228 SpeechRecognitionResult
& result
= results
.back();
229 result
.is_provisional
= true;
230 result
.hypotheses
.push_back(
231 SpeechRecognitionHypothesis(base::UTF8ToUTF16("The final result"), 0.0F
));
232 ProvideMockResultDownstream(result
);
233 ExpectResultsReceived(results
);
234 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
236 CloseMockDownstream(DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
);
238 // Expect an empty result.
239 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
240 EndMockRecognition();
241 SpeechRecognitionResults empty_result
;
242 ExpectResultsReceived(empty_result
);
245 TEST_F(GoogleStreamingRemoteEngineTest
, HTTPError
) {
246 StartMockRecognition();
247 ASSERT_TRUE(GetUpstreamFetcher());
248 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
250 InjectDummyAudioChunk();
251 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
253 // Close the downstream with a HTTP 500 error.
254 CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500
);
256 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
257 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
258 EndMockRecognition();
259 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
260 ASSERT_EQ(0U, results_
.size());
263 TEST_F(GoogleStreamingRemoteEngineTest
, NetworkError
) {
264 StartMockRecognition();
265 ASSERT_TRUE(GetUpstreamFetcher());
266 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
268 InjectDummyAudioChunk();
269 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
271 // Close the downstream fetcher simulating a network failure.
272 CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK
);
274 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
275 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
276 EndMockRecognition();
277 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK
, error_
);
278 ASSERT_EQ(0U, results_
.size());
281 TEST_F(GoogleStreamingRemoteEngineTest
, Stability
) {
282 StartMockRecognition();
283 ASSERT_TRUE(GetUpstreamFetcher());
284 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
286 // Upload a dummy audio chunk.
287 InjectDummyAudioChunk();
288 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
289 engine_under_test_
->AudioChunksEnded();
291 // Simulate a protobuf message with an intermediate result without confidence,
292 // but with stability.
293 proto::SpeechRecognitionEvent proto_event
;
294 proto_event
.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS
);
295 proto::SpeechRecognitionResult
* proto_result
= proto_event
.add_result();
296 proto_result
->set_stability(0.5);
297 proto::SpeechRecognitionAlternative
*proto_alternative
=
298 proto_result
->add_alternative();
299 proto_alternative
->set_transcript("foo");
300 ProvideMockProtoResultDownstream(proto_event
);
302 // Set up expectations.
303 SpeechRecognitionResults results
;
304 results
.push_back(SpeechRecognitionResult());
305 SpeechRecognitionResult
& result
= results
.back();
306 result
.is_provisional
= true;
307 result
.hypotheses
.push_back(
308 SpeechRecognitionHypothesis(base::UTF8ToUTF16("foo"), 0.5));
310 // Check that the protobuf generated the expected result.
311 ExpectResultsReceived(results
);
313 // Since it was a provisional result, recognition is still pending.
314 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
317 CloseMockDownstream(DOWNSTREAM_ERROR_NONE
);
318 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
319 EndMockRecognition();
321 // Since there was no final result, we get an empty "no match" result.
322 SpeechRecognitionResults empty_result
;
323 ExpectResultsReceived(empty_result
);
324 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE
, error_
);
325 ASSERT_EQ(0U, results_
.size());
328 void GoogleStreamingRemoteEngineTest::SetUp() {
329 engine_under_test_
.reset(
330 new GoogleStreamingRemoteEngine(NULL
/*URLRequestContextGetter*/));
331 engine_under_test_
->set_delegate(this);
334 void GoogleStreamingRemoteEngineTest::TearDown() {
335 engine_under_test_
.reset();
338 TestURLFetcher
* GoogleStreamingRemoteEngineTest::GetUpstreamFetcher() {
339 return url_fetcher_factory_
.GetFetcherByID(
340 GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting
);
343 TestURLFetcher
* GoogleStreamingRemoteEngineTest::GetDownstreamFetcher() {
344 return url_fetcher_factory_
.GetFetcherByID(
345 GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting
);
348 // Starts recognition on the engine, ensuring that both stream fetchers are
350 void GoogleStreamingRemoteEngineTest::StartMockRecognition() {
351 DCHECK(engine_under_test_
.get());
353 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
355 engine_under_test_
->StartRecognition();
356 ASSERT_TRUE(engine_under_test_
->IsRecognitionPending());
358 TestURLFetcher
* upstream_fetcher
= GetUpstreamFetcher();
359 ASSERT_TRUE(upstream_fetcher
);
360 upstream_fetcher
->set_url(upstream_fetcher
->GetOriginalURL());
362 TestURLFetcher
* downstream_fetcher
= GetDownstreamFetcher();
363 ASSERT_TRUE(downstream_fetcher
);
364 downstream_fetcher
->set_url(downstream_fetcher
->GetOriginalURL());
367 void GoogleStreamingRemoteEngineTest::EndMockRecognition() {
368 DCHECK(engine_under_test_
.get());
369 engine_under_test_
->EndRecognition();
370 ASSERT_FALSE(engine_under_test_
->IsRecognitionPending());
372 // TODO(primiano): In order to be very pedantic we should check that both the
373 // upstream and downstream URL fetchers have been disposed at this time.
374 // Unfortunately it seems that there is no direct way to detect (in tests)
375 // if a url_fetcher has been freed or not, since they are not automatically
376 // de-registered from the TestURLFetcherFactory on destruction.
379 void GoogleStreamingRemoteEngineTest::InjectDummyAudioChunk() {
380 unsigned char dummy_audio_buffer_data
[2] = {'\0', '\0'};
381 scoped_refptr
<AudioChunk
> dummy_audio_chunk(
382 new AudioChunk(&dummy_audio_buffer_data
[0],
383 sizeof(dummy_audio_buffer_data
),
384 2 /* bytes per sample */));
385 DCHECK(engine_under_test_
.get());
386 engine_under_test_
->TakeAudioChunk(*dummy_audio_chunk
.get());
389 size_t GoogleStreamingRemoteEngineTest::UpstreamChunksUploadedFromLastCall() {
390 TestURLFetcher
* upstream_fetcher
= GetUpstreamFetcher();
391 DCHECK(upstream_fetcher
);
392 const size_t number_of_chunks
= upstream_fetcher
->upload_chunks().size();
393 DCHECK_GE(number_of_chunks
, last_number_of_upstream_chunks_seen_
);
394 const size_t new_chunks
= number_of_chunks
-
395 last_number_of_upstream_chunks_seen_
;
396 last_number_of_upstream_chunks_seen_
= number_of_chunks
;
400 void GoogleStreamingRemoteEngineTest::ProvideMockProtoResultDownstream(
401 const proto::SpeechRecognitionEvent
& result
) {
402 TestURLFetcher
* downstream_fetcher
= GetDownstreamFetcher();
404 ASSERT_TRUE(downstream_fetcher
);
405 downstream_fetcher
->set_status(URLRequestStatus(/* default=SUCCESS */));
406 downstream_fetcher
->set_response_code(200);
408 std::string response_string
= SerializeProtobufResponse(result
);
409 response_buffer_
.append(response_string
);
410 downstream_fetcher
->SetResponseString(response_buffer_
);
411 downstream_fetcher
->delegate()->OnURLFetchDownloadProgress(
413 response_buffer_
.size(),
414 -1 /* total response length not used */);
417 void GoogleStreamingRemoteEngineTest::ProvideMockResultDownstream(
418 const SpeechRecognitionResult
& result
) {
419 proto::SpeechRecognitionEvent proto_event
;
420 proto_event
.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS
);
421 proto::SpeechRecognitionResult
* proto_result
= proto_event
.add_result();
422 proto_result
->set_final(!result
.is_provisional
);
423 for (size_t i
= 0; i
< result
.hypotheses
.size(); ++i
) {
424 proto::SpeechRecognitionAlternative
* proto_alternative
=
425 proto_result
->add_alternative();
426 const SpeechRecognitionHypothesis
& hypothesis
= result
.hypotheses
[i
];
427 proto_alternative
->set_confidence(hypothesis
.confidence
);
428 proto_alternative
->set_transcript(base::UTF16ToUTF8(hypothesis
.utterance
));
430 ProvideMockProtoResultDownstream(proto_event
);
433 void GoogleStreamingRemoteEngineTest::CloseMockDownstream(
434 DownstreamError error
) {
435 TestURLFetcher
* downstream_fetcher
= GetDownstreamFetcher();
436 ASSERT_TRUE(downstream_fetcher
);
438 const URLRequestStatus::Status fetcher_status
=
439 (error
== DOWNSTREAM_ERROR_NETWORK
) ? URLRequestStatus::FAILED
:
440 URLRequestStatus::SUCCESS
;
441 downstream_fetcher
->set_status(URLRequestStatus(fetcher_status
, 0));
442 downstream_fetcher
->set_response_code(
443 (error
== DOWNSTREAM_ERROR_HTTP500
) ? 500 : 200);
445 if (error
== DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
) {
446 // Send empty response.
447 proto::SpeechRecognitionEvent response
;
448 response_buffer_
.append(SerializeProtobufResponse(response
));
450 downstream_fetcher
->SetResponseString(response_buffer_
);
451 downstream_fetcher
->delegate()->OnURLFetchComplete(downstream_fetcher
);
454 void GoogleStreamingRemoteEngineTest::ExpectResultsReceived(
455 const SpeechRecognitionResults
& results
) {
456 ASSERT_GE(1U, results_
.size());
457 ASSERT_TRUE(ResultsAreEqual(results
, results_
.front()));
461 bool GoogleStreamingRemoteEngineTest::ResultsAreEqual(
462 const SpeechRecognitionResults
& a
, const SpeechRecognitionResults
& b
) {
463 if (a
.size() != b
.size())
466 SpeechRecognitionResults::const_iterator it_a
= a
.begin();
467 SpeechRecognitionResults::const_iterator it_b
= b
.begin();
468 for (; it_a
!= a
.end() && it_b
!= b
.end(); ++it_a
, ++it_b
) {
469 if (it_a
->is_provisional
!= it_b
->is_provisional
||
470 it_a
->hypotheses
.size() != it_b
->hypotheses
.size()) {
473 for (size_t i
= 0; i
< it_a
->hypotheses
.size(); ++i
) {
474 const SpeechRecognitionHypothesis
& hyp_a
= it_a
->hypotheses
[i
];
475 const SpeechRecognitionHypothesis
& hyp_b
= it_b
->hypotheses
[i
];
476 if (hyp_a
.utterance
!= hyp_b
.utterance
||
477 hyp_a
.confidence
!= hyp_b
.confidence
) {
486 std::string
GoogleStreamingRemoteEngineTest::SerializeProtobufResponse(
487 const proto::SpeechRecognitionEvent
& msg
) {
488 std::string msg_string
;
489 msg
.SerializeToString(&msg_string
);
491 // Prepend 4 byte prefix length indication to the protobuf message as
492 // envisaged by the google streaming recognition webservice protocol.
493 uint32 prefix
= HostToNet32(checked_numeric_cast
<uint32
>(msg_string
.size()));
494 msg_string
.insert(0, reinterpret_cast<char*>(&prefix
), sizeof(prefix
));
499 } // namespace content