Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / content / browser / speech / google_streaming_remote_engine_unittest.cc
blobab703d7ac04e7941d021d00e0ccc12c654ade0a0
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <queue>
7 #include "base/memory/scoped_ptr.h"
8 #include "base/message_loop/message_loop.h"
9 #include "base/numerics/safe_conversions.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/sys_byteorder.h"
12 #include "content/browser/speech/audio_buffer.h"
13 #include "content/browser/speech/google_streaming_remote_engine.h"
14 #include "content/browser/speech/proto/google_streaming_api.pb.h"
15 #include "content/public/common/speech_recognition_error.h"
16 #include "content/public/common/speech_recognition_result.h"
17 #include "net/url_request/test_url_fetcher_factory.h"
18 #include "net/url_request/url_request_context_getter.h"
19 #include "net/url_request/url_request_status.h"
20 #include "testing/gtest/include/gtest/gtest.h"
22 using base::HostToNet32;
23 using base::checked_cast;
24 using net::URLRequestStatus;
25 using net::TestURLFetcher;
26 using net::TestURLFetcherFactory;
28 namespace content {
30 // Note: the terms upstream and downstream are from the point-of-view of the
31 // client (engine_under_test_).
33 class GoogleStreamingRemoteEngineTest : public SpeechRecognitionEngineDelegate,
34 public testing::Test {
35 public:
36 GoogleStreamingRemoteEngineTest()
37 : last_number_of_upstream_chunks_seen_(0U),
38 error_(SPEECH_RECOGNITION_ERROR_NONE) { }
40 // Creates a speech recognition request and invokes its URL fetcher delegate
41 // with the given test data.
42 void CreateAndTestRequest(bool success, const std::string& http_response);
44 // SpeechRecognitionRequestDelegate methods.
45 virtual void OnSpeechRecognitionEngineResults(
46 const SpeechRecognitionResults& results) OVERRIDE {
47 results_.push(results);
49 virtual void OnSpeechRecognitionEngineError(
50 const SpeechRecognitionError& error) OVERRIDE {
51 error_ = error.code;
54 // testing::Test methods.
55 virtual void SetUp() OVERRIDE;
56 virtual void TearDown() OVERRIDE;
58 protected:
59 enum DownstreamError {
60 DOWNSTREAM_ERROR_NONE,
61 DOWNSTREAM_ERROR_HTTP500,
62 DOWNSTREAM_ERROR_NETWORK,
63 DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
65 static bool ResultsAreEqual(const SpeechRecognitionResults& a,
66 const SpeechRecognitionResults& b);
67 static std::string SerializeProtobufResponse(
68 const proto::SpeechRecognitionEvent& msg);
70 TestURLFetcher* GetUpstreamFetcher();
71 TestURLFetcher* GetDownstreamFetcher();
72 void StartMockRecognition();
73 void EndMockRecognition();
74 void InjectDummyAudioChunk();
75 size_t UpstreamChunksUploadedFromLastCall();
76 void ProvideMockProtoResultDownstream(
77 const proto::SpeechRecognitionEvent& result);
78 void ProvideMockResultDownstream(const SpeechRecognitionResult& result);
79 void ExpectResultsReceived(const SpeechRecognitionResults& result);
80 void CloseMockDownstream(DownstreamError error);
82 scoped_ptr<GoogleStreamingRemoteEngine> engine_under_test_;
83 TestURLFetcherFactory url_fetcher_factory_;
84 size_t last_number_of_upstream_chunks_seen_;
85 base::MessageLoop message_loop_;
86 std::string response_buffer_;
87 SpeechRecognitionErrorCode error_;
88 std::queue<SpeechRecognitionResults> results_;
91 TEST_F(GoogleStreamingRemoteEngineTest, SingleDefinitiveResult) {
92 StartMockRecognition();
93 ASSERT_TRUE(GetUpstreamFetcher());
94 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
96 // Inject some dummy audio chunks and check a corresponding chunked upload
97 // is performed every time on the server.
98 for (int i = 0; i < 3; ++i) {
99 InjectDummyAudioChunk();
100 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
103 // Ensure that a final (empty) audio chunk is uploaded on chunks end.
104 engine_under_test_->AudioChunksEnded();
105 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
106 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
108 // Simulate a protobuf message streamed from the server containing a single
109 // result with two hypotheses.
110 SpeechRecognitionResults results;
111 results.push_back(SpeechRecognitionResult());
112 SpeechRecognitionResult& result = results.back();
113 result.is_provisional = false;
114 result.hypotheses.push_back(
115 SpeechRecognitionHypothesis(base::UTF8ToUTF16("hypothesis 1"), 0.1F));
116 result.hypotheses.push_back(
117 SpeechRecognitionHypothesis(base::UTF8ToUTF16("hypothesis 2"), 0.2F));
119 ProvideMockResultDownstream(result);
120 ExpectResultsReceived(results);
121 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
123 // Ensure everything is closed cleanly after the downstream is closed.
124 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
125 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
126 EndMockRecognition();
127 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
128 ASSERT_EQ(0U, results_.size());
131 TEST_F(GoogleStreamingRemoteEngineTest, SeveralStreamingResults) {
132 StartMockRecognition();
133 ASSERT_TRUE(GetUpstreamFetcher());
134 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
136 for (int i = 0; i < 4; ++i) {
137 InjectDummyAudioChunk();
138 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
140 SpeechRecognitionResults results;
141 results.push_back(SpeechRecognitionResult());
142 SpeechRecognitionResult& result = results.back();
143 result.is_provisional = (i % 2 == 0); // Alternate result types.
144 float confidence = result.is_provisional ? 0.0F : (i * 0.1F);
145 result.hypotheses.push_back(SpeechRecognitionHypothesis(
146 base::UTF8ToUTF16("hypothesis"), confidence));
148 ProvideMockResultDownstream(result);
149 ExpectResultsReceived(results);
150 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
153 // Ensure that a final (empty) audio chunk is uploaded on chunks end.
154 engine_under_test_->AudioChunksEnded();
155 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
156 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
158 // Simulate a final definitive result.
159 SpeechRecognitionResults results;
160 results.push_back(SpeechRecognitionResult());
161 SpeechRecognitionResult& result = results.back();
162 result.is_provisional = false;
163 result.hypotheses.push_back(
164 SpeechRecognitionHypothesis(base::UTF8ToUTF16("The final result"), 1.0F));
165 ProvideMockResultDownstream(result);
166 ExpectResultsReceived(results);
167 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
169 // Ensure everything is closed cleanly after the downstream is closed.
170 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
171 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
172 EndMockRecognition();
173 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
174 ASSERT_EQ(0U, results_.size());
177 TEST_F(GoogleStreamingRemoteEngineTest, NoFinalResultAfterAudioChunksEnded) {
178 StartMockRecognition();
179 ASSERT_TRUE(GetUpstreamFetcher());
180 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
182 // Simulate one pushed audio chunk.
183 InjectDummyAudioChunk();
184 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
186 // Simulate the corresponding definitive result.
187 SpeechRecognitionResults results;
188 results.push_back(SpeechRecognitionResult());
189 SpeechRecognitionResult& result = results.back();
190 result.hypotheses.push_back(
191 SpeechRecognitionHypothesis(base::UTF8ToUTF16("hypothesis"), 1.0F));
192 ProvideMockResultDownstream(result);
193 ExpectResultsReceived(results);
194 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
196 // Simulate a silent downstream closure after |AudioChunksEnded|.
197 engine_under_test_->AudioChunksEnded();
198 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
199 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
200 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
202 // Expect an empty result, aimed at notifying recognition ended with no
203 // actual results nor errors.
204 SpeechRecognitionResults empty_results;
205 ExpectResultsReceived(empty_results);
207 // Ensure everything is closed cleanly after the downstream is closed.
208 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
209 EndMockRecognition();
210 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
211 ASSERT_EQ(0U, results_.size());
214 TEST_F(GoogleStreamingRemoteEngineTest, NoMatchError) {
215 StartMockRecognition();
216 ASSERT_TRUE(GetUpstreamFetcher());
217 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
219 for (int i = 0; i < 3; ++i)
220 InjectDummyAudioChunk();
221 engine_under_test_->AudioChunksEnded();
222 ASSERT_EQ(4U, UpstreamChunksUploadedFromLastCall());
223 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
225 // Simulate only a provisional result.
226 SpeechRecognitionResults results;
227 results.push_back(SpeechRecognitionResult());
228 SpeechRecognitionResult& result = results.back();
229 result.is_provisional = true;
230 result.hypotheses.push_back(
231 SpeechRecognitionHypothesis(base::UTF8ToUTF16("The final result"), 0.0F));
232 ProvideMockResultDownstream(result);
233 ExpectResultsReceived(results);
234 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
236 CloseMockDownstream(DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH);
238 // Expect an empty result.
239 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
240 EndMockRecognition();
241 SpeechRecognitionResults empty_result;
242 ExpectResultsReceived(empty_result);
245 TEST_F(GoogleStreamingRemoteEngineTest, HTTPError) {
246 StartMockRecognition();
247 ASSERT_TRUE(GetUpstreamFetcher());
248 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
250 InjectDummyAudioChunk();
251 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
253 // Close the downstream with a HTTP 500 error.
254 CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500);
256 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
257 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
258 EndMockRecognition();
259 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
260 ASSERT_EQ(0U, results_.size());
263 TEST_F(GoogleStreamingRemoteEngineTest, NetworkError) {
264 StartMockRecognition();
265 ASSERT_TRUE(GetUpstreamFetcher());
266 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
268 InjectDummyAudioChunk();
269 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
271 // Close the downstream fetcher simulating a network failure.
272 CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK);
274 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
275 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
276 EndMockRecognition();
277 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
278 ASSERT_EQ(0U, results_.size());
281 TEST_F(GoogleStreamingRemoteEngineTest, Stability) {
282 StartMockRecognition();
283 ASSERT_TRUE(GetUpstreamFetcher());
284 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
286 // Upload a dummy audio chunk.
287 InjectDummyAudioChunk();
288 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
289 engine_under_test_->AudioChunksEnded();
291 // Simulate a protobuf message with an intermediate result without confidence,
292 // but with stability.
293 proto::SpeechRecognitionEvent proto_event;
294 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
295 proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
296 proto_result->set_stability(0.5);
297 proto::SpeechRecognitionAlternative *proto_alternative =
298 proto_result->add_alternative();
299 proto_alternative->set_transcript("foo");
300 ProvideMockProtoResultDownstream(proto_event);
302 // Set up expectations.
303 SpeechRecognitionResults results;
304 results.push_back(SpeechRecognitionResult());
305 SpeechRecognitionResult& result = results.back();
306 result.is_provisional = true;
307 result.hypotheses.push_back(
308 SpeechRecognitionHypothesis(base::UTF8ToUTF16("foo"), 0.5));
310 // Check that the protobuf generated the expected result.
311 ExpectResultsReceived(results);
313 // Since it was a provisional result, recognition is still pending.
314 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
316 // Shut down.
317 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
318 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
319 EndMockRecognition();
321 // Since there was no final result, we get an empty "no match" result.
322 SpeechRecognitionResults empty_result;
323 ExpectResultsReceived(empty_result);
324 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
325 ASSERT_EQ(0U, results_.size());
328 void GoogleStreamingRemoteEngineTest::SetUp() {
329 engine_under_test_.reset(
330 new GoogleStreamingRemoteEngine(NULL /*URLRequestContextGetter*/));
331 engine_under_test_->set_delegate(this);
334 void GoogleStreamingRemoteEngineTest::TearDown() {
335 engine_under_test_.reset();
338 TestURLFetcher* GoogleStreamingRemoteEngineTest::GetUpstreamFetcher() {
339 return url_fetcher_factory_.GetFetcherByID(
340 GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting);
343 TestURLFetcher* GoogleStreamingRemoteEngineTest::GetDownstreamFetcher() {
344 return url_fetcher_factory_.GetFetcherByID(
345 GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting);
348 // Starts recognition on the engine, ensuring that both stream fetchers are
349 // created.
350 void GoogleStreamingRemoteEngineTest::StartMockRecognition() {
351 DCHECK(engine_under_test_.get());
353 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
355 engine_under_test_->StartRecognition();
356 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
358 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher();
359 ASSERT_TRUE(upstream_fetcher);
360 upstream_fetcher->set_url(upstream_fetcher->GetOriginalURL());
362 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
363 ASSERT_TRUE(downstream_fetcher);
364 downstream_fetcher->set_url(downstream_fetcher->GetOriginalURL());
367 void GoogleStreamingRemoteEngineTest::EndMockRecognition() {
368 DCHECK(engine_under_test_.get());
369 engine_under_test_->EndRecognition();
370 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
372 // TODO(primiano): In order to be very pedantic we should check that both the
373 // upstream and downstream URL fetchers have been disposed at this time.
374 // Unfortunately it seems that there is no direct way to detect (in tests)
375 // if a url_fetcher has been freed or not, since they are not automatically
376 // de-registered from the TestURLFetcherFactory on destruction.
379 void GoogleStreamingRemoteEngineTest::InjectDummyAudioChunk() {
380 unsigned char dummy_audio_buffer_data[2] = {'\0', '\0'};
381 scoped_refptr<AudioChunk> dummy_audio_chunk(
382 new AudioChunk(&dummy_audio_buffer_data[0],
383 sizeof(dummy_audio_buffer_data),
384 2 /* bytes per sample */));
385 DCHECK(engine_under_test_.get());
386 engine_under_test_->TakeAudioChunk(*dummy_audio_chunk.get());
389 size_t GoogleStreamingRemoteEngineTest::UpstreamChunksUploadedFromLastCall() {
390 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher();
391 DCHECK(upstream_fetcher);
392 const size_t number_of_chunks = upstream_fetcher->upload_chunks().size();
393 DCHECK_GE(number_of_chunks, last_number_of_upstream_chunks_seen_);
394 const size_t new_chunks = number_of_chunks -
395 last_number_of_upstream_chunks_seen_;
396 last_number_of_upstream_chunks_seen_ = number_of_chunks;
397 return new_chunks;
400 void GoogleStreamingRemoteEngineTest::ProvideMockProtoResultDownstream(
401 const proto::SpeechRecognitionEvent& result) {
402 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
404 ASSERT_TRUE(downstream_fetcher);
405 downstream_fetcher->set_status(URLRequestStatus(/* default=SUCCESS */));
406 downstream_fetcher->set_response_code(200);
408 std::string response_string = SerializeProtobufResponse(result);
409 response_buffer_.append(response_string);
410 downstream_fetcher->SetResponseString(response_buffer_);
411 downstream_fetcher->delegate()->OnURLFetchDownloadProgress(
412 downstream_fetcher,
413 response_buffer_.size(),
414 -1 /* total response length not used */);
417 void GoogleStreamingRemoteEngineTest::ProvideMockResultDownstream(
418 const SpeechRecognitionResult& result) {
419 proto::SpeechRecognitionEvent proto_event;
420 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
421 proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
422 proto_result->set_final(!result.is_provisional);
423 for (size_t i = 0; i < result.hypotheses.size(); ++i) {
424 proto::SpeechRecognitionAlternative* proto_alternative =
425 proto_result->add_alternative();
426 const SpeechRecognitionHypothesis& hypothesis = result.hypotheses[i];
427 proto_alternative->set_confidence(hypothesis.confidence);
428 proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis.utterance));
430 ProvideMockProtoResultDownstream(proto_event);
433 void GoogleStreamingRemoteEngineTest::CloseMockDownstream(
434 DownstreamError error) {
435 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
436 ASSERT_TRUE(downstream_fetcher);
438 const URLRequestStatus::Status fetcher_status =
439 (error == DOWNSTREAM_ERROR_NETWORK) ? URLRequestStatus::FAILED :
440 URLRequestStatus::SUCCESS;
441 downstream_fetcher->set_status(URLRequestStatus(fetcher_status, 0));
442 downstream_fetcher->set_response_code(
443 (error == DOWNSTREAM_ERROR_HTTP500) ? 500 : 200);
445 if (error == DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH) {
446 // Send empty response.
447 proto::SpeechRecognitionEvent response;
448 response_buffer_.append(SerializeProtobufResponse(response));
450 downstream_fetcher->SetResponseString(response_buffer_);
451 downstream_fetcher->delegate()->OnURLFetchComplete(downstream_fetcher);
454 void GoogleStreamingRemoteEngineTest::ExpectResultsReceived(
455 const SpeechRecognitionResults& results) {
456 ASSERT_GE(1U, results_.size());
457 ASSERT_TRUE(ResultsAreEqual(results, results_.front()));
458 results_.pop();
461 bool GoogleStreamingRemoteEngineTest::ResultsAreEqual(
462 const SpeechRecognitionResults& a, const SpeechRecognitionResults& b) {
463 if (a.size() != b.size())
464 return false;
466 SpeechRecognitionResults::const_iterator it_a = a.begin();
467 SpeechRecognitionResults::const_iterator it_b = b.begin();
468 for (; it_a != a.end() && it_b != b.end(); ++it_a, ++it_b) {
469 if (it_a->is_provisional != it_b->is_provisional ||
470 it_a->hypotheses.size() != it_b->hypotheses.size()) {
471 return false;
473 for (size_t i = 0; i < it_a->hypotheses.size(); ++i) {
474 const SpeechRecognitionHypothesis& hyp_a = it_a->hypotheses[i];
475 const SpeechRecognitionHypothesis& hyp_b = it_b->hypotheses[i];
476 if (hyp_a.utterance != hyp_b.utterance ||
477 hyp_a.confidence != hyp_b.confidence) {
478 return false;
483 return true;
486 std::string GoogleStreamingRemoteEngineTest::SerializeProtobufResponse(
487 const proto::SpeechRecognitionEvent& msg) {
488 std::string msg_string;
489 msg.SerializeToString(&msg_string);
491 // Prepend 4 byte prefix length indication to the protobuf message as
492 // envisaged by the google streaming recognition webservice protocol.
493 uint32 prefix = HostToNet32(checked_cast<uint32>(msg_string.size()));
494 msg_string.insert(0, reinterpret_cast<char*>(&prefix), sizeof(prefix));
496 return msg_string;
499 } // namespace content