Save errno for logging before potentially overwriting it.
[chromium-blink-merge.git] / content / browser / speech / google_streaming_remote_engine_unittest.cc
blob437bad54408a3ae6a074c8aef57ff6032786d385
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <queue>
7 #include "base/memory/scoped_ptr.h"
8 #include "base/message_loop.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "content/browser/speech/audio_buffer.h"
11 #include "content/browser/speech/google_streaming_remote_engine.h"
12 #include "content/browser/speech/proto/google_streaming_api.pb.h"
13 #include "content/public/common/speech_recognition_error.h"
14 #include "content/public/common/speech_recognition_result.h"
15 #include "net/url_request/test_url_fetcher_factory.h"
16 #include "net/url_request/url_request_context_getter.h"
17 #include "net/url_request/url_request_status.h"
18 #include "testing/gtest/include/gtest/gtest.h"
20 using net::URLRequestStatus;
21 using net::TestURLFetcher;
22 using net::TestURLFetcherFactory;
24 namespace content {
26 // Note: the terms upstream and downstream are from the point-of-view of the
27 // client (engine_under_test_).
29 class GoogleStreamingRemoteEngineTest : public SpeechRecognitionEngineDelegate,
30 public testing::Test {
31 public:
32 GoogleStreamingRemoteEngineTest()
33 : last_number_of_upstream_chunks_seen_(0U),
34 error_(SPEECH_RECOGNITION_ERROR_NONE) { }
36 // Creates a speech recognition request and invokes its URL fetcher delegate
37 // with the given test data.
38 void CreateAndTestRequest(bool success, const std::string& http_response);
40 // SpeechRecognitionRequestDelegate methods.
41 virtual void OnSpeechRecognitionEngineResults(
42 const SpeechRecognitionResults& results) OVERRIDE {
43 results_.push(results);
45 virtual void OnSpeechRecognitionEngineError(
46 const SpeechRecognitionError& error) OVERRIDE {
47 error_ = error.code;
50 // testing::Test methods.
51 virtual void SetUp() OVERRIDE;
52 virtual void TearDown() OVERRIDE;
54 protected:
55 enum DownstreamError {
56 DOWNSTREAM_ERROR_NONE,
57 DOWNSTREAM_ERROR_HTTP500,
58 DOWNSTREAM_ERROR_NETWORK,
59 DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
61 static bool ResultsAreEqual(const SpeechRecognitionResults& a,
62 const SpeechRecognitionResults& b);
63 static std::string SerializeProtobufResponse(
64 const proto::SpeechRecognitionEvent& msg);
65 static std::string ToBigEndian32(uint32 value);
67 TestURLFetcher* GetUpstreamFetcher();
68 TestURLFetcher* GetDownstreamFetcher();
69 void StartMockRecognition();
70 void EndMockRecognition();
71 void InjectDummyAudioChunk();
72 size_t UpstreamChunksUploadedFromLastCall();
73 void ProvideMockProtoResultDownstream(
74 const proto::SpeechRecognitionEvent& result);
75 void ProvideMockResultDownstream(const SpeechRecognitionResult& result);
76 void ExpectResultsReceived(const SpeechRecognitionResults& result);
77 void CloseMockDownstream(DownstreamError error);
79 scoped_ptr<GoogleStreamingRemoteEngine> engine_under_test_;
80 TestURLFetcherFactory url_fetcher_factory_;
81 size_t last_number_of_upstream_chunks_seen_;
82 base::MessageLoop message_loop_;
83 std::string response_buffer_;
84 SpeechRecognitionErrorCode error_;
85 std::queue<SpeechRecognitionResults> results_;
88 TEST_F(GoogleStreamingRemoteEngineTest, SingleDefinitiveResult) {
89 StartMockRecognition();
90 ASSERT_TRUE(GetUpstreamFetcher());
91 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
93 // Inject some dummy audio chunks and check a corresponding chunked upload
94 // is performed every time on the server.
95 for (int i = 0; i < 3; ++i) {
96 InjectDummyAudioChunk();
97 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
100 // Ensure that a final (empty) audio chunk is uploaded on chunks end.
101 engine_under_test_->AudioChunksEnded();
102 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
103 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
105 // Simulate a protobuf message streamed from the server containing a single
106 // result with two hypotheses.
107 SpeechRecognitionResults results;
108 results.push_back(SpeechRecognitionResult());
109 SpeechRecognitionResult& result = results.back();
110 result.is_provisional = false;
111 result.hypotheses.push_back(
112 SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis 1"), 0.1F));
113 result.hypotheses.push_back(
114 SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis 2"), 0.2F));
116 ProvideMockResultDownstream(result);
117 ExpectResultsReceived(results);
118 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
120 // Ensure everything is closed cleanly after the downstream is closed.
121 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
122 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
123 EndMockRecognition();
124 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
125 ASSERT_EQ(0U, results_.size());
128 TEST_F(GoogleStreamingRemoteEngineTest, SeveralStreamingResults) {
129 StartMockRecognition();
130 ASSERT_TRUE(GetUpstreamFetcher());
131 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
133 for (int i = 0; i < 4; ++i) {
134 InjectDummyAudioChunk();
135 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
137 SpeechRecognitionResults results;
138 results.push_back(SpeechRecognitionResult());
139 SpeechRecognitionResult& result = results.back();
140 result.is_provisional = (i % 2 == 0); // Alternate result types.
141 float confidence = result.is_provisional ? 0.0F : (i * 0.1F);
142 result.hypotheses.push_back(
143 SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis"), confidence));
145 ProvideMockResultDownstream(result);
146 ExpectResultsReceived(results);
147 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
150 // Ensure that a final (empty) audio chunk is uploaded on chunks end.
151 engine_under_test_->AudioChunksEnded();
152 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
153 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
155 // Simulate a final definitive result.
156 SpeechRecognitionResults results;
157 results.push_back(SpeechRecognitionResult());
158 SpeechRecognitionResult& result = results.back();
159 result.is_provisional = false;
160 result.hypotheses.push_back(
161 SpeechRecognitionHypothesis(UTF8ToUTF16("The final result"), 1.0F));
162 ProvideMockResultDownstream(result);
163 ExpectResultsReceived(results);
164 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
166 // Ensure everything is closed cleanly after the downstream is closed.
167 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
168 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
169 EndMockRecognition();
170 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
171 ASSERT_EQ(0U, results_.size());
174 TEST_F(GoogleStreamingRemoteEngineTest, NoFinalResultAfterAudioChunksEnded) {
175 StartMockRecognition();
176 ASSERT_TRUE(GetUpstreamFetcher());
177 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
179 // Simulate one pushed audio chunk.
180 InjectDummyAudioChunk();
181 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
183 // Simulate the corresponding definitive result.
184 SpeechRecognitionResults results;
185 results.push_back(SpeechRecognitionResult());
186 SpeechRecognitionResult& result = results.back();
187 result.hypotheses.push_back(
188 SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis"), 1.0F));
189 ProvideMockResultDownstream(result);
190 ExpectResultsReceived(results);
191 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
193 // Simulate a silent downstream closure after |AudioChunksEnded|.
194 engine_under_test_->AudioChunksEnded();
195 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
196 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
197 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
199 // Expect an empty result, aimed at notifying recognition ended with no
200 // actual results nor errors.
201 SpeechRecognitionResults empty_results;
202 ExpectResultsReceived(empty_results);
204 // Ensure everything is closed cleanly after the downstream is closed.
205 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
206 EndMockRecognition();
207 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
208 ASSERT_EQ(0U, results_.size());
211 TEST_F(GoogleStreamingRemoteEngineTest, NoMatchError) {
212 StartMockRecognition();
213 ASSERT_TRUE(GetUpstreamFetcher());
214 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
216 for (int i = 0; i < 3; ++i)
217 InjectDummyAudioChunk();
218 engine_under_test_->AudioChunksEnded();
219 ASSERT_EQ(4U, UpstreamChunksUploadedFromLastCall());
220 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
222 // Simulate only a provisional result.
223 SpeechRecognitionResults results;
224 results.push_back(SpeechRecognitionResult());
225 SpeechRecognitionResult& result = results.back();
226 result.is_provisional = true;
227 result.hypotheses.push_back(
228 SpeechRecognitionHypothesis(UTF8ToUTF16("The final result"), 0.0F));
229 ProvideMockResultDownstream(result);
230 ExpectResultsReceived(results);
231 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
233 CloseMockDownstream(DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH);
235 // Expect an empty result.
236 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
237 EndMockRecognition();
238 SpeechRecognitionResults empty_result;
239 ExpectResultsReceived(empty_result);
242 TEST_F(GoogleStreamingRemoteEngineTest, HTTPError) {
243 StartMockRecognition();
244 ASSERT_TRUE(GetUpstreamFetcher());
245 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
247 InjectDummyAudioChunk();
248 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
250 // Close the downstream with a HTTP 500 error.
251 CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500);
253 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
254 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
255 EndMockRecognition();
256 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
257 ASSERT_EQ(0U, results_.size());
260 TEST_F(GoogleStreamingRemoteEngineTest, NetworkError) {
261 StartMockRecognition();
262 ASSERT_TRUE(GetUpstreamFetcher());
263 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
265 InjectDummyAudioChunk();
266 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
268 // Close the downstream fetcher simulating a network failure.
269 CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK);
271 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
272 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
273 EndMockRecognition();
274 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_);
275 ASSERT_EQ(0U, results_.size());
278 TEST_F(GoogleStreamingRemoteEngineTest, Stability) {
279 StartMockRecognition();
280 ASSERT_TRUE(GetUpstreamFetcher());
281 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
283 // Upload a dummy audio chunk.
284 InjectDummyAudioChunk();
285 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
286 engine_under_test_->AudioChunksEnded();
288 // Simulate a protobuf message with an intermediate result without confidence,
289 // but with stability.
290 proto::SpeechRecognitionEvent proto_event;
291 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
292 proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
293 proto_result->set_stability(0.5);
294 proto::SpeechRecognitionAlternative *proto_alternative =
295 proto_result->add_alternative();
296 proto_alternative->set_transcript("foo");
297 ProvideMockProtoResultDownstream(proto_event);
299 // Set up expectations.
300 SpeechRecognitionResults results;
301 results.push_back(SpeechRecognitionResult());
302 SpeechRecognitionResult& result = results.back();
303 result.is_provisional = true;
304 result.hypotheses.push_back(
305 SpeechRecognitionHypothesis(UTF8ToUTF16("foo"), 0.5));
307 // Check that the protobuf generated the expected result.
308 ExpectResultsReceived(results);
310 // Since it was a provisional result, recognition is still pending.
311 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
313 // Shut down.
314 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
315 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
316 EndMockRecognition();
318 // Since there was no final result, we get an empty "no match" result.
319 SpeechRecognitionResults empty_result;
320 ExpectResultsReceived(empty_result);
321 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_);
322 ASSERT_EQ(0U, results_.size());
325 void GoogleStreamingRemoteEngineTest::SetUp() {
326 engine_under_test_.reset(
327 new GoogleStreamingRemoteEngine(NULL /*URLRequestContextGetter*/));
328 engine_under_test_->set_delegate(this);
331 void GoogleStreamingRemoteEngineTest::TearDown() {
332 engine_under_test_.reset();
335 TestURLFetcher* GoogleStreamingRemoteEngineTest::GetUpstreamFetcher() {
336 return url_fetcher_factory_.GetFetcherByID(
337 GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTests);
340 TestURLFetcher* GoogleStreamingRemoteEngineTest::GetDownstreamFetcher() {
341 return url_fetcher_factory_.GetFetcherByID(
342 GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTests);
345 // Starts recognition on the engine, ensuring that both stream fetchers are
346 // created.
347 void GoogleStreamingRemoteEngineTest::StartMockRecognition() {
348 DCHECK(engine_under_test_.get());
350 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
352 engine_under_test_->StartRecognition();
353 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
355 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher();
356 ASSERT_TRUE(upstream_fetcher);
357 upstream_fetcher->set_url(upstream_fetcher->GetOriginalURL());
359 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
360 ASSERT_TRUE(downstream_fetcher);
361 downstream_fetcher->set_url(downstream_fetcher->GetOriginalURL());
364 void GoogleStreamingRemoteEngineTest::EndMockRecognition() {
365 DCHECK(engine_under_test_.get());
366 engine_under_test_->EndRecognition();
367 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
369 // TODO(primiano): In order to be very pedantic we should check that both the
370 // upstream and downstream URL fetchers have been disposed at this time.
371 // Unfortunately it seems that there is no direct way to detect (in tests)
372 // if a url_fetcher has been freed or not, since they are not automatically
373 // de-registered from the TestURLFetcherFactory on destruction.
376 void GoogleStreamingRemoteEngineTest::InjectDummyAudioChunk() {
377 unsigned char dummy_audio_buffer_data[2] = {'\0', '\0'};
378 scoped_refptr<AudioChunk> dummy_audio_chunk(
379 new AudioChunk(&dummy_audio_buffer_data[0],
380 sizeof(dummy_audio_buffer_data),
381 2 /* bytes per sample */));
382 DCHECK(engine_under_test_.get());
383 engine_under_test_->TakeAudioChunk(*dummy_audio_chunk.get());
386 size_t GoogleStreamingRemoteEngineTest::UpstreamChunksUploadedFromLastCall() {
387 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher();
388 DCHECK(upstream_fetcher);
389 const size_t number_of_chunks = upstream_fetcher->upload_chunks().size();
390 DCHECK_GE(number_of_chunks, last_number_of_upstream_chunks_seen_);
391 const size_t new_chunks = number_of_chunks -
392 last_number_of_upstream_chunks_seen_;
393 last_number_of_upstream_chunks_seen_ = number_of_chunks;
394 return new_chunks;
397 void GoogleStreamingRemoteEngineTest::ProvideMockProtoResultDownstream(
398 const proto::SpeechRecognitionEvent& result) {
399 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
401 ASSERT_TRUE(downstream_fetcher);
402 downstream_fetcher->set_status(URLRequestStatus(/* default=SUCCESS */));
403 downstream_fetcher->set_response_code(200);
405 std::string response_string = SerializeProtobufResponse(result);
406 response_buffer_.append(response_string);
407 downstream_fetcher->SetResponseString(response_buffer_);
408 downstream_fetcher->delegate()->OnURLFetchDownloadProgress(
409 downstream_fetcher,
410 response_buffer_.size(),
411 -1 /* total response length not used */);
414 void GoogleStreamingRemoteEngineTest::ProvideMockResultDownstream(
415 const SpeechRecognitionResult& result) {
416 proto::SpeechRecognitionEvent proto_event;
417 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
418 proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
419 proto_result->set_final(!result.is_provisional);
420 for (size_t i = 0; i < result.hypotheses.size(); ++i) {
421 proto::SpeechRecognitionAlternative* proto_alternative =
422 proto_result->add_alternative();
423 const SpeechRecognitionHypothesis& hypothesis = result.hypotheses[i];
424 proto_alternative->set_confidence(hypothesis.confidence);
425 proto_alternative->set_transcript(UTF16ToUTF8(hypothesis.utterance));
427 ProvideMockProtoResultDownstream(proto_event);
430 void GoogleStreamingRemoteEngineTest::CloseMockDownstream(
431 DownstreamError error) {
432 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
433 ASSERT_TRUE(downstream_fetcher);
435 const URLRequestStatus::Status fetcher_status =
436 (error == DOWNSTREAM_ERROR_NETWORK) ? URLRequestStatus::FAILED :
437 URLRequestStatus::SUCCESS;
438 downstream_fetcher->set_status(URLRequestStatus(fetcher_status, 0));
439 downstream_fetcher->set_response_code(
440 (error == DOWNSTREAM_ERROR_HTTP500) ? 500 : 200);
442 if (error == DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH) {
443 // Send empty response.
444 proto::SpeechRecognitionEvent response;
445 response_buffer_.append(SerializeProtobufResponse(response));
447 downstream_fetcher->SetResponseString(response_buffer_);
448 downstream_fetcher->delegate()->OnURLFetchComplete(downstream_fetcher);
451 void GoogleStreamingRemoteEngineTest::ExpectResultsReceived(
452 const SpeechRecognitionResults& results) {
453 ASSERT_GE(1U, results_.size());
454 ASSERT_TRUE(ResultsAreEqual(results, results_.front()));
455 results_.pop();
458 bool GoogleStreamingRemoteEngineTest::ResultsAreEqual(
459 const SpeechRecognitionResults& a, const SpeechRecognitionResults& b) {
460 if (a.size() != b.size())
461 return false;
463 SpeechRecognitionResults::const_iterator it_a = a.begin();
464 SpeechRecognitionResults::const_iterator it_b = b.begin();
465 for (; it_a != a.end() && it_b != b.end(); ++it_a, ++it_b) {
466 if (it_a->is_provisional != it_b->is_provisional ||
467 it_a->hypotheses.size() != it_b->hypotheses.size()) {
468 return false;
470 for (size_t i = 0; i < it_a->hypotheses.size(); ++i) {
471 const SpeechRecognitionHypothesis& hyp_a = it_a->hypotheses[i];
472 const SpeechRecognitionHypothesis& hyp_b = it_b->hypotheses[i];
473 if (hyp_a.utterance != hyp_b.utterance ||
474 hyp_a.confidence != hyp_b.confidence) {
475 return false;
480 return true;
483 std::string GoogleStreamingRemoteEngineTest::SerializeProtobufResponse(
484 const proto::SpeechRecognitionEvent& msg) {
485 std::string msg_string;
486 msg.SerializeToString(&msg_string);
488 // Prepend 4 byte prefix length indication to the protobuf message as
489 // envisaged by the google streaming recognition webservice protocol.
490 msg_string.insert(0, ToBigEndian32(msg_string.size()));
491 return msg_string;
494 std::string GoogleStreamingRemoteEngineTest::ToBigEndian32(uint32 value) {
495 char raw_data[4];
496 raw_data[0] = static_cast<uint8>((value >> 24) & 0xFF);
497 raw_data[1] = static_cast<uint8>((value >> 16) & 0xFF);
498 raw_data[2] = static_cast<uint8>((value >> 8) & 0xFF);
499 raw_data[3] = static_cast<uint8>(value & 0xFF);
500 return std::string(raw_data, sizeof(raw_data));
503 } // namespace content