Re-submission of https://codereview.chromium.org/1041213003/
[chromium-blink-merge.git] / content / browser / speech / google_streaming_remote_engine.cc
blobca8e906ec30bec43e71103be4d113137189fea70
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/speech/google_streaming_remote_engine.h"
7 #include <algorithm>
8 #include <vector>
10 #include "base/big_endian.h"
11 #include "base/bind.h"
12 #include "base/rand_util.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/time/time.h"
17 #include "content/browser/speech/audio_buffer.h"
18 #include "content/browser/speech/proto/google_streaming_api.pb.h"
19 #include "content/public/common/speech_recognition_error.h"
20 #include "content/public/common/speech_recognition_result.h"
21 #include "google_apis/google_api_keys.h"
22 #include "net/base/escape.h"
23 #include "net/base/load_flags.h"
24 #include "net/url_request/http_user_agent_settings.h"
25 #include "net/url_request/url_fetcher.h"
26 #include "net/url_request/url_request_context.h"
27 #include "net/url_request/url_request_context_getter.h"
28 #include "net/url_request/url_request_status.h"
30 using net::URLFetcher;
32 namespace content {
33 namespace {
35 const char kWebServiceBaseUrl[] =
36 "https://www.google.com/speech-api/full-duplex/v1";
37 const char kDownstreamUrl[] = "/down?";
38 const char kUpstreamUrl[] = "/up?";
39 const AudioEncoder::Codec kDefaultAudioCodec = AudioEncoder::CODEC_FLAC;
41 // This matches the maximum maxAlternatives value supported by the server.
42 const uint32 kMaxMaxAlternatives = 30;
44 // TODO(hans): Remove this and other logging when we don't need it anymore.
45 void DumpResponse(const std::string& response) {
46 DVLOG(1) << "------------";
47 proto::SpeechRecognitionEvent event;
48 if (!event.ParseFromString(response)) {
49 DVLOG(1) << "Parse failed!";
50 return;
52 if (event.has_status())
53 DVLOG(1) << "STATUS\t" << event.status();
54 for (int i = 0; i < event.result_size(); ++i) {
55 DVLOG(1) << "RESULT #" << i << ":";
56 const proto::SpeechRecognitionResult& res = event.result(i);
57 if (res.has_final())
58 DVLOG(1) << " final:\t" << res.final();
59 if (res.has_stability())
60 DVLOG(1) << " STABILITY:\t" << res.stability();
61 for (int j = 0; j < res.alternative_size(); ++j) {
62 const proto::SpeechRecognitionAlternative& alt =
63 res.alternative(j);
64 if (alt.has_confidence())
65 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence();
66 if (alt.has_transcript())
67 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript();
72 } // namespace
74 const int GoogleStreamingRemoteEngine::kAudioPacketIntervalMs = 100;
75 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting = 0;
76 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting = 1;
77 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0;
78 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5;
80 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine(
81 net::URLRequestContextGetter* context)
82 : url_context_(context),
83 previous_response_length_(0),
84 got_last_definitive_result_(false),
85 is_dispatching_event_(false),
86 use_framed_post_data_(false),
87 state_(STATE_IDLE) {}
89 GoogleStreamingRemoteEngine::~GoogleStreamingRemoteEngine() {}
91 void GoogleStreamingRemoteEngine::SetConfig(
92 const SpeechRecognitionEngineConfig& config) {
93 config_ = config;
96 void GoogleStreamingRemoteEngine::StartRecognition() {
97 FSMEventArgs event_args(EVENT_START_RECOGNITION);
98 DispatchEvent(event_args);
101 void GoogleStreamingRemoteEngine::EndRecognition() {
102 FSMEventArgs event_args(EVENT_END_RECOGNITION);
103 DispatchEvent(event_args);
106 void GoogleStreamingRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
107 FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
108 event_args.audio_data = &data;
109 DispatchEvent(event_args);
112 void GoogleStreamingRemoteEngine::AudioChunksEnded() {
113 FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
114 DispatchEvent(event_args);
117 void GoogleStreamingRemoteEngine::OnURLFetchComplete(const URLFetcher* source) {
118 const bool kResponseComplete = true;
119 DispatchHTTPResponse(source, kResponseComplete);
122 void GoogleStreamingRemoteEngine::OnURLFetchDownloadProgress(
123 const URLFetcher* source, int64 current, int64 total) {
124 const bool kPartialResponse = false;
125 DispatchHTTPResponse(source, kPartialResponse);
128 void GoogleStreamingRemoteEngine::DispatchHTTPResponse(const URLFetcher* source,
129 bool end_of_response) {
130 DCHECK(CalledOnValidThread());
131 DCHECK(source);
132 const bool response_is_good = source->GetStatus().is_success() &&
133 source->GetResponseCode() == 200;
134 std::string response;
135 if (response_is_good)
136 source->GetResponseAsString(&response);
137 const size_t current_response_length = response.size();
139 DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
140 << "HTTP, code: " << source->GetResponseCode()
141 << " length: " << current_response_length
142 << " eor: " << end_of_response;
144 // URLFetcher provides always the entire response buffer, but we are only
145 // interested in the fresh data introduced by the last chunk. Therefore, we
146 // drop the previous content we have already processed.
147 if (current_response_length != 0) {
148 DCHECK_GE(current_response_length, previous_response_length_);
149 response.erase(0, previous_response_length_);
150 previous_response_length_ = current_response_length;
153 if (!response_is_good && source == downstream_fetcher_.get()) {
154 DVLOG(1) << "Downstream error " << source->GetResponseCode();
155 FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
156 DispatchEvent(event_args);
157 return;
159 if (!response_is_good && source == upstream_fetcher_.get()) {
160 DVLOG(1) << "Upstream error " << source->GetResponseCode()
161 << " EOR " << end_of_response;
162 FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
163 DispatchEvent(event_args);
164 return;
167 // Ignore incoming data on the upstream connection.
168 if (source == upstream_fetcher_.get())
169 return;
171 DCHECK(response_is_good && source == downstream_fetcher_.get());
173 // The downstream response is organized in chunks, whose size is determined
174 // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
175 // Such chunks are sent by the speech recognition webservice over the HTTP
176 // downstream channel using HTTP chunked transfer (unrelated to our chunks).
177 // This function is called every time an HTTP chunk is received by the
178 // url fetcher. However there isn't any particular matching beween our
179 // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
180 // contain a portion of one chunk or even more chunks together.
181 chunked_byte_buffer_.Append(response);
183 // A single HTTP chunk can contain more than one data chunk, thus the while.
184 while (chunked_byte_buffer_.HasChunks()) {
185 FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
186 event_args.response = chunked_byte_buffer_.PopChunk();
187 DCHECK(event_args.response.get());
188 DumpResponse(std::string(event_args.response->begin(),
189 event_args.response->end()));
190 DispatchEvent(event_args);
192 if (end_of_response) {
193 FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
194 DispatchEvent(event_args);
198 bool GoogleStreamingRemoteEngine::IsRecognitionPending() const {
199 DCHECK(CalledOnValidThread());
200 return state_ != STATE_IDLE;
203 int GoogleStreamingRemoteEngine::GetDesiredAudioChunkDurationMs() const {
204 return kAudioPacketIntervalMs;
207 // ----------------------- Core FSM implementation ---------------------------
209 void GoogleStreamingRemoteEngine::DispatchEvent(
210 const FSMEventArgs& event_args) {
211 DCHECK(CalledOnValidThread());
212 DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
213 DCHECK_LE(state_, STATE_MAX_VALUE);
215 // Event dispatching must be sequential, otherwise it will break all the rules
216 // and the assumptions of the finite state automata model.
217 DCHECK(!is_dispatching_event_);
218 is_dispatching_event_ = true;
220 state_ = ExecuteTransitionAndGetNextState(event_args);
222 is_dispatching_event_ = false;
225 GoogleStreamingRemoteEngine::FSMState
226 GoogleStreamingRemoteEngine::ExecuteTransitionAndGetNextState(
227 const FSMEventArgs& event_args) {
228 const FSMEvent event = event_args.event;
229 switch (state_) {
230 case STATE_IDLE:
231 switch (event) {
232 case EVENT_START_RECOGNITION:
233 return ConnectBothStreams(event_args);
234 case EVENT_END_RECOGNITION:
235 // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
236 // abort, so we just silently drop them here.
237 case EVENT_AUDIO_CHUNK:
238 case EVENT_AUDIO_CHUNKS_ENDED:
239 // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
240 case EVENT_DOWNSTREAM_CLOSED:
241 return DoNothing(event_args);
242 case EVENT_UPSTREAM_ERROR:
243 case EVENT_DOWNSTREAM_ERROR:
244 case EVENT_DOWNSTREAM_RESPONSE:
245 return NotFeasible(event_args);
247 break;
248 case STATE_BOTH_STREAMS_CONNECTED:
249 switch (event) {
250 case EVENT_AUDIO_CHUNK:
251 return TransmitAudioUpstream(event_args);
252 case EVENT_DOWNSTREAM_RESPONSE:
253 return ProcessDownstreamResponse(event_args);
254 case EVENT_AUDIO_CHUNKS_ENDED:
255 return CloseUpstreamAndWaitForResults(event_args);
256 case EVENT_END_RECOGNITION:
257 return AbortSilently(event_args);
258 case EVENT_UPSTREAM_ERROR:
259 case EVENT_DOWNSTREAM_ERROR:
260 case EVENT_DOWNSTREAM_CLOSED:
261 return AbortWithError(event_args);
262 case EVENT_START_RECOGNITION:
263 return NotFeasible(event_args);
265 break;
266 case STATE_WAITING_DOWNSTREAM_RESULTS:
267 switch (event) {
268 case EVENT_DOWNSTREAM_RESPONSE:
269 return ProcessDownstreamResponse(event_args);
270 case EVENT_DOWNSTREAM_CLOSED:
271 return RaiseNoMatchErrorIfGotNoResults(event_args);
272 case EVENT_END_RECOGNITION:
273 return AbortSilently(event_args);
274 case EVENT_UPSTREAM_ERROR:
275 case EVENT_DOWNSTREAM_ERROR:
276 return AbortWithError(event_args);
277 case EVENT_START_RECOGNITION:
278 case EVENT_AUDIO_CHUNK:
279 case EVENT_AUDIO_CHUNKS_ENDED:
280 return NotFeasible(event_args);
282 break;
284 return NotFeasible(event_args);
287 // ----------- Contract for all the FSM evolution functions below -------------
288 // - Are guaranteed to be executed in the same thread (IO, except for tests);
289 // - Are guaranteed to be not reentrant (themselves and each other);
290 // - event_args members are guaranteed to be stable during the call;
292 GoogleStreamingRemoteEngine::FSMState
293 GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
294 DCHECK(!upstream_fetcher_.get());
295 DCHECK(!downstream_fetcher_.get());
297 encoder_.reset(AudioEncoder::Create(kDefaultAudioCodec,
298 config_.audio_sample_rate,
299 config_.audio_num_bits_per_sample));
300 DCHECK(encoder_.get());
301 const std::string request_key = GenerateRequestKey();
303 // Only use the framed post data format when a preamble needs to be logged.
304 use_framed_post_data_ = (config_.preamble &&
305 !config_.preamble->sample_data.empty() &&
306 !config_.auth_token.empty() &&
307 !config_.auth_scope.empty());
308 if (use_framed_post_data_) {
309 preamble_encoder_.reset(AudioEncoder::Create(
310 kDefaultAudioCodec,
311 config_.preamble->sample_rate,
312 config_.preamble->sample_depth * 8));
315 // Setup downstream fetcher.
316 std::vector<std::string> downstream_args;
317 downstream_args.push_back(
318 "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
319 downstream_args.push_back("pair=" + request_key);
320 downstream_args.push_back("output=pb");
321 GURL downstream_url(std::string(kWebServiceBaseUrl) +
322 std::string(kDownstreamUrl) +
323 JoinString(downstream_args, '&'));
325 downstream_fetcher_.reset(URLFetcher::Create(
326 kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET,
327 this));
328 downstream_fetcher_->SetRequestContext(url_context_.get());
329 downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
330 net::LOAD_DO_NOT_SEND_COOKIES |
331 net::LOAD_DO_NOT_SEND_AUTH_DATA);
332 downstream_fetcher_->Start();
334 // Setup upstream fetcher.
335 // TODO(hans): Support for user-selected grammars.
336 std::vector<std::string> upstream_args;
337 upstream_args.push_back("key=" +
338 net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
339 upstream_args.push_back("pair=" + request_key);
340 upstream_args.push_back("output=pb");
341 upstream_args.push_back(
342 "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
343 upstream_args.push_back(
344 config_.filter_profanities ? "pFilter=2" : "pFilter=0");
345 if (config_.max_hypotheses > 0U) {
346 int max_alternatives = std::min(kMaxMaxAlternatives,
347 config_.max_hypotheses);
348 upstream_args.push_back("maxAlternatives=" +
349 base::UintToString(max_alternatives));
351 upstream_args.push_back("client=chromium");
352 if (!config_.hardware_info.empty()) {
353 upstream_args.push_back(
354 "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
356 if (config_.continuous)
357 upstream_args.push_back("continuous");
358 if (config_.interim_results)
359 upstream_args.push_back("interim");
360 if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
361 upstream_args.push_back(
362 "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
363 upstream_args.push_back(
364 "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
366 if (use_framed_post_data_) {
367 std::string audio_format;
368 if (preamble_encoder_)
369 audio_format = preamble_encoder_->mime_type() + ",";
370 audio_format += encoder_->mime_type();
371 upstream_args.push_back(
372 "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
374 GURL upstream_url(std::string(kWebServiceBaseUrl) +
375 std::string(kUpstreamUrl) +
376 JoinString(upstream_args, '&'));
378 upstream_fetcher_.reset(URLFetcher::Create(
379 kUpstreamUrlFetcherIdForTesting, upstream_url, URLFetcher::POST, this));
380 if (use_framed_post_data_)
381 upstream_fetcher_->SetChunkedUpload("application/octet-stream");
382 else
383 upstream_fetcher_->SetChunkedUpload(encoder_->mime_type());
384 upstream_fetcher_->SetRequestContext(url_context_.get());
385 upstream_fetcher_->SetReferrer(config_.origin_url);
386 upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
387 net::LOAD_DO_NOT_SEND_COOKIES |
388 net::LOAD_DO_NOT_SEND_AUTH_DATA);
389 upstream_fetcher_->Start();
390 previous_response_length_ = 0;
392 if (preamble_encoder_) {
393 // Encode and send preamble right away.
394 scoped_refptr<AudioChunk> chunk = new AudioChunk(
395 reinterpret_cast<const uint8*>(config_.preamble->sample_data.data()),
396 config_.preamble->sample_data.size(),
397 config_.preamble->sample_depth);
398 preamble_encoder_->Encode(*chunk);
399 preamble_encoder_->Flush();
400 scoped_refptr<AudioChunk> encoded_data(
401 preamble_encoder_->GetEncodedDataAndClear());
402 UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
404 return STATE_BOTH_STREAMS_CONNECTED;
407 GoogleStreamingRemoteEngine::FSMState
408 GoogleStreamingRemoteEngine::TransmitAudioUpstream(
409 const FSMEventArgs& event_args) {
410 DCHECK(upstream_fetcher_.get());
411 DCHECK(event_args.audio_data.get());
412 const AudioChunk& audio = *(event_args.audio_data.get());
414 DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
415 encoder_->Encode(audio);
416 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
417 UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
418 return state_;
421 GoogleStreamingRemoteEngine::FSMState
422 GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
423 const FSMEventArgs& event_args) {
424 DCHECK(event_args.response.get());
426 proto::SpeechRecognitionEvent ws_event;
427 if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
428 event_args.response->end())))
429 return AbortWithError(event_args);
431 // An empty (default) event is used to notify us that the upstream has
432 // been connected. Ignore.
433 if (!ws_event.result_size() && (!ws_event.has_status() ||
434 ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) {
435 DVLOG(1) << "Received empty response";
436 return state_;
439 if (ws_event.has_status()) {
440 switch (ws_event.status()) {
441 case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
442 break;
443 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
444 return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
445 case proto::SpeechRecognitionEvent::STATUS_ABORTED:
446 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
447 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
448 return Abort(SPEECH_RECOGNITION_ERROR_AUDIO);
449 case proto::SpeechRecognitionEvent::STATUS_NETWORK:
450 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
451 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
452 // TODO(hans): We need a better error code for this.
453 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
454 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
455 // TODO(hans): We need a better error code for this.
456 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
457 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
458 return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
459 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
460 // TODO(hans): We need a better error code for this.
461 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
465 SpeechRecognitionResults results;
466 for (int i = 0; i < ws_event.result_size(); ++i) {
467 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
468 results.push_back(SpeechRecognitionResult());
469 SpeechRecognitionResult& result = results.back();
470 result.is_provisional = !(ws_result.has_final() && ws_result.final());
472 if (!result.is_provisional)
473 got_last_definitive_result_ = true;
475 for (int j = 0; j < ws_result.alternative_size(); ++j) {
476 const proto::SpeechRecognitionAlternative& ws_alternative =
477 ws_result.alternative(j);
478 SpeechRecognitionHypothesis hypothesis;
479 if (ws_alternative.has_confidence())
480 hypothesis.confidence = ws_alternative.confidence();
481 else if (ws_result.has_stability())
482 hypothesis.confidence = ws_result.stability();
483 DCHECK(ws_alternative.has_transcript());
484 // TODO(hans): Perhaps the transcript should be required in the proto?
485 if (ws_alternative.has_transcript())
486 hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
488 result.hypotheses.push_back(hypothesis);
492 delegate()->OnSpeechRecognitionEngineResults(results);
494 return state_;
497 GoogleStreamingRemoteEngine::FSMState
498 GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
499 const FSMEventArgs& event_args) {
500 if (!got_last_definitive_result_) {
501 // Provide an empty result to notify that recognition is ended with no
502 // errors, yet neither any further results.
503 delegate()->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
505 return AbortSilently(event_args);
508 GoogleStreamingRemoteEngine::FSMState
509 GoogleStreamingRemoteEngine::CloseUpstreamAndWaitForResults(
510 const FSMEventArgs&) {
511 DCHECK(upstream_fetcher_.get());
512 DCHECK(encoder_.get());
514 DVLOG(1) << "Closing upstream.";
516 // The encoder requires a non-empty final buffer. So we encode a packet
517 // of silence in case encoder had no data already.
518 size_t sample_count =
519 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
520 scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
521 sample_count * sizeof(int16), encoder_->bits_per_sample() / 8);
522 encoder_->Encode(*dummy_chunk.get());
523 encoder_->Flush();
524 scoped_refptr<AudioChunk> encoded_dummy_data =
525 encoder_->GetEncodedDataAndClear();
526 DCHECK(!encoded_dummy_data->IsEmpty());
527 encoder_.reset();
529 UploadAudioChunk(encoded_dummy_data->AsString(),
530 FRAME_RECOGNITION_AUDIO,
531 true);
532 got_last_definitive_result_ = false;
533 return STATE_WAITING_DOWNSTREAM_RESULTS;
536 GoogleStreamingRemoteEngine::FSMState
537 GoogleStreamingRemoteEngine::CloseDownstream(const FSMEventArgs&) {
538 DCHECK(!upstream_fetcher_.get());
539 DCHECK(downstream_fetcher_.get());
541 DVLOG(1) << "Closing downstream.";
542 downstream_fetcher_.reset();
543 return STATE_IDLE;
546 GoogleStreamingRemoteEngine::FSMState
547 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) {
548 return Abort(SPEECH_RECOGNITION_ERROR_NONE);
551 GoogleStreamingRemoteEngine::FSMState
552 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) {
553 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
556 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort(
557 SpeechRecognitionErrorCode error_code) {
558 DVLOG(1) << "Aborting with error " << error_code;
560 if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
561 delegate()->OnSpeechRecognitionEngineError(
562 SpeechRecognitionError(error_code));
564 downstream_fetcher_.reset();
565 upstream_fetcher_.reset();
566 encoder_.reset();
567 return STATE_IDLE;
570 GoogleStreamingRemoteEngine::FSMState
571 GoogleStreamingRemoteEngine::DoNothing(const FSMEventArgs&) {
572 return state_;
575 GoogleStreamingRemoteEngine::FSMState
576 GoogleStreamingRemoteEngine::NotFeasible(const FSMEventArgs& event_args) {
577 NOTREACHED() << "Unfeasible event " << event_args.event
578 << " in state " << state_;
579 return state_;
582 std::string GoogleStreamingRemoteEngine::GetAcceptedLanguages() const {
583 std::string langs = config_.language;
584 if (langs.empty() && url_context_.get()) {
585 // If no language is provided then we use the first from the accepted
586 // language list. If this list is empty then it defaults to "en-US".
587 // Example of the contents of this list: "es,en-GB;q=0.8", ""
588 net::URLRequestContext* request_context =
589 url_context_->GetURLRequestContext();
590 DCHECK(request_context);
591 // TODO(pauljensen): GoogleStreamingRemoteEngine should be constructed with
592 // a reference to the HttpUserAgentSettings rather than accessing the
593 // accept language through the URLRequestContext.
594 if (request_context->http_user_agent_settings()) {
595 std::string accepted_language_list =
596 request_context->http_user_agent_settings()->GetAcceptLanguage();
597 size_t separator = accepted_language_list.find_first_of(",;");
598 if (separator != std::string::npos)
599 langs = accepted_language_list.substr(0, separator);
602 if (langs.empty())
603 langs = "en-US";
604 return langs;
607 // TODO(primiano): Is there any utility in the codebase that already does this?
608 std::string GoogleStreamingRemoteEngine::GenerateRequestKey() const {
609 const int64 kKeepLowBytes = 0x00000000FFFFFFFFLL;
610 const int64 kKeepHighBytes = 0xFFFFFFFF00000000LL;
612 // Just keep the least significant bits of timestamp, in order to reduce
613 // probability of collisions.
614 int64 key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
615 (base::RandUint64() & kKeepHighBytes);
616 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
619 void GoogleStreamingRemoteEngine::UploadAudioChunk(const std::string& data,
620 FrameType type,
621 bool is_final) {
622 if (use_framed_post_data_) {
623 std::string frame(data.size() + 8, 0);
624 base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
625 base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
626 frame.replace(8, data.size(), data);
627 upstream_fetcher_->AppendChunkToUpload(frame, is_final);
628 } else {
629 upstream_fetcher_->AppendChunkToUpload(data, is_final);
633 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
634 : event(event_value) {
637 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() {
640 } // namespace content