Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / content / browser / speech / google_streaming_remote_engine.cc
blob: 712a09bb1fa19f035daa830f88ecf747bc2ff6a0
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/speech/google_streaming_remote_engine.h"

#include <algorithm>
#include <vector>

#include "base/big_endian.h"
#include "base/bind.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/proto/google_streaming_api.pb.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "google_apis/google_api_keys.h"
#include "net/base/escape.h"
#include "net/base/load_flags.h"
#include "net/url_request/http_user_agent_settings.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_request_context.h"
#include "net/url_request/url_request_context_getter.h"
#include "net/url_request/url_request_status.h"
30 using net::URLFetcher;
32 namespace content {
33 namespace {
35 const char kWebServiceBaseUrl[] =
36 "https://www.google.com/speech-api/full-duplex/v1";
37 const char kDownstreamUrl[] = "/down?";
38 const char kUpstreamUrl[] = "/up?";
40 // This matches the maximum maxAlternatives value supported by the server.
41 const uint32 kMaxMaxAlternatives = 30;
43 // TODO(hans): Remove this and other logging when we don't need it anymore.
44 void DumpResponse(const std::string& response) {
45 DVLOG(1) << "------------";
46 proto::SpeechRecognitionEvent event;
47 if (!event.ParseFromString(response)) {
48 DVLOG(1) << "Parse failed!";
49 return;
51 if (event.has_status())
52 DVLOG(1) << "STATUS\t" << event.status();
53 for (int i = 0; i < event.result_size(); ++i) {
54 DVLOG(1) << "RESULT #" << i << ":";
55 const proto::SpeechRecognitionResult& res = event.result(i);
56 if (res.has_final())
57 DVLOG(1) << " final:\t" << res.final();
58 if (res.has_stability())
59 DVLOG(1) << " STABILITY:\t" << res.stability();
60 for (int j = 0; j < res.alternative_size(); ++j) {
61 const proto::SpeechRecognitionAlternative& alt =
62 res.alternative(j);
63 if (alt.has_confidence())
64 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence();
65 if (alt.has_transcript())
66 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript();
71 } // namespace
73 const int GoogleStreamingRemoteEngine::kAudioPacketIntervalMs = 100;
74 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting = 0;
75 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting = 1;
76 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0;
77 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5;
79 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine(
80 net::URLRequestContextGetter* context)
81 : url_context_(context),
82 previous_response_length_(0),
83 got_last_definitive_result_(false),
84 is_dispatching_event_(false),
85 use_framed_post_data_(false),
86 state_(STATE_IDLE) {}
88 GoogleStreamingRemoteEngine::~GoogleStreamingRemoteEngine() {}
90 void GoogleStreamingRemoteEngine::SetConfig(
91 const SpeechRecognitionEngineConfig& config) {
92 config_ = config;
95 void GoogleStreamingRemoteEngine::StartRecognition() {
96 FSMEventArgs event_args(EVENT_START_RECOGNITION);
97 DispatchEvent(event_args);
100 void GoogleStreamingRemoteEngine::EndRecognition() {
101 FSMEventArgs event_args(EVENT_END_RECOGNITION);
102 DispatchEvent(event_args);
105 void GoogleStreamingRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
106 FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
107 event_args.audio_data = &data;
108 DispatchEvent(event_args);
111 void GoogleStreamingRemoteEngine::AudioChunksEnded() {
112 FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
113 DispatchEvent(event_args);
116 void GoogleStreamingRemoteEngine::OnURLFetchComplete(const URLFetcher* source) {
117 const bool kResponseComplete = true;
118 DispatchHTTPResponse(source, kResponseComplete);
121 void GoogleStreamingRemoteEngine::OnURLFetchDownloadProgress(
122 const URLFetcher* source, int64 current, int64 total) {
123 const bool kPartialResponse = false;
124 DispatchHTTPResponse(source, kPartialResponse);
127 void GoogleStreamingRemoteEngine::DispatchHTTPResponse(const URLFetcher* source,
128 bool end_of_response) {
129 DCHECK(CalledOnValidThread());
130 DCHECK(source);
131 const bool response_is_good = source->GetStatus().is_success() &&
132 source->GetResponseCode() == 200;
133 std::string response;
134 if (response_is_good)
135 source->GetResponseAsString(&response);
136 const size_t current_response_length = response.size();
138 DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
139 << "HTTP, code: " << source->GetResponseCode()
140 << " length: " << current_response_length
141 << " eor: " << end_of_response;
143 // URLFetcher provides always the entire response buffer, but we are only
144 // interested in the fresh data introduced by the last chunk. Therefore, we
145 // drop the previous content we have already processed.
146 if (current_response_length != 0) {
147 DCHECK_GE(current_response_length, previous_response_length_);
148 response.erase(0, previous_response_length_);
149 previous_response_length_ = current_response_length;
152 if (!response_is_good && source == downstream_fetcher_.get()) {
153 DVLOG(1) << "Downstream error " << source->GetResponseCode();
154 FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
155 DispatchEvent(event_args);
156 return;
158 if (!response_is_good && source == upstream_fetcher_.get()) {
159 DVLOG(1) << "Upstream error " << source->GetResponseCode()
160 << " EOR " << end_of_response;
161 FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
162 DispatchEvent(event_args);
163 return;
166 // Ignore incoming data on the upstream connection.
167 if (source == upstream_fetcher_.get())
168 return;
170 DCHECK(response_is_good && source == downstream_fetcher_.get());
172 // The downstream response is organized in chunks, whose size is determined
173 // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
174 // Such chunks are sent by the speech recognition webservice over the HTTP
175 // downstream channel using HTTP chunked transfer (unrelated to our chunks).
176 // This function is called every time an HTTP chunk is received by the
177 // url fetcher. However there isn't any particular matching beween our
178 // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
179 // contain a portion of one chunk or even more chunks together.
180 chunked_byte_buffer_.Append(response);
182 // A single HTTP chunk can contain more than one data chunk, thus the while.
183 while (chunked_byte_buffer_.HasChunks()) {
184 FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
185 event_args.response = chunked_byte_buffer_.PopChunk();
186 DCHECK(event_args.response.get());
187 DumpResponse(std::string(event_args.response->begin(),
188 event_args.response->end()));
189 DispatchEvent(event_args);
191 if (end_of_response) {
192 FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
193 DispatchEvent(event_args);
197 bool GoogleStreamingRemoteEngine::IsRecognitionPending() const {
198 DCHECK(CalledOnValidThread());
199 return state_ != STATE_IDLE;
202 int GoogleStreamingRemoteEngine::GetDesiredAudioChunkDurationMs() const {
203 return kAudioPacketIntervalMs;
// ----------------------- Core FSM implementation ---------------------------
208 void GoogleStreamingRemoteEngine::DispatchEvent(
209 const FSMEventArgs& event_args) {
210 DCHECK(CalledOnValidThread());
211 DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
212 DCHECK_LE(state_, STATE_MAX_VALUE);
214 // Event dispatching must be sequential, otherwise it will break all the rules
215 // and the assumptions of the finite state automata model.
216 DCHECK(!is_dispatching_event_);
217 is_dispatching_event_ = true;
219 state_ = ExecuteTransitionAndGetNextState(event_args);
221 is_dispatching_event_ = false;
224 GoogleStreamingRemoteEngine::FSMState
225 GoogleStreamingRemoteEngine::ExecuteTransitionAndGetNextState(
226 const FSMEventArgs& event_args) {
227 const FSMEvent event = event_args.event;
228 switch (state_) {
229 case STATE_IDLE:
230 switch (event) {
231 case EVENT_START_RECOGNITION:
232 return ConnectBothStreams(event_args);
233 case EVENT_END_RECOGNITION:
234 // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
235 // abort, so we just silently drop them here.
236 case EVENT_AUDIO_CHUNK:
237 case EVENT_AUDIO_CHUNKS_ENDED:
238 // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
239 case EVENT_DOWNSTREAM_CLOSED:
240 return DoNothing(event_args);
241 case EVENT_UPSTREAM_ERROR:
242 case EVENT_DOWNSTREAM_ERROR:
243 case EVENT_DOWNSTREAM_RESPONSE:
244 return NotFeasible(event_args);
246 break;
247 case STATE_BOTH_STREAMS_CONNECTED:
248 switch (event) {
249 case EVENT_AUDIO_CHUNK:
250 return TransmitAudioUpstream(event_args);
251 case EVENT_DOWNSTREAM_RESPONSE:
252 return ProcessDownstreamResponse(event_args);
253 case EVENT_AUDIO_CHUNKS_ENDED:
254 return CloseUpstreamAndWaitForResults(event_args);
255 case EVENT_END_RECOGNITION:
256 return AbortSilently(event_args);
257 case EVENT_UPSTREAM_ERROR:
258 case EVENT_DOWNSTREAM_ERROR:
259 case EVENT_DOWNSTREAM_CLOSED:
260 return AbortWithError(event_args);
261 case EVENT_START_RECOGNITION:
262 return NotFeasible(event_args);
264 break;
265 case STATE_WAITING_DOWNSTREAM_RESULTS:
266 switch (event) {
267 case EVENT_DOWNSTREAM_RESPONSE:
268 return ProcessDownstreamResponse(event_args);
269 case EVENT_DOWNSTREAM_CLOSED:
270 return RaiseNoMatchErrorIfGotNoResults(event_args);
271 case EVENT_END_RECOGNITION:
272 return AbortSilently(event_args);
273 case EVENT_UPSTREAM_ERROR:
274 case EVENT_DOWNSTREAM_ERROR:
275 return AbortWithError(event_args);
276 case EVENT_START_RECOGNITION:
277 case EVENT_AUDIO_CHUNK:
278 case EVENT_AUDIO_CHUNKS_ENDED:
279 return NotFeasible(event_args);
281 break;
283 return NotFeasible(event_args);
// ----------- Contract for all the FSM evolution functions below -------------
//  - Are guaranteed to be executed in the same thread (IO, except for tests);
//  - Are guaranteed to be not reentrant (themselves and each other);
//  - event_args members are guaranteed to be stable during the call;
291 GoogleStreamingRemoteEngine::FSMState
292 GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
293 DCHECK(!upstream_fetcher_.get());
294 DCHECK(!downstream_fetcher_.get());
296 encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
297 config_.audio_num_bits_per_sample));
298 DCHECK(encoder_.get());
299 const std::string request_key = GenerateRequestKey();
301 // Only use the framed post data format when a preamble needs to be logged.
302 use_framed_post_data_ = (config_.preamble &&
303 !config_.preamble->sample_data.empty() &&
304 !config_.auth_token.empty() &&
305 !config_.auth_scope.empty());
306 if (use_framed_post_data_) {
307 preamble_encoder_.reset(new AudioEncoder(
308 config_.preamble->sample_rate,
309 config_.preamble->sample_depth * 8));
312 // Setup downstream fetcher.
313 std::vector<std::string> downstream_args;
314 downstream_args.push_back(
315 "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
316 downstream_args.push_back("pair=" + request_key);
317 downstream_args.push_back("output=pb");
318 GURL downstream_url(std::string(kWebServiceBaseUrl) +
319 std::string(kDownstreamUrl) +
320 base::JoinString(downstream_args, "&"));
322 downstream_fetcher_ = URLFetcher::Create(
323 kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this);
324 downstream_fetcher_->SetRequestContext(url_context_.get());
325 downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
326 net::LOAD_DO_NOT_SEND_COOKIES |
327 net::LOAD_DO_NOT_SEND_AUTH_DATA);
328 downstream_fetcher_->Start();
330 // Setup upstream fetcher.
331 // TODO(hans): Support for user-selected grammars.
332 std::vector<std::string> upstream_args;
333 upstream_args.push_back("key=" +
334 net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
335 upstream_args.push_back("pair=" + request_key);
336 upstream_args.push_back("output=pb");
337 upstream_args.push_back(
338 "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
339 upstream_args.push_back(
340 config_.filter_profanities ? "pFilter=2" : "pFilter=0");
341 if (config_.max_hypotheses > 0U) {
342 int max_alternatives = std::min(kMaxMaxAlternatives,
343 config_.max_hypotheses);
344 upstream_args.push_back("maxAlternatives=" +
345 base::UintToString(max_alternatives));
347 upstream_args.push_back("app=chromium");
348 if (!config_.hardware_info.empty()) {
349 upstream_args.push_back(
350 "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
352 for (const SpeechRecognitionGrammar& grammar : config_.grammars) {
353 std::string grammar_value(
354 base::DoubleToString(grammar.weight) + ":" + grammar.url);
355 upstream_args.push_back(
356 "grammar=" + net::EscapeQueryParamValue(grammar_value, true));
358 if (config_.continuous)
359 upstream_args.push_back("continuous");
360 if (config_.interim_results)
361 upstream_args.push_back("interim");
362 if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
363 upstream_args.push_back(
364 "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
365 upstream_args.push_back(
366 "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
368 if (use_framed_post_data_) {
369 std::string audio_format;
370 if (preamble_encoder_)
371 audio_format = preamble_encoder_->GetMimeType() + ",";
372 audio_format += encoder_->GetMimeType();
373 upstream_args.push_back(
374 "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
376 GURL upstream_url(std::string(kWebServiceBaseUrl) +
377 std::string(kUpstreamUrl) +
378 base::JoinString(upstream_args, "&"));
380 upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting,
381 upstream_url, URLFetcher::POST, this);
382 if (use_framed_post_data_)
383 upstream_fetcher_->SetChunkedUpload("application/octet-stream");
384 else
385 upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
386 upstream_fetcher_->SetRequestContext(url_context_.get());
387 upstream_fetcher_->SetReferrer(config_.origin_url);
388 upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
389 net::LOAD_DO_NOT_SEND_COOKIES |
390 net::LOAD_DO_NOT_SEND_AUTH_DATA);
391 upstream_fetcher_->Start();
392 previous_response_length_ = 0;
394 if (preamble_encoder_) {
395 // Encode and send preamble right away.
396 scoped_refptr<AudioChunk> chunk = new AudioChunk(
397 reinterpret_cast<const uint8*>(config_.preamble->sample_data.data()),
398 config_.preamble->sample_data.size(),
399 config_.preamble->sample_depth);
400 preamble_encoder_->Encode(*chunk);
401 preamble_encoder_->Flush();
402 scoped_refptr<AudioChunk> encoded_data(
403 preamble_encoder_->GetEncodedDataAndClear());
404 UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
406 return STATE_BOTH_STREAMS_CONNECTED;
409 GoogleStreamingRemoteEngine::FSMState
410 GoogleStreamingRemoteEngine::TransmitAudioUpstream(
411 const FSMEventArgs& event_args) {
412 DCHECK(upstream_fetcher_.get());
413 DCHECK(event_args.audio_data.get());
414 const AudioChunk& audio = *(event_args.audio_data.get());
416 DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
417 encoder_->Encode(audio);
418 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
419 UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
420 return state_;
423 GoogleStreamingRemoteEngine::FSMState
424 GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
425 const FSMEventArgs& event_args) {
426 DCHECK(event_args.response.get());
428 proto::SpeechRecognitionEvent ws_event;
429 if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
430 event_args.response->end())))
431 return AbortWithError(event_args);
433 // An empty (default) event is used to notify us that the upstream has
434 // been connected. Ignore.
435 if (!ws_event.result_size() && (!ws_event.has_status() ||
436 ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) {
437 DVLOG(1) << "Received empty response";
438 return state_;
441 if (ws_event.has_status()) {
442 switch (ws_event.status()) {
443 case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
444 break;
445 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
446 return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
447 case proto::SpeechRecognitionEvent::STATUS_ABORTED:
448 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
449 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
450 return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE);
451 case proto::SpeechRecognitionEvent::STATUS_NETWORK:
452 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
453 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
454 return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED);
455 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
456 return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED);
457 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
458 return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
459 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
460 return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED);
464 SpeechRecognitionResults results;
465 for (int i = 0; i < ws_event.result_size(); ++i) {
466 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
467 results.push_back(SpeechRecognitionResult());
468 SpeechRecognitionResult& result = results.back();
469 result.is_provisional = !(ws_result.has_final() && ws_result.final());
471 if (!result.is_provisional)
472 got_last_definitive_result_ = true;
474 for (int j = 0; j < ws_result.alternative_size(); ++j) {
475 const proto::SpeechRecognitionAlternative& ws_alternative =
476 ws_result.alternative(j);
477 SpeechRecognitionHypothesis hypothesis;
478 if (ws_alternative.has_confidence())
479 hypothesis.confidence = ws_alternative.confidence();
480 else if (ws_result.has_stability())
481 hypothesis.confidence = ws_result.stability();
482 DCHECK(ws_alternative.has_transcript());
483 // TODO(hans): Perhaps the transcript should be required in the proto?
484 if (ws_alternative.has_transcript())
485 hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
487 result.hypotheses.push_back(hypothesis);
491 delegate()->OnSpeechRecognitionEngineResults(results);
493 return state_;
496 GoogleStreamingRemoteEngine::FSMState
497 GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
498 const FSMEventArgs& event_args) {
499 if (!got_last_definitive_result_) {
500 // Provide an empty result to notify that recognition is ended with no
501 // errors, yet neither any further results.
502 delegate()->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
504 return AbortSilently(event_args);
507 GoogleStreamingRemoteEngine::FSMState
508 GoogleStreamingRemoteEngine::CloseUpstreamAndWaitForResults(
509 const FSMEventArgs&) {
510 DCHECK(upstream_fetcher_.get());
511 DCHECK(encoder_.get());
513 DVLOG(1) << "Closing upstream.";
515 // The encoder requires a non-empty final buffer. So we encode a packet
516 // of silence in case encoder had no data already.
517 size_t sample_count =
518 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
519 scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
520 sample_count * sizeof(int16), encoder_->GetBitsPerSample() / 8);
521 encoder_->Encode(*dummy_chunk.get());
522 encoder_->Flush();
523 scoped_refptr<AudioChunk> encoded_dummy_data =
524 encoder_->GetEncodedDataAndClear();
525 DCHECK(!encoded_dummy_data->IsEmpty());
526 encoder_.reset();
528 UploadAudioChunk(encoded_dummy_data->AsString(),
529 FRAME_RECOGNITION_AUDIO,
530 true);
531 got_last_definitive_result_ = false;
532 return STATE_WAITING_DOWNSTREAM_RESULTS;
535 GoogleStreamingRemoteEngine::FSMState
536 GoogleStreamingRemoteEngine::CloseDownstream(const FSMEventArgs&) {
537 DCHECK(!upstream_fetcher_.get());
538 DCHECK(downstream_fetcher_.get());
540 DVLOG(1) << "Closing downstream.";
541 downstream_fetcher_.reset();
542 return STATE_IDLE;
545 GoogleStreamingRemoteEngine::FSMState
546 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) {
547 return Abort(SPEECH_RECOGNITION_ERROR_NONE);
550 GoogleStreamingRemoteEngine::FSMState
551 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) {
552 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
555 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort(
556 SpeechRecognitionErrorCode error_code) {
557 DVLOG(1) << "Aborting with error " << error_code;
559 if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
560 delegate()->OnSpeechRecognitionEngineError(
561 SpeechRecognitionError(error_code));
563 downstream_fetcher_.reset();
564 upstream_fetcher_.reset();
565 encoder_.reset();
566 return STATE_IDLE;
569 GoogleStreamingRemoteEngine::FSMState
570 GoogleStreamingRemoteEngine::DoNothing(const FSMEventArgs&) {
571 return state_;
574 GoogleStreamingRemoteEngine::FSMState
575 GoogleStreamingRemoteEngine::NotFeasible(const FSMEventArgs& event_args) {
576 NOTREACHED() << "Unfeasible event " << event_args.event
577 << " in state " << state_;
578 return state_;
581 std::string GoogleStreamingRemoteEngine::GetAcceptedLanguages() const {
582 std::string langs = config_.language;
583 if (langs.empty() && url_context_.get()) {
584 // If no language is provided then we use the first from the accepted
585 // language list. If this list is empty then it defaults to "en-US".
586 // Example of the contents of this list: "es,en-GB;q=0.8", ""
587 net::URLRequestContext* request_context =
588 url_context_->GetURLRequestContext();
589 DCHECK(request_context);
590 // TODO(pauljensen): GoogleStreamingRemoteEngine should be constructed with
591 // a reference to the HttpUserAgentSettings rather than accessing the
592 // accept language through the URLRequestContext.
593 if (request_context->http_user_agent_settings()) {
594 std::string accepted_language_list =
595 request_context->http_user_agent_settings()->GetAcceptLanguage();
596 size_t separator = accepted_language_list.find_first_of(",;");
597 if (separator != std::string::npos)
598 langs = accepted_language_list.substr(0, separator);
601 if (langs.empty())
602 langs = "en-US";
603 return langs;
606 // TODO(primiano): Is there any utility in the codebase that already does this?
607 std::string GoogleStreamingRemoteEngine::GenerateRequestKey() const {
608 const int64 kKeepLowBytes = 0x00000000FFFFFFFFLL;
609 const int64 kKeepHighBytes = 0xFFFFFFFF00000000LL;
611 // Just keep the least significant bits of timestamp, in order to reduce
612 // probability of collisions.
613 int64 key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
614 (base::RandUint64() & kKeepHighBytes);
615 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
618 void GoogleStreamingRemoteEngine::UploadAudioChunk(const std::string& data,
619 FrameType type,
620 bool is_final) {
621 if (use_framed_post_data_) {
622 std::string frame(data.size() + 8, 0);
623 base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
624 base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
625 frame.replace(8, data.size(), data);
626 upstream_fetcher_->AppendChunkToUpload(frame, is_final);
627 } else {
628 upstream_fetcher_->AppendChunkToUpload(data, is_final);
632 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
633 : event(event_value) {
636 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() {
639 } // namespace content