Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / content / browser / speech / google_streaming_remote_engine.cc
blob: 712a09bb1fa19f035daa830f88ecf747bc2ff6a0
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/speech/google_streaming_remote_engine.h"

#include <algorithm>
#include <vector>

#include "base/big_endian.h"
#include "base/bind.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/proto/google_streaming_api.pb.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "google_apis/google_api_keys.h"
#include "net/base/escape.h"
#include "net/base/load_flags.h"
#include "net/url_request/http_user_agent_settings.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_request_context.h"
#include "net/url_request/url_request_context_getter.h"
#include "net/url_request/url_request_status.h"
30 using net::URLFetcher;
32 namespace content {
33 namespace {
35 const char kWebServiceBaseUrl[] =
36 "https://www.google.com/speech-api/full-duplex/v1";
37 const char kDownstreamUrl[] = "/down?";
38 const char kUpstreamUrl[] = "/up?";
40 // This matches the maximum maxAlternatives value supported by the server.
41 const uint32 kMaxMaxAlternatives = 30;
43 // TODO(hans): Remove this and other logging when we don't need it anymore.
44 void DumpResponse(const std::string& response) {
45 DVLOG(1) << "------------";
46 proto::SpeechRecognitionEvent event;
47 if (!event.ParseFromString(response)) {
48 DVLOG(1) << "Parse failed!";
49 return;
51 if (event.has_status())
52 DVLOG(1) << "STATUS\t" << event.status();
53 for (int i = 0; i < event.result_size(); ++i) {
54 DVLOG(1) << "RESULT #" << i << ":";
55 const proto::SpeechRecognitionResult& res = event.result(i);
56 if (res.has_final())
57 DVLOG(1) << " final:\t" << res.final();
58 if (res.has_stability())
59 DVLOG(1) << " STABILITY:\t" << res.stability();
60 for (int j = 0; j < res.alternative_size(); ++j) {
61 const proto::SpeechRecognitionAlternative& alt =
62 res.alternative(j);
63 if (alt.has_confidence())
64 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence();
65 if (alt.has_transcript())
66 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript();
71 } // namespace
73 const int GoogleStreamingRemoteEngine::kAudioPacketIntervalMs = 100;
74 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting = 0;
75 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting = 1;
76 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0;
77 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5;
79 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine(
80 net::URLRequestContextGetter* context)
81 : url_context_(context),
82 previous_response_length_(0),
83 got_last_definitive_result_(false),
84 is_dispatching_event_(false),
85 use_framed_post_data_(false),
86 state_(STATE_IDLE) {}
88 GoogleStreamingRemoteEngine::~GoogleStreamingRemoteEngine() {}
90 void GoogleStreamingRemoteEngine::SetConfig(
91 const SpeechRecognitionEngineConfig& config) {
92 config_ = config;
95 void GoogleStreamingRemoteEngine::StartRecognition() {
96 FSMEventArgs event_args(EVENT_START_RECOGNITION);
97 DispatchEvent(event_args);
100 void GoogleStreamingRemoteEngine::EndRecognition() {
101 FSMEventArgs event_args(EVENT_END_RECOGNITION);
102 DispatchEvent(event_args);
105 void GoogleStreamingRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
106 FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
107 event_args.audio_data = &data;
108 DispatchEvent(event_args);
111 void GoogleStreamingRemoteEngine::AudioChunksEnded() {
112 FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
113 DispatchEvent(event_args);
116 void GoogleStreamingRemoteEngine::OnURLFetchComplete(const URLFetcher* source) {
117 const bool kResponseComplete = true;
118 DispatchHTTPResponse(source, kResponseComplete);
121 void GoogleStreamingRemoteEngine::OnURLFetchDownloadProgress(
122 const URLFetcher* source, int64 current, int64 total) {
123 const bool kPartialResponse = false;
124 DispatchHTTPResponse(source, kPartialResponse);
127 void GoogleStreamingRemoteEngine::DispatchHTTPResponse(const URLFetcher* source,
128 bool end_of_response) {
129 DCHECK(CalledOnValidThread());
130 DCHECK(source);
131 const bool response_is_good = source->GetStatus().is_success() &&
132 source->GetResponseCode() == 200;
133 std::string response;
134 if (response_is_good)
135 source->GetResponseAsString(&response);
136 const size_t current_response_length = response.size();
138 DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
139 << "HTTP, code: " << source->GetResponseCode()
140 << " length: " << current_response_length
141 << " eor: " << end_of_response;
143 // URLFetcher provides always the entire response buffer, but we are only
144 // interested in the fresh data introduced by the last chunk. Therefore, we
145 // drop the previous content we have already processed.
146 if (current_response_length != 0) {
147 DCHECK_GE(current_response_length, previous_response_length_);
148 response.erase(0, previous_response_length_);
149 previous_response_length_ = current_response_length;
152 if (!response_is_good && source == downstream_fetcher_.get()) {
153 DVLOG(1) << "Downstream error " << source->GetResponseCode();
154 FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
155 DispatchEvent(event_args);
156 return;
158 if (!response_is_good && source == upstream_fetcher_.get()) {
159 DVLOG(1) << "Upstream error " << source->GetResponseCode()
160 << " EOR " << end_of_response;
161 FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
162 DispatchEvent(event_args);
163 return;
166 // Ignore incoming data on the upstream connection.
167 if (source == upstream_fetcher_.get())
168 return;
170 DCHECK(response_is_good && source == downstream_fetcher_.get());
172 // The downstream response is organized in chunks, whose size is determined
173 // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
174 // Such chunks are sent by the speech recognition webservice over the HTTP
175 // downstream channel using HTTP chunked transfer (unrelated to our chunks).
176 // This function is called every time an HTTP chunk is received by the
177 // url fetcher. However there isn't any particular matching beween our
178 // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
179 // contain a portion of one chunk or even more chunks together.
180 chunked_byte_buffer_.Append(response);
182 // A single HTTP chunk can contain more than one data chunk, thus the while.
183 while (chunked_byte_buffer_.HasChunks()) {
184 FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
185 event_args.response = chunked_byte_buffer_.PopChunk();
186 DCHECK(event_args.response.get());
187 DumpResponse(std::string(event_args.response->begin(),
188 event_args.response->end()));
189 DispatchEvent(event_args);
191 if (end_of_response) {
192 FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
193 DispatchEvent(event_args);
197 bool GoogleStreamingRemoteEngine::IsRecognitionPending() const {
198 DCHECK(CalledOnValidThread());
199 return state_ != STATE_IDLE;
202 int GoogleStreamingRemoteEngine::GetDesiredAudioChunkDurationMs() const {
203 return kAudioPacketIntervalMs;
// ----------------------- Core FSM implementation ---------------------------
208 void GoogleStreamingRemoteEngine::DispatchEvent(
209 const FSMEventArgs& event_args) {
210 DCHECK(CalledOnValidThread());
211 DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
212 DCHECK_LE(state_, STATE_MAX_VALUE);
214 // Event dispatching must be sequential, otherwise it will break all the rules
215 // and the assumptions of the finite state automata model.
216 DCHECK(!is_dispatching_event_);
217 is_dispatching_event_ = true;
219 state_ = ExecuteTransitionAndGetNextState(event_args);
221 is_dispatching_event_ = false;
224 GoogleStreamingRemoteEngine::FSMState
225 GoogleStreamingRemoteEngine::ExecuteTransitionAndGetNextState(
226 const FSMEventArgs& event_args) {
227 const FSMEvent event = event_args.event;
228 switch (state_) {
229 case STATE_IDLE:
230 switch (event) {
231 case EVENT_START_RECOGNITION:
232 return ConnectBothStreams(event_args);
233 case EVENT_END_RECOGNITION:
234 // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
235 // abort, so we just silently drop them here.
236 case EVENT_AUDIO_CHUNK:
237 case EVENT_AUDIO_CHUNKS_ENDED:
238 // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
239 case EVENT_DOWNSTREAM_CLOSED:
240 return DoNothing(event_args);
241 case EVENT_UPSTREAM_ERROR:
242 case EVENT_DOWNSTREAM_ERROR:
243 case EVENT_DOWNSTREAM_RESPONSE:
244 return NotFeasible(event_args);
246 break;
247 case STATE_BOTH_STREAMS_CONNECTED:
248 switch (event) {
249 case EVENT_AUDIO_CHUNK:
250 return TransmitAudioUpstream(event_args);
251 case EVENT_DOWNSTREAM_RESPONSE:
252 return ProcessDownstreamResponse(event_args);
253 case EVENT_AUDIO_CHUNKS_ENDED:
254 return CloseUpstreamAndWaitForResults(event_args);
255 case EVENT_END_RECOGNITION:
256 return AbortSilently(event_args);
257 case EVENT_UPSTREAM_ERROR:
258 case EVENT_DOWNSTREAM_ERROR:
259 case EVENT_DOWNSTREAM_CLOSED:
260 return AbortWithError(event_args);
261 case EVENT_START_RECOGNITION:
262 return NotFeasible(event_args);
264 break;
265 case STATE_WAITING_DOWNSTREAM_RESULTS:
266 switch (event) {
267 case EVENT_DOWNSTREAM_RESPONSE:
268 return ProcessDownstreamResponse(event_args);
269 case EVENT_DOWNSTREAM_CLOSED:
270 return RaiseNoMatchErrorIfGotNoResults(event_args);
271 case EVENT_END_RECOGNITION:
272 return AbortSilently(event_args);
273 case EVENT_UPSTREAM_ERROR:
274 case EVENT_DOWNSTREAM_ERROR:
275 return AbortWithError(event_args);
276 case EVENT_START_RECOGNITION:
277 case EVENT_AUDIO_CHUNK:
278 case EVENT_AUDIO_CHUNKS_ENDED:
279 return NotFeasible(event_args);
281 break;
283 return NotFeasible(event_args);
// ----------- Contract for all the FSM evolution functions below -------------
//  - Are guaranteed to be executed in the same thread (IO, except for tests);
//  - Are guaranteed to be not reentrant (themselves and each other);
//  - event_args members are guaranteed to be stable during the call;
291 GoogleStreamingRemoteEngine::FSMState
292 GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
293 DCHECK(!upstream_fetcher_.get());
294 DCHECK(!downstream_fetcher_.get());
296 encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
297 config_.audio_num_bits_per_sample));
298 DCHECK(encoder_.get());
299 const std::string request_key = GenerateRequestKey();
301 // Only use the framed post data format when a preamble needs to be logged.
302 use_framed_post_data_ = (config_.preamble &&
303 !config_.preamble->sample_data.empty() &&
304 !config_.auth_token.empty() &&
305 !config_.auth_scope.empty());
306 if (use_framed_post_data_) {
307 preamble_encoder_.reset(new AudioEncoder(
308 config_.preamble->sample_rate,
309 config_.preamble->sample_depth * 8));
312 // Setup downstream fetcher.
313 std::vector<std::string> downstream_args;
314 downstream_args.push_back(
315 "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
316 downstream_args.push_back("pair=" + request_key);
317 downstream_args.push_back("output=pb");
318 GURL downstream_url(std::string(kWebServiceBaseUrl) +
319 std::string(kDownstreamUrl) +
320 base::JoinString(downstream_args, "&"));
322 downstream_fetcher_ = URLFetcher::Create(
323 kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this);
324 downstream_fetcher_->SetRequestContext(url_context_.get());
325 downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
326 net::LOAD_DO_NOT_SEND_COOKIES |
327 net::LOAD_DO_NOT_SEND_AUTH_DATA);
328 downstream_fetcher_->Start();
330 // Setup upstream fetcher.
331 // TODO(hans): Support for user-selected grammars.
332 std::vector<std::string> upstream_args;
333 upstream_args.push_back("key=" +
334 net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
335 upstream_args.push_back("pair=" + request_key);
336 upstream_args.push_back("output=pb");
337 upstream_args.push_back(
338 "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
339 upstream_args.push_back(
340 config_.filter_profanities ? "pFilter=2" : "pFilter=0");
341 if (config_.max_hypotheses > 0U) {
342 int max_alternatives = std::min(kMaxMaxAlternatives,
343 config_.max_hypotheses);
344 upstream_args.push_back("maxAlternatives=" +
345 base::UintToString(max_alternatives));
347 upstream_args.push_back("app=chromium");
348 if (!config_.hardware_info.empty()) {
349 upstream_args.push_back(
350 "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
352 for (const SpeechRecognitionGrammar& grammar : config_.grammars) {
353 std::string grammar_value(
354 base::DoubleToString(grammar.weight) + ":" + grammar.url);
355 upstream_args.push_back(
356 "grammar=" + net::EscapeQueryParamValue(grammar_value, true));
358 if (config_.continuous)
359 upstream_args.push_back("continuous");
360 if (config_.interim_results)
361 upstream_args.push_back("interim");
362 if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
363 upstream_args.push_back(
364 "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
365 upstream_args.push_back(
366 "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
368 if (use_framed_post_data_) {
369 std::string audio_format;
370 if (preamble_encoder_)
371 audio_format = preamble_encoder_->GetMimeType() + ",";
372 audio_format += encoder_->GetMimeType();
373 upstream_args.push_back(
374 "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
376 GURL upstream_url(std::string(kWebServiceBaseUrl) +
377 std::string(kUpstreamUrl) +
378 base::JoinString(upstream_args, "&"));
380 upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting,
381 upstream_url, URLFetcher::POST, this);
382 if (use_framed_post_data_)
383 upstream_fetcher_->SetChunkedUpload("application/octet-stream");
384 else
385 upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
386 upstream_fetcher_->SetRequestContext(url_context_.get());
387 upstream_fetcher_->SetReferrer(config_.origin_url);
388 upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
389 net::LOAD_DO_NOT_SEND_COOKIES |
390 net::LOAD_DO_NOT_SEND_AUTH_DATA);
391 upstream_fetcher_->Start();
392 previous_response_length_ = 0;
394 if (preamble_encoder_) {
395 // Encode and send preamble right away.
396 scoped_refptr<AudioChunk> chunk = new AudioChunk(
397 reinterpret_cast<const uint8*>(config_.preamble->sample_data.data()),
398 config_.preamble->sample_data.size(),
399 config_.preamble->sample_depth);
400 preamble_encoder_->Encode(*chunk);
401 preamble_encoder_->Flush();
402 scoped_refptr<AudioChunk> encoded_data(
403 preamble_encoder_->GetEncodedDataAndClear());
404 UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
406 return STATE_BOTH_STREAMS_CONNECTED;
409 GoogleStreamingRemoteEngine::FSMState
410 GoogleStreamingRemoteEngine::TransmitAudioUpstream(
411 const FSMEventArgs& event_args) {
412 DCHECK(upstream_fetcher_.get());
413 DCHECK(event_args.audio_data.get());
414 const AudioChunk& audio = *(event_args.audio_data.get());
416 DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
417 encoder_->Encode(audio);
418 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
419 UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
420 return state_;
423 GoogleStreamingRemoteEngine::FSMState
424 GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
425 const FSMEventArgs& event_args) {
426 DCHECK(event_args.response.get());
428 proto::SpeechRecognitionEvent ws_event;
429 if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
430 event_args.response->end())))
431 return AbortWithError(event_args);
433 // An empty (default) event is used to notify us that the upstream has
434 // been connected. Ignore.
435 if (!ws_event.result_size() && (!ws_event.has_status() ||
436 ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) {
437 DVLOG(1) << "Received empty response";
438 return state_;
441 if (ws_event.has_status()) {
442 switch (ws_event.status()) {
443 case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
444 break;
445 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
446 return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
447 case proto::SpeechRecognitionEvent::STATUS_ABORTED:
448 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
449 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
450 return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE);
451 case proto::SpeechRecognitionEvent::STATUS_NETWORK:
452 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
453 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
454 return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED);
455 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
456 return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED);
457 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
458 return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
459 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
460 return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED);
464 SpeechRecognitionResults results;
465 for (int i = 0; i < ws_event.result_size(); ++i) {
466 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
467 results.push_back(SpeechRecognitionResult());
468 SpeechRecognitionResult& result = results.back();
469 result.is_provisional = !(ws_result.has_final() && ws_result.final());
471 if (!result.is_provisional)
472 got_last_definitive_result_ = true;
474 for (int j = 0; j < ws_result.alternative_size(); ++j) {
475 const proto::SpeechRecognitionAlternative& ws_alternative =
476 ws_result.alternative(j);
477 SpeechRecognitionHypothesis hypothesis;
478 if (ws_alternative.has_confidence())
479 hypothesis.confidence = ws_alternative.confidence();
480 else if (ws_result.has_stability())
481 hypothesis.confidence = ws_result.stability();
482 DCHECK(ws_alternative.has_transcript());
483 // TODO(hans): Perhaps the transcript should be required in the proto?
484 if (ws_alternative.has_transcript())
485 hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
487 result.hypotheses.push_back(hypothesis);
491 delegate()->OnSpeechRecognitionEngineResults(results);
493 return state_;
496 GoogleStreamingRemoteEngine::FSMState
497 GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
498 const FSMEventArgs& event_args) {
499 if (!got_last_definitive_result_) {
500 // Provide an empty result to notify that recognition is ended with no
501 // errors, yet neither any further results.
502 delegate()->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
504 return AbortSilently(event_args);
507 GoogleStreamingRemoteEngine::FSMState
508 GoogleStreamingRemoteEngine::CloseUpstreamAndWaitForResults(
509 const FSMEventArgs&) {
510 DCHECK(upstream_fetcher_.get());
511 DCHECK(encoder_.get());
513 DVLOG(1) << "Closing upstream.";
515 // The encoder requires a non-empty final buffer. So we encode a packet
516 // of silence in case encoder had no data already.
517 size_t sample_count =
518 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
519 scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
520 sample_count * sizeof(int16), encoder_->GetBitsPerSample() / 8);
521 encoder_->Encode(*dummy_chunk.get());
522 encoder_->Flush();
523 scoped_refptr<AudioChunk> encoded_dummy_data =
524 encoder_->GetEncodedDataAndClear();
525 DCHECK(!encoded_dummy_data->IsEmpty());
526 encoder_.reset();
528 UploadAudioChunk(encoded_dummy_data->AsString(),
529 FRAME_RECOGNITION_AUDIO,
530 true);
531 got_last_definitive_result_ = false;
532 return STATE_WAITING_DOWNSTREAM_RESULTS;
535 GoogleStreamingRemoteEngine::FSMState
536 GoogleStreamingRemoteEngine::CloseDownstream(const FSMEventArgs&) {
537 DCHECK(!upstream_fetcher_.get());
538 DCHECK(downstream_fetcher_.get());
540 DVLOG(1) << "Closing downstream.";
541 downstream_fetcher_.reset();
542 return STATE_IDLE;
545 GoogleStreamingRemoteEngine::FSMState
546 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) {
547 return Abort(SPEECH_RECOGNITION_ERROR_NONE);
550 GoogleStreamingRemoteEngine::FSMState
551 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) {
552 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
555 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort(
556 SpeechRecognitionErrorCode error_code) {
557 DVLOG(1) << "Aborting with error " << error_code;
559 if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
560 delegate()->OnSpeechRecognitionEngineError(
561 SpeechRecognitionError(error_code));
563 downstream_fetcher_.reset();
564 upstream_fetcher_.reset();
565 encoder_.reset();
566 return STATE_IDLE;
569 GoogleStreamingRemoteEngine::FSMState
570 GoogleStreamingRemoteEngine::DoNothing(const FSMEventArgs&) {
571 return state_;
574 GoogleStreamingRemoteEngine::FSMState
575 GoogleStreamingRemoteEngine::NotFeasible(const FSMEventArgs& event_args) {
576 NOTREACHED() << "Unfeasible event " << event_args.event
577 << " in state " << state_;
578 return state_;
581 std::string GoogleStreamingRemoteEngine::GetAcceptedLanguages() const {
582 std::string langs = config_.language;
583 if (langs.empty() && url_context_.get()) {
584 // If no language is provided then we use the first from the accepted
585 // language list. If this list is empty then it defaults to "en-US".
586 // Example of the contents of this list: "es,en-GB;q=0.8", ""
587 net::URLRequestContext* request_context =
588 url_context_->GetURLRequestContext();
589 DCHECK(request_context);
590 // TODO(pauljensen): GoogleStreamingRemoteEngine should be constructed with
591 // a reference to the HttpUserAgentSettings rather than accessing the
592 // accept language through the URLRequestContext.
593 if (request_context->http_user_agent_settings()) {
594 std::string accepted_language_list =
595 request_context->http_user_agent_settings()->GetAcceptLanguage();
596 size_t separator = accepted_language_list.find_first_of(",;");
597 if (separator != std::string::npos)
598 langs = accepted_language_list.substr(0, separator);
601 if (langs.empty())
602 langs = "en-US";
603 return langs;
606 // TODO(primiano): Is there any utility in the codebase that already does this?
607 std::string GoogleStreamingRemoteEngine::GenerateRequestKey() const {
608 const int64 kKeepLowBytes = 0x00000000FFFFFFFFLL;
609 const int64 kKeepHighBytes = 0xFFFFFFFF00000000LL;
611 // Just keep the least significant bits of timestamp, in order to reduce
612 // probability of collisions.
613 int64 key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
614 (base::RandUint64() & kKeepHighBytes);
615 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
618 void GoogleStreamingRemoteEngine::UploadAudioChunk(const std::string& data,
619 FrameType type,
620 bool is_final) {
621 if (use_framed_post_data_) {
622 std::string frame(data.size() + 8, 0);
623 base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
624 base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
625 frame.replace(8, data.size(), data);
626 upstream_fetcher_->AppendChunkToUpload(frame, is_final);
627 } else {
628 upstream_fetcher_->AppendChunkToUpload(data, is_final);
632 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
633 : event(event_value) {
636 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() {
639 } // namespace content