content/browser/speech/google_streaming_remote_engine.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/browser/speech/google_streaming_remote_engine.h"
   6
   7 #include <algorithm>
   8 #include <vector>
   9
  10 #include "base/big_endian.h"
  11 #include "base/bind.h"
  12 #include "base/rand_util.h"
  13 #include "base/strings/string_number_conversions.h"
  14 #include "base/strings/string_util.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "base/time/time.h"
  17 #include "content/browser/speech/audio_buffer.h"
  18 #include "content/browser/speech/proto/google_streaming_api.pb.h"
  19 #include "content/public/common/speech_recognition_error.h"
  20 #include "content/public/common/speech_recognition_result.h"
  21 #include "google_apis/google_api_keys.h"
  22 #include "net/base/escape.h"
  23 #include "net/base/load_flags.h"
  24 #include "net/url_request/http_user_agent_settings.h"
  25 #include "net/url_request/url_fetcher.h"
  26 #include "net/url_request/url_request_context.h"
  27 #include "net/url_request/url_request_context_getter.h"
  28 #include "net/url_request/url_request_status.h"
  29
  30 using net::URLFetcher;
  31
  32 namespace content {
  33 namespace {
  34
// Base endpoint of the full-duplex speech recognition web service. One of the
// per-direction paths below is appended to it when the request URLs are built.
const char kWebServiceBaseUrl[] =
    "https://www.google.com/speech-api/full-duplex/v1";
// Path (including the query separator) used for the downstream GET request.
const char kDownstreamUrl[] = "/down?";
// Path (including the query separator) used for the upstream POST request.
const char kUpstreamUrl[] = "/up?";

// This matches the maximum maxAlternatives value supported by the server.
const uint32 kMaxMaxAlternatives = 30;
  42
  43 // TODO(hans): Remove this and other logging when we don't need it anymore.
  44 void DumpResponse(const std::string& response) {
  45   DVLOG(1) << "------------";
  46   proto::SpeechRecognitionEvent event;
  47   if (!event.ParseFromString(response)) {
  48     DVLOG(1) << "Parse failed!";
  49     return;
  50   }
  51   if (event.has_status())
  52     DVLOG(1) << "STATUS\t" << event.status();
  53   for (int i = 0; i < event.result_size(); ++i) {
  54     DVLOG(1) << "RESULT #" << i << ":";
  55     const proto::SpeechRecognitionResult& res = event.result(i);
  56     if (res.has_final())
  57       DVLOG(1) << "  final:\t" << res.final();
  58     if (res.has_stability())
  59       DVLOG(1) << "  STABILITY:\t" << res.stability();
  60     for (int j = 0; j < res.alternative_size(); ++j) {
  61       const proto::SpeechRecognitionAlternative& alt =
  62           res.alternative(j);
  63       if (alt.has_confidence())
  64         DVLOG(1) << "    CONFIDENCE:\t" << alt.confidence();
  65       if (alt.has_transcript())
  66         DVLOG(1) << "    TRANSCRIPT:\t" << alt.transcript();
  67     }
  68   }
  69 }
  70
  71 }  // namespace
  72
// Audio chunk duration, in milliseconds, reported by
// GetDesiredAudioChunkDurationMs() and used to size the final silence packet.
const int GoogleStreamingRemoteEngine::kAudioPacketIntervalMs = 100;
// Ids passed to URLFetcher::Create() for the upstream and downstream fetchers.
const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting = 0;
const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting = 1;
// NOTE(review): not referenced elsewhere in this file; presumably numeric
// status codes of the web service -- confirm against the header and tests.
const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0;
const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5;
  78
// Creates an engine in STATE_IDLE. |context| is stored and later handed to
// both URL fetchers as their request context; it is also consulted by
// GetAcceptedLanguages() when no language is configured.
GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine(
    net::URLRequestContextGetter* context)
    : url_context_(context),
      previous_response_length_(0),
      got_last_definitive_result_(false),
      is_dispatching_event_(false),
      use_framed_post_data_(false),
      state_(STATE_IDLE) {}
  87
// Nothing to do explicitly: the destructor body is empty.
GoogleStreamingRemoteEngine::~GoogleStreamingRemoteEngine() {}
  89
// Replaces the stored configuration with a copy of |config|. The stored value
// is read when the streams are connected (see ConnectBothStreams()).
void GoogleStreamingRemoteEngine::SetConfig(
    const SpeechRecognitionEngineConfig& config) {
  config_ = config;
}
  94
  95 void GoogleStreamingRemoteEngine::StartRecognition() {
  96   FSMEventArgs event_args(EVENT_START_RECOGNITION);
  97   DispatchEvent(event_args);
  98 }
  99
 100 void GoogleStreamingRemoteEngine::EndRecognition() {
 101   FSMEventArgs event_args(EVENT_END_RECOGNITION);
 102   DispatchEvent(event_args);
 103 }
 104
 105 void GoogleStreamingRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
 106   FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
 107   event_args.audio_data = &data;
 108   DispatchEvent(event_args);
 109 }
 110
 111 void GoogleStreamingRemoteEngine::AudioChunksEnded() {
 112   FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
 113   DispatchEvent(event_args);
 114 }
 115
 116 void GoogleStreamingRemoteEngine::OnURLFetchComplete(const URLFetcher* source) {
 117   const bool kResponseComplete = true;
 118   DispatchHTTPResponse(source, kResponseComplete);
 119 }
 120
 121 void GoogleStreamingRemoteEngine::OnURLFetchDownloadProgress(
 122     const URLFetcher* source, int64 current, int64 total) {
 123   const bool kPartialResponse = false;
 124   DispatchHTTPResponse(source, kPartialResponse);
 125 }
 126
 127 void GoogleStreamingRemoteEngine::DispatchHTTPResponse(const URLFetcher* source,
 128                                                        bool end_of_response) {
 129   DCHECK(CalledOnValidThread());
 130   DCHECK(source);
 131   const bool response_is_good = source->GetStatus().is_success() &&
 132                                 source->GetResponseCode() == 200;
 133   std::string response;
 134   if (response_is_good)
 135     source->GetResponseAsString(&response);
 136   const size_t current_response_length = response.size();
 137
 138   DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
 139            << "HTTP, code: " << source->GetResponseCode()
 140            << "      length: " << current_response_length
 141            << "      eor: " << end_of_response;
 142
 143   // URLFetcher provides always the entire response buffer, but we are only
 144   // interested in the fresh data introduced by the last chunk. Therefore, we
 145   // drop the previous content we have already processed.
 146   if (current_response_length != 0) {
 147     DCHECK_GE(current_response_length, previous_response_length_);
 148     response.erase(0, previous_response_length_);
 149     previous_response_length_ = current_response_length;
 150   }
 151
 152   if (!response_is_good && source == downstream_fetcher_.get()) {
 153     DVLOG(1) << "Downstream error " << source->GetResponseCode();
 154     FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
 155     DispatchEvent(event_args);
 156     return;
 157   }
 158   if (!response_is_good && source == upstream_fetcher_.get()) {
 159     DVLOG(1) << "Upstream error " << source->GetResponseCode()
 160              << " EOR " << end_of_response;
 161     FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
 162     DispatchEvent(event_args);
 163     return;
 164   }
 165
 166   // Ignore incoming data on the upstream connection.
 167   if (source == upstream_fetcher_.get())
 168     return;
 169
 170   DCHECK(response_is_good && source == downstream_fetcher_.get());
 171
 172   // The downstream response is organized in chunks, whose size is determined
 173   // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
 174   // Such chunks are sent by the speech recognition webservice over the HTTP
 175   // downstream channel using HTTP chunked transfer (unrelated to our chunks).
 176   // This function is called every time an HTTP chunk is received by the
 177   // url fetcher. However there isn't any particular matching beween our
 178   // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
 179   // contain a portion of one chunk or even more chunks together.
 180   chunked_byte_buffer_.Append(response);
 181
 182   // A single HTTP chunk can contain more than one data chunk, thus the while.
 183   while (chunked_byte_buffer_.HasChunks()) {
 184     FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
 185     event_args.response = chunked_byte_buffer_.PopChunk();
 186     DCHECK(event_args.response.get());
 187     DumpResponse(std::string(event_args.response->begin(),
 188                              event_args.response->end()));
 189     DispatchEvent(event_args);
 190   }
 191   if (end_of_response) {
 192     FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
 193     DispatchEvent(event_args);
 194   }
 195 }
 196
 197 bool GoogleStreamingRemoteEngine::IsRecognitionPending() const {
 198   DCHECK(CalledOnValidThread());
 199   return state_ != STATE_IDLE;
 200 }
 201
// Returns kAudioPacketIntervalMs, the audio chunk duration (in milliseconds)
// this engine asks for.
int GoogleStreamingRemoteEngine::GetDesiredAudioChunkDurationMs() const {
  return kAudioPacketIntervalMs;
}
 205
 206 // -----------------------  Core FSM implementation ---------------------------
 207
 208 void GoogleStreamingRemoteEngine::DispatchEvent(
 209     const FSMEventArgs& event_args) {
 210   DCHECK(CalledOnValidThread());
 211   DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
 212   DCHECK_LE(state_, STATE_MAX_VALUE);
 213
 214   // Event dispatching must be sequential, otherwise it will break all the rules
 215   // and the assumptions of the finite state automata model.
 216   DCHECK(!is_dispatching_event_);
 217   is_dispatching_event_ = true;
 218
 219   state_ = ExecuteTransitionAndGetNextState(event_args);
 220
 221   is_dispatching_event_ = false;
 222 }
 223
// Transition table of the state machine. Selects the handler for the pair
// (current |state_|, |event_args.event|), runs it and returns the state the
// handler yields. Pairs that are not expected to occur are routed to
// NotFeasible().
GoogleStreamingRemoteEngine::FSMState
GoogleStreamingRemoteEngine::ExecuteTransitionAndGetNextState(
    const FSMEventArgs& event_args) {
  const FSMEvent event = event_args.event;
  switch (state_) {
    // No recognition in progress.
    case STATE_IDLE:
      switch (event) {
        case EVENT_START_RECOGNITION:
          return ConnectBothStreams(event_args);
        case EVENT_END_RECOGNITION:
        // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
        // abort, so we just silently drop them here.
        case EVENT_AUDIO_CHUNK:
        case EVENT_AUDIO_CHUNKS_ENDED:
        // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
        case EVENT_DOWNSTREAM_CLOSED:
          return DoNothing(event_args);
        case EVENT_UPSTREAM_ERROR:
        case EVENT_DOWNSTREAM_ERROR:
        case EVENT_DOWNSTREAM_RESPONSE:
          return NotFeasible(event_args);
      }
      break;
    // Both fetchers started; audio is being uploaded.
    case STATE_BOTH_STREAMS_CONNECTED:
      switch (event) {
        case EVENT_AUDIO_CHUNK:
          return TransmitAudioUpstream(event_args);
        case EVENT_DOWNSTREAM_RESPONSE:
          return ProcessDownstreamResponse(event_args);
        case EVENT_AUDIO_CHUNKS_ENDED:
          return CloseUpstreamAndWaitForResults(event_args);
        case EVENT_END_RECOGNITION:
          return AbortSilently(event_args);
        // The downstream closing while audio is still being sent is handled
        // like an error.
        case EVENT_UPSTREAM_ERROR:
        case EVENT_DOWNSTREAM_ERROR:
        case EVENT_DOWNSTREAM_CLOSED:
          return AbortWithError(event_args);
        case EVENT_START_RECOGNITION:
          return NotFeasible(event_args);
      }
      break;
    // Final audio chunk uploaded; only downstream results are awaited.
    case STATE_WAITING_DOWNSTREAM_RESULTS:
      switch (event) {
        case EVENT_DOWNSTREAM_RESPONSE:
          return ProcessDownstreamResponse(event_args);
        case EVENT_DOWNSTREAM_CLOSED:
          return RaiseNoMatchErrorIfGotNoResults(event_args);
        case EVENT_END_RECOGNITION:
          return AbortSilently(event_args);
        case EVENT_UPSTREAM_ERROR:
        case EVENT_DOWNSTREAM_ERROR:
          return AbortWithError(event_args);
        case EVENT_START_RECOGNITION:
        case EVENT_AUDIO_CHUNK:
        case EVENT_AUDIO_CHUNKS_ENDED:
          return NotFeasible(event_args);
      }
      break;
  }
  // Reached only when |state_| or |event| holds a value not listed above.
  return NotFeasible(event_args);
}
 285
 286 // ----------- Contract for all the FSM evolution functions below -------------
 287 //  - Are guaranteed to be executed in the same thread (IO, except for tests);
 288 //  - Are guaranteed to be not reentrant (themselves and each other);
 289 //  - event_args members are guaranteed to be stable during the call;
 290
 291 GoogleStreamingRemoteEngine::FSMState
 292 GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
 293   DCHECK(!upstream_fetcher_.get());
 294   DCHECK(!downstream_fetcher_.get());
 295
 296   encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
 297                                   config_.audio_num_bits_per_sample));
 298   DCHECK(encoder_.get());
 299   const std::string request_key = GenerateRequestKey();
 300
 301   // Only use the framed post data format when a preamble needs to be logged.
 302   use_framed_post_data_ = (config_.preamble &&
 303                            !config_.preamble->sample_data.empty() &&
 304                            !config_.auth_token.empty() &&
 305                            !config_.auth_scope.empty());
 306   if (use_framed_post_data_) {
 307     preamble_encoder_.reset(new AudioEncoder(
 308         config_.preamble->sample_rate,
 309         config_.preamble->sample_depth * 8));
 310   }
 311
 312   // Setup downstream fetcher.
 313   std::vector<std::string> downstream_args;
 314   downstream_args.push_back(
 315       "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
 316   downstream_args.push_back("pair=" + request_key);
 317   downstream_args.push_back("output=pb");
 318   GURL downstream_url(std::string(kWebServiceBaseUrl) +
 319                       std::string(kDownstreamUrl) +
 320                       base::JoinString(downstream_args, "&"));
 321
 322   downstream_fetcher_ = URLFetcher::Create(
 323       kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this);
 324   downstream_fetcher_->SetRequestContext(url_context_.get());
 325   downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
 326                                     net::LOAD_DO_NOT_SEND_COOKIES |
 327                                     net::LOAD_DO_NOT_SEND_AUTH_DATA);
 328   downstream_fetcher_->Start();
 329
 330   // Setup upstream fetcher.
 331   // TODO(hans): Support for user-selected grammars.
 332   std::vector<std::string> upstream_args;
 333   upstream_args.push_back("key=" +
 334       net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
 335   upstream_args.push_back("pair=" + request_key);
 336   upstream_args.push_back("output=pb");
 337   upstream_args.push_back(
 338       "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
 339   upstream_args.push_back(
 340       config_.filter_profanities ? "pFilter=2" : "pFilter=0");
 341   if (config_.max_hypotheses > 0U) {
 342     int max_alternatives = std::min(kMaxMaxAlternatives,
 343                                     config_.max_hypotheses);
 344     upstream_args.push_back("maxAlternatives=" +
 345                             base::UintToString(max_alternatives));
 346   }
 347   upstream_args.push_back("client=chromium");
 348   if (!config_.hardware_info.empty()) {
 349     upstream_args.push_back(
 350         "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
 351   }
 352   if (config_.continuous)
 353     upstream_args.push_back("continuous");
 354   if (config_.interim_results)
 355     upstream_args.push_back("interim");
 356   if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
 357     upstream_args.push_back(
 358         "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
 359     upstream_args.push_back(
 360         "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
 361   }
 362   if (use_framed_post_data_) {
 363     std::string audio_format;
 364     if (preamble_encoder_)
 365       audio_format = preamble_encoder_->GetMimeType() + ",";
 366     audio_format += encoder_->GetMimeType();
 367     upstream_args.push_back(
 368         "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
 369   }
 370   GURL upstream_url(std::string(kWebServiceBaseUrl) +
 371                     std::string(kUpstreamUrl) +
 372                     base::JoinString(upstream_args, "&"));
 373
 374   upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting,
 375                                          upstream_url, URLFetcher::POST, this);
 376   if (use_framed_post_data_)
 377     upstream_fetcher_->SetChunkedUpload("application/octet-stream");
 378   else
 379     upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
 380   upstream_fetcher_->SetRequestContext(url_context_.get());
 381   upstream_fetcher_->SetReferrer(config_.origin_url);
 382   upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
 383                                   net::LOAD_DO_NOT_SEND_COOKIES |
 384                                   net::LOAD_DO_NOT_SEND_AUTH_DATA);
 385   upstream_fetcher_->Start();
 386   previous_response_length_ = 0;
 387
 388   if (preamble_encoder_) {
 389     // Encode and send preamble right away.
 390     scoped_refptr<AudioChunk> chunk = new AudioChunk(
 391         reinterpret_cast<const uint8*>(config_.preamble->sample_data.data()),
 392         config_.preamble->sample_data.size(),
 393         config_.preamble->sample_depth);
 394     preamble_encoder_->Encode(*chunk);
 395     preamble_encoder_->Flush();
 396     scoped_refptr<AudioChunk> encoded_data(
 397         preamble_encoder_->GetEncodedDataAndClear());
 398     UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
 399   }
 400   return STATE_BOTH_STREAMS_CONNECTED;
 401 }
 402
 403 GoogleStreamingRemoteEngine::FSMState
 404 GoogleStreamingRemoteEngine::TransmitAudioUpstream(
 405     const FSMEventArgs& event_args) {
 406   DCHECK(upstream_fetcher_.get());
 407   DCHECK(event_args.audio_data.get());
 408   const AudioChunk& audio = *(event_args.audio_data.get());
 409
 410   DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
 411   encoder_->Encode(audio);
 412   scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
 413   UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
 414   return state_;
 415 }
 416
 417 GoogleStreamingRemoteEngine::FSMState
 418 GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
 419     const FSMEventArgs& event_args) {
 420   DCHECK(event_args.response.get());
 421
 422   proto::SpeechRecognitionEvent ws_event;
 423   if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
 424                                             event_args.response->end())))
 425     return AbortWithError(event_args);
 426
 427   // An empty (default) event is used to notify us that the upstream has
 428   // been connected. Ignore.
 429   if (!ws_event.result_size() && (!ws_event.has_status() ||
 430       ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) {
 431     DVLOG(1) << "Received empty response";
 432     return state_;
 433   }
 434
 435   if (ws_event.has_status()) {
 436     switch (ws_event.status()) {
 437       case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
 438         break;
 439       case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
 440         return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
 441       case proto::SpeechRecognitionEvent::STATUS_ABORTED:
 442         return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
 443       case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
 444         return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE);
 445       case proto::SpeechRecognitionEvent::STATUS_NETWORK:
 446         return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
 447       case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
 448         return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED);
 449       case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
 450         return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED);
 451       case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
 452         return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
 453       case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
 454         return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED);
 455     }
 456   }
 457
 458   SpeechRecognitionResults results;
 459   for (int i = 0; i < ws_event.result_size(); ++i) {
 460     const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
 461     results.push_back(SpeechRecognitionResult());
 462     SpeechRecognitionResult& result = results.back();
 463     result.is_provisional = !(ws_result.has_final() && ws_result.final());
 464
 465     if (!result.is_provisional)
 466       got_last_definitive_result_ = true;
 467
 468     for (int j = 0; j < ws_result.alternative_size(); ++j) {
 469       const proto::SpeechRecognitionAlternative& ws_alternative =
 470           ws_result.alternative(j);
 471       SpeechRecognitionHypothesis hypothesis;
 472       if (ws_alternative.has_confidence())
 473         hypothesis.confidence = ws_alternative.confidence();
 474       else if (ws_result.has_stability())
 475         hypothesis.confidence = ws_result.stability();
 476       DCHECK(ws_alternative.has_transcript());
 477       // TODO(hans): Perhaps the transcript should be required in the proto?
 478       if (ws_alternative.has_transcript())
 479         hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
 480
 481       result.hypotheses.push_back(hypothesis);
 482     }
 483   }
 484
 485   delegate()->OnSpeechRecognitionEngineResults(results);
 486
 487   return state_;
 488 }
 489
 490 GoogleStreamingRemoteEngine::FSMState
 491 GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
 492     const FSMEventArgs& event_args) {
 493   if (!got_last_definitive_result_) {
 494     // Provide an empty result to notify that recognition is ended with no
 495     // errors, yet neither any further results.
 496     delegate()->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
 497   }
 498   return AbortSilently(event_args);
 499 }
 500
 501 GoogleStreamingRemoteEngine::FSMState
 502 GoogleStreamingRemoteEngine::CloseUpstreamAndWaitForResults(
 503     const FSMEventArgs&) {
 504   DCHECK(upstream_fetcher_.get());
 505   DCHECK(encoder_.get());
 506
 507   DVLOG(1) <<  "Closing upstream.";
 508
 509   // The encoder requires a non-empty final buffer. So we encode a packet
 510   // of silence in case encoder had no data already.
 511   size_t sample_count =
 512       config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
 513   scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
 514       sample_count * sizeof(int16), encoder_->GetBitsPerSample() / 8);
 515   encoder_->Encode(*dummy_chunk.get());
 516   encoder_->Flush();
 517   scoped_refptr<AudioChunk> encoded_dummy_data =
 518       encoder_->GetEncodedDataAndClear();
 519   DCHECK(!encoded_dummy_data->IsEmpty());
 520   encoder_.reset();
 521
 522   UploadAudioChunk(encoded_dummy_data->AsString(),
 523                    FRAME_RECOGNITION_AUDIO,
 524                    true);
 525   got_last_definitive_result_ = false;
 526   return STATE_WAITING_DOWNSTREAM_RESULTS;
 527 }
 528
// Destroys the downstream fetcher and returns STATE_IDLE. Expects the
// upstream fetcher to be gone already and the downstream one to exist.
// NOTE(review): not referenced by the transition table in this file.
GoogleStreamingRemoteEngine::FSMState
GoogleStreamingRemoteEngine::CloseDownstream(const FSMEventArgs&) {
  DCHECK(!upstream_fetcher_.get());
  DCHECK(downstream_fetcher_.get());

  DVLOG(1) <<  "Closing downstream.";
  downstream_fetcher_.reset();
  return STATE_IDLE;
}
 538
// Tears the session down via Abort() without reporting an error to the
// delegate (SPEECH_RECOGNITION_ERROR_NONE).
GoogleStreamingRemoteEngine::FSMState
GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) {
  return Abort(SPEECH_RECOGNITION_ERROR_NONE);
}
 543
// Tears the session down via Abort(), reporting a network error
// (SPEECH_RECOGNITION_ERROR_NETWORK) to the delegate.
GoogleStreamingRemoteEngine::FSMState
GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) {
  return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
}
 548
 549 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort(
 550     SpeechRecognitionErrorCode error_code) {
 551   DVLOG(1) << "Aborting with error " << error_code;
 552
 553   if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
 554     delegate()->OnSpeechRecognitionEngineError(
 555         SpeechRecognitionError(error_code));
 556   }
 557   downstream_fetcher_.reset();
 558   upstream_fetcher_.reset();
 559   encoder_.reset();
 560   return STATE_IDLE;
 561 }
 562
// Transition that ignores the event and keeps the current state.
GoogleStreamingRemoteEngine::FSMState
GoogleStreamingRemoteEngine::DoNothing(const FSMEventArgs&) {
  return state_;
}
 567
// Transition for (state, event) pairs that are not expected to occur: hits
// NOTREACHED() with the offending event and state, then keeps the current
// state.
GoogleStreamingRemoteEngine::FSMState
GoogleStreamingRemoteEngine::NotFeasible(const FSMEventArgs& event_args) {
  NOTREACHED() << "Unfeasible event " << event_args.event
               << " in state " << state_;
  return state_;
}
 574
 575 std::string GoogleStreamingRemoteEngine::GetAcceptedLanguages() const {
 576   std::string langs = config_.language;
 577   if (langs.empty() && url_context_.get()) {
 578     // If no language is provided then we use the first from the accepted
 579     // language list. If this list is empty then it defaults to "en-US".
 580     // Example of the contents of this list: "es,en-GB;q=0.8", ""
 581     net::URLRequestContext* request_context =
 582         url_context_->GetURLRequestContext();
 583     DCHECK(request_context);
 584     // TODO(pauljensen): GoogleStreamingRemoteEngine should be constructed with
 585     // a reference to the HttpUserAgentSettings rather than accessing the
 586     // accept language through the URLRequestContext.
 587     if (request_context->http_user_agent_settings()) {
 588       std::string accepted_language_list =
 589           request_context->http_user_agent_settings()->GetAcceptLanguage();
 590       size_t separator = accepted_language_list.find_first_of(",;");
 591       if (separator != std::string::npos)
 592         langs = accepted_language_list.substr(0, separator);
 593     }
 594   }
 595   if (langs.empty())
 596     langs = "en-US";
 597   return langs;
 598 }
 599
 600 // TODO(primiano): Is there any utility in the codebase that already does this?
 601 std::string GoogleStreamingRemoteEngine::GenerateRequestKey() const {
 602   const int64 kKeepLowBytes = 0x00000000FFFFFFFFLL;
 603   const int64 kKeepHighBytes = 0xFFFFFFFF00000000LL;
 604
 605   // Just keep the least significant bits of timestamp, in order to reduce
 606   // probability of collisions.
 607   int64 key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
 608               (base::RandUint64() & kKeepHighBytes);
 609   return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
 610 }
 611
 612 void GoogleStreamingRemoteEngine::UploadAudioChunk(const std::string& data,
 613                                                    FrameType type,
 614                                                    bool is_final) {
 615   if (use_framed_post_data_) {
 616     std::string frame(data.size() + 8, 0);
 617     base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
 618     base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
 619     frame.replace(8, data.size(), data);
 620     upstream_fetcher_->AppendChunkToUpload(frame, is_final);
 621   } else {
 622     upstream_fetcher_->AppendChunkToUpload(data, is_final);
 623   }
 624 }
 625
// Creates the arguments for event |event_value|. The other members
// (|audio_data|, |response|) are filled in by the caller when the event
// carries a payload.
GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
    : event(event_value) {
}
 629
// Out-of-line destructor with an empty body.
GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() {
}
 632
 633 }  // namespace content