content/browser/speech/google_streaming_remote_engine.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/browser/speech/google_streaming_remote_engine.h"
   6
   7 #include <algorithm>
   8 #include <vector>
   9
  10 #include "base/big_endian.h"
  11 #include "base/bind.h"
  12 #include "base/rand_util.h"
  13 #include "base/strings/string_number_conversions.h"
  14 #include "base/strings/string_util.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "base/time/time.h"
  17 #include "content/browser/speech/audio_buffer.h"
  18 #include "content/browser/speech/proto/google_streaming_api.pb.h"
  19 #include "content/public/common/speech_recognition_error.h"
  20 #include "content/public/common/speech_recognition_result.h"
  21 #include "google_apis/google_api_keys.h"
  22 #include "net/base/escape.h"
  23 #include "net/base/load_flags.h"
  24 #include "net/url_request/http_user_agent_settings.h"
  25 #include "net/url_request/url_fetcher.h"
  26 #include "net/url_request/url_request_context.h"
  27 #include "net/url_request/url_request_context_getter.h"
  28 #include "net/url_request/url_request_status.h"
  29
  30 using net::URLFetcher;
  31
  32 namespace content {
  33 namespace {
  34
  35 const char kWebServiceBaseUrl[] =
  36     "https://www.google.com/speech-api/full-duplex/v1";
  37 const char kDownstreamUrl[] = "/down?";
  38 const char kUpstreamUrl[] = "/up?";
  39 const AudioEncoder::Codec kDefaultAudioCodec = AudioEncoder::CODEC_FLAC;
  40
  41 // This matches the maximum maxAlternatives value supported by the server.
  42 const uint32 kMaxMaxAlternatives = 30;
  43
  44 // TODO(hans): Remove this and other logging when we don't need it anymore.
  45 void DumpResponse(const std::string& response) {
  46   DVLOG(1) << "------------";
  47   proto::SpeechRecognitionEvent event;
  48   if (!event.ParseFromString(response)) {
  49     DVLOG(1) << "Parse failed!";
  50     return;
  51   }
  52   if (event.has_status())
  53     DVLOG(1) << "STATUS\t" << event.status();
  54   for (int i = 0; i < event.result_size(); ++i) {
  55     DVLOG(1) << "RESULT #" << i << ":";
  56     const proto::SpeechRecognitionResult& res = event.result(i);
  57     if (res.has_final())
  58       DVLOG(1) << "  final:\t" << res.final();
  59     if (res.has_stability())
  60       DVLOG(1) << "  STABILITY:\t" << res.stability();
  61     for (int j = 0; j < res.alternative_size(); ++j) {
  62       const proto::SpeechRecognitionAlternative& alt =
  63           res.alternative(j);
  64       if (alt.has_confidence())
  65         DVLOG(1) << "    CONFIDENCE:\t" << alt.confidence();
  66       if (alt.has_transcript())
  67         DVLOG(1) << "    TRANSCRIPT:\t" << alt.transcript();
  68     }
  69   }
  70 }
  71
  72 }  // namespace
  73
  74 const int GoogleStreamingRemoteEngine::kAudioPacketIntervalMs = 100;
  75 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting = 0;
  76 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting = 1;
  77 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0;
  78 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5;
  79
  80 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine(
  81     net::URLRequestContextGetter* context)
  82     : url_context_(context),
  83       previous_response_length_(0),
  84       got_last_definitive_result_(false),
  85       is_dispatching_event_(false),
  86       use_framed_post_data_(false),
  87       state_(STATE_IDLE) {}
  88
  89 GoogleStreamingRemoteEngine::~GoogleStreamingRemoteEngine() {}
  90
  91 void GoogleStreamingRemoteEngine::SetConfig(
  92     const SpeechRecognitionEngineConfig& config) {
  93   config_ = config;
  94 }
  95
  96 void GoogleStreamingRemoteEngine::StartRecognition() {
  97   FSMEventArgs event_args(EVENT_START_RECOGNITION);
  98   DispatchEvent(event_args);
  99 }
 100
 101 void GoogleStreamingRemoteEngine::EndRecognition() {
 102   FSMEventArgs event_args(EVENT_END_RECOGNITION);
 103   DispatchEvent(event_args);
 104 }
 105
 106 void GoogleStreamingRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
 107   FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
 108   event_args.audio_data = &data;
 109   DispatchEvent(event_args);
 110 }
 111
 112 void GoogleStreamingRemoteEngine::AudioChunksEnded() {
 113   FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
 114   DispatchEvent(event_args);
 115 }
 116
 117 void GoogleStreamingRemoteEngine::OnURLFetchComplete(const URLFetcher* source) {
 118   const bool kResponseComplete = true;
 119   DispatchHTTPResponse(source, kResponseComplete);
 120 }
 121
 122 void GoogleStreamingRemoteEngine::OnURLFetchDownloadProgress(
 123     const URLFetcher* source, int64 current, int64 total) {
 124   const bool kPartialResponse = false;
 125   DispatchHTTPResponse(source, kPartialResponse);
 126 }
 127
 128 void GoogleStreamingRemoteEngine::DispatchHTTPResponse(const URLFetcher* source,
 129                                                        bool end_of_response) {
 130   DCHECK(CalledOnValidThread());
 131   DCHECK(source);
 132   const bool response_is_good = source->GetStatus().is_success() &&
 133                                 source->GetResponseCode() == 200;
 134   std::string response;
 135   if (response_is_good)
 136     source->GetResponseAsString(&response);
 137   const size_t current_response_length = response.size();
 138
 139   DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
 140            << "HTTP, code: " << source->GetResponseCode()
 141            << "      length: " << current_response_length
 142            << "      eor: " << end_of_response;
 143
 144   // URLFetcher provides always the entire response buffer, but we are only
 145   // interested in the fresh data introduced by the last chunk. Therefore, we
 146   // drop the previous content we have already processed.
 147   if (current_response_length != 0) {
 148     DCHECK_GE(current_response_length, previous_response_length_);
 149     response.erase(0, previous_response_length_);
 150     previous_response_length_ = current_response_length;
 151   }
 152
 153   if (!response_is_good && source == downstream_fetcher_.get()) {
 154     DVLOG(1) << "Downstream error " << source->GetResponseCode();
 155     FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
 156     DispatchEvent(event_args);
 157     return;
 158   }
 159   if (!response_is_good && source == upstream_fetcher_.get()) {
 160     DVLOG(1) << "Upstream error " << source->GetResponseCode()
 161              << " EOR " << end_of_response;
 162     FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
 163     DispatchEvent(event_args);
 164     return;
 165   }
 166
 167   // Ignore incoming data on the upstream connection.
 168   if (source == upstream_fetcher_.get())
 169     return;
 170
 171   DCHECK(response_is_good && source == downstream_fetcher_.get());
 172
 173   // The downstream response is organized in chunks, whose size is determined
 174   // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
 175   // Such chunks are sent by the speech recognition webservice over the HTTP
 176   // downstream channel using HTTP chunked transfer (unrelated to our chunks).
 177   // This function is called every time an HTTP chunk is received by the
 178   // url fetcher. However there isn't any particular matching beween our
 179   // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
 180   // contain a portion of one chunk or even more chunks together.
 181   chunked_byte_buffer_.Append(response);
 182
 183   // A single HTTP chunk can contain more than one data chunk, thus the while.
 184   while (chunked_byte_buffer_.HasChunks()) {
 185     FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
 186     event_args.response = chunked_byte_buffer_.PopChunk();
 187     DCHECK(event_args.response.get());
 188     DumpResponse(std::string(event_args.response->begin(),
 189                              event_args.response->end()));
 190     DispatchEvent(event_args);
 191   }
 192   if (end_of_response) {
 193     FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
 194     DispatchEvent(event_args);
 195   }
 196 }
 197
 198 bool GoogleStreamingRemoteEngine::IsRecognitionPending() const {
 199   DCHECK(CalledOnValidThread());
 200   return state_ != STATE_IDLE;
 201 }
 202
 203 int GoogleStreamingRemoteEngine::GetDesiredAudioChunkDurationMs() const {
 204   return kAudioPacketIntervalMs;
 205 }
 206
 207 // -----------------------  Core FSM implementation ---------------------------
 208
 209 void GoogleStreamingRemoteEngine::DispatchEvent(
 210     const FSMEventArgs& event_args) {
 211   DCHECK(CalledOnValidThread());
 212   DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
 213   DCHECK_LE(state_, STATE_MAX_VALUE);
 214
 215   // Event dispatching must be sequential, otherwise it will break all the rules
 216   // and the assumptions of the finite state automata model.
 217   DCHECK(!is_dispatching_event_);
 218   is_dispatching_event_ = true;
 219
 220   state_ = ExecuteTransitionAndGetNextState(event_args);
 221
 222   is_dispatching_event_ = false;
 223 }
 224
 225 GoogleStreamingRemoteEngine::FSMState
 226 GoogleStreamingRemoteEngine::ExecuteTransitionAndGetNextState(
 227     const FSMEventArgs& event_args) {
 228   const FSMEvent event = event_args.event;
 229   switch (state_) {
 230     case STATE_IDLE:
 231       switch (event) {
 232         case EVENT_START_RECOGNITION:
 233           return ConnectBothStreams(event_args);
 234         case EVENT_END_RECOGNITION:
 235         // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
 236         // abort, so we just silently drop them here.
 237         case EVENT_AUDIO_CHUNK:
 238         case EVENT_AUDIO_CHUNKS_ENDED:
 239         // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
 240         case EVENT_DOWNSTREAM_CLOSED:
 241           return DoNothing(event_args);
 242         case EVENT_UPSTREAM_ERROR:
 243         case EVENT_DOWNSTREAM_ERROR:
 244         case EVENT_DOWNSTREAM_RESPONSE:
 245           return NotFeasible(event_args);
 246       }
 247       break;
 248     case STATE_BOTH_STREAMS_CONNECTED:
 249       switch (event) {
 250         case EVENT_AUDIO_CHUNK:
 251           return TransmitAudioUpstream(event_args);
 252         case EVENT_DOWNSTREAM_RESPONSE:
 253           return ProcessDownstreamResponse(event_args);
 254         case EVENT_AUDIO_CHUNKS_ENDED:
 255           return CloseUpstreamAndWaitForResults(event_args);
 256         case EVENT_END_RECOGNITION:
 257           return AbortSilently(event_args);
 258         case EVENT_UPSTREAM_ERROR:
 259         case EVENT_DOWNSTREAM_ERROR:
 260         case EVENT_DOWNSTREAM_CLOSED:
 261           return AbortWithError(event_args);
 262         case EVENT_START_RECOGNITION:
 263           return NotFeasible(event_args);
 264       }
 265       break;
 266     case STATE_WAITING_DOWNSTREAM_RESULTS:
 267       switch (event) {
 268         case EVENT_DOWNSTREAM_RESPONSE:
 269           return ProcessDownstreamResponse(event_args);
 270         case EVENT_DOWNSTREAM_CLOSED:
 271           return RaiseNoMatchErrorIfGotNoResults(event_args);
 272         case EVENT_END_RECOGNITION:
 273           return AbortSilently(event_args);
 274         case EVENT_UPSTREAM_ERROR:
 275         case EVENT_DOWNSTREAM_ERROR:
 276           return AbortWithError(event_args);
 277         case EVENT_START_RECOGNITION:
 278         case EVENT_AUDIO_CHUNK:
 279         case EVENT_AUDIO_CHUNKS_ENDED:
 280           return NotFeasible(event_args);
 281       }
 282       break;
 283   }
 284   return NotFeasible(event_args);
 285 }
 286
 287 // ----------- Contract for all the FSM evolution functions below -------------
 288 //  - Are guaranteed to be executed in the same thread (IO, except for tests);
 289 //  - Are guaranteed to be not reentrant (themselves and each other);
 290 //  - event_args members are guaranteed to be stable during the call;
 291
 292 GoogleStreamingRemoteEngine::FSMState
 293 GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
 294   DCHECK(!upstream_fetcher_.get());
 295   DCHECK(!downstream_fetcher_.get());
 296
 297   encoder_.reset(AudioEncoder::Create(kDefaultAudioCodec,
 298                                       config_.audio_sample_rate,
 299                                       config_.audio_num_bits_per_sample));
 300   DCHECK(encoder_.get());
 301   const std::string request_key = GenerateRequestKey();
 302
 303   // Only use the framed post data format when a preamble needs to be logged.
 304   use_framed_post_data_ = (config_.preamble &&
 305                            !config_.preamble->sample_data.empty() &&
 306                            !config_.auth_token.empty() &&
 307                            !config_.auth_scope.empty());
 308   if (use_framed_post_data_) {
 309     preamble_encoder_.reset(AudioEncoder::Create(
 310         kDefaultAudioCodec,
 311         config_.preamble->sample_rate,
 312         config_.preamble->sample_depth * 8));
 313   }
 314
 315   // Setup downstream fetcher.
 316   std::vector<std::string> downstream_args;
 317   downstream_args.push_back(
 318       "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
 319   downstream_args.push_back("pair=" + request_key);
 320   downstream_args.push_back("output=pb");
 321   GURL downstream_url(std::string(kWebServiceBaseUrl) +
 322                       std::string(kDownstreamUrl) +
 323                       JoinString(downstream_args, '&'));
 324
 325   downstream_fetcher_ = URLFetcher::Create(
 326       kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this);
 327   downstream_fetcher_->SetRequestContext(url_context_.get());
 328   downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
 329                                     net::LOAD_DO_NOT_SEND_COOKIES |
 330                                     net::LOAD_DO_NOT_SEND_AUTH_DATA);
 331   downstream_fetcher_->Start();
 332
 333   // Setup upstream fetcher.
 334   // TODO(hans): Support for user-selected grammars.
 335   std::vector<std::string> upstream_args;
 336   upstream_args.push_back("key=" +
 337       net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
 338   upstream_args.push_back("pair=" + request_key);
 339   upstream_args.push_back("output=pb");
 340   upstream_args.push_back(
 341       "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
 342   upstream_args.push_back(
 343       config_.filter_profanities ? "pFilter=2" : "pFilter=0");
 344   if (config_.max_hypotheses > 0U) {
 345     int max_alternatives = std::min(kMaxMaxAlternatives,
 346                                     config_.max_hypotheses);
 347     upstream_args.push_back("maxAlternatives=" +
 348                             base::UintToString(max_alternatives));
 349   }
 350   upstream_args.push_back("client=chromium");
 351   if (!config_.hardware_info.empty()) {
 352     upstream_args.push_back(
 353         "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
 354   }
 355   if (config_.continuous)
 356     upstream_args.push_back("continuous");
 357   if (config_.interim_results)
 358     upstream_args.push_back("interim");
 359   if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
 360     upstream_args.push_back(
 361         "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
 362     upstream_args.push_back(
 363         "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
 364   }
 365   if (use_framed_post_data_) {
 366     std::string audio_format;
 367     if (preamble_encoder_)
 368       audio_format = preamble_encoder_->mime_type() + ",";
 369     audio_format += encoder_->mime_type();
 370     upstream_args.push_back(
 371         "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
 372   }
 373   GURL upstream_url(std::string(kWebServiceBaseUrl) +
 374                     std::string(kUpstreamUrl) +
 375                     JoinString(upstream_args, '&'));
 376
 377   upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting,
 378                                          upstream_url, URLFetcher::POST, this);
 379   if (use_framed_post_data_)
 380     upstream_fetcher_->SetChunkedUpload("application/octet-stream");
 381   else
 382     upstream_fetcher_->SetChunkedUpload(encoder_->mime_type());
 383   upstream_fetcher_->SetRequestContext(url_context_.get());
 384   upstream_fetcher_->SetReferrer(config_.origin_url);
 385   upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
 386                                   net::LOAD_DO_NOT_SEND_COOKIES |
 387                                   net::LOAD_DO_NOT_SEND_AUTH_DATA);
 388   upstream_fetcher_->Start();
 389   previous_response_length_ = 0;
 390
 391   if (preamble_encoder_) {
 392     // Encode and send preamble right away.
 393     scoped_refptr<AudioChunk> chunk = new AudioChunk(
 394         reinterpret_cast<const uint8*>(config_.preamble->sample_data.data()),
 395         config_.preamble->sample_data.size(),
 396         config_.preamble->sample_depth);
 397     preamble_encoder_->Encode(*chunk);
 398     preamble_encoder_->Flush();
 399     scoped_refptr<AudioChunk> encoded_data(
 400         preamble_encoder_->GetEncodedDataAndClear());
 401     UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
 402   }
 403   return STATE_BOTH_STREAMS_CONNECTED;
 404 }
 405
 406 GoogleStreamingRemoteEngine::FSMState
 407 GoogleStreamingRemoteEngine::TransmitAudioUpstream(
 408     const FSMEventArgs& event_args) {
 409   DCHECK(upstream_fetcher_.get());
 410   DCHECK(event_args.audio_data.get());
 411   const AudioChunk& audio = *(event_args.audio_data.get());
 412
 413   DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
 414   encoder_->Encode(audio);
 415   scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
 416   UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
 417   return state_;
 418 }
 419
 420 GoogleStreamingRemoteEngine::FSMState
 421 GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
 422     const FSMEventArgs& event_args) {
 423   DCHECK(event_args.response.get());
 424
 425   proto::SpeechRecognitionEvent ws_event;
 426   if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
 427                                             event_args.response->end())))
 428     return AbortWithError(event_args);
 429
 430   // An empty (default) event is used to notify us that the upstream has
 431   // been connected. Ignore.
 432   if (!ws_event.result_size() && (!ws_event.has_status() ||
 433       ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) {
 434     DVLOG(1) << "Received empty response";
 435     return state_;
 436   }
 437
 438   if (ws_event.has_status()) {
 439     switch (ws_event.status()) {
 440       case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
 441         break;
 442       case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
 443         return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
 444       case proto::SpeechRecognitionEvent::STATUS_ABORTED:
 445         return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
 446       case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
 447         return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE);
 448       case proto::SpeechRecognitionEvent::STATUS_NETWORK:
 449         return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
 450       case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
 451         return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED);
 452       case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
 453         return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED);
 454       case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
 455         return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
 456       case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
 457         return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED);
 458     }
 459   }
 460
 461   SpeechRecognitionResults results;
 462   for (int i = 0; i < ws_event.result_size(); ++i) {
 463     const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
 464     results.push_back(SpeechRecognitionResult());
 465     SpeechRecognitionResult& result = results.back();
 466     result.is_provisional = !(ws_result.has_final() && ws_result.final());
 467
 468     if (!result.is_provisional)
 469       got_last_definitive_result_ = true;
 470
 471     for (int j = 0; j < ws_result.alternative_size(); ++j) {
 472       const proto::SpeechRecognitionAlternative& ws_alternative =
 473           ws_result.alternative(j);
 474       SpeechRecognitionHypothesis hypothesis;
 475       if (ws_alternative.has_confidence())
 476         hypothesis.confidence = ws_alternative.confidence();
 477       else if (ws_result.has_stability())
 478         hypothesis.confidence = ws_result.stability();
 479       DCHECK(ws_alternative.has_transcript());
 480       // TODO(hans): Perhaps the transcript should be required in the proto?
 481       if (ws_alternative.has_transcript())
 482         hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
 483
 484       result.hypotheses.push_back(hypothesis);
 485     }
 486   }
 487
 488   delegate()->OnSpeechRecognitionEngineResults(results);
 489
 490   return state_;
 491 }
 492
 493 GoogleStreamingRemoteEngine::FSMState
 494 GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
 495     const FSMEventArgs& event_args) {
 496   if (!got_last_definitive_result_) {
 497     // Provide an empty result to notify that recognition is ended with no
 498     // errors, yet neither any further results.
 499     delegate()->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
 500   }
 501   return AbortSilently(event_args);
 502 }
 503
 504 GoogleStreamingRemoteEngine::FSMState
 505 GoogleStreamingRemoteEngine::CloseUpstreamAndWaitForResults(
 506     const FSMEventArgs&) {
 507   DCHECK(upstream_fetcher_.get());
 508   DCHECK(encoder_.get());
 509
 510   DVLOG(1) <<  "Closing upstream.";
 511
 512   // The encoder requires a non-empty final buffer. So we encode a packet
 513   // of silence in case encoder had no data already.
 514   size_t sample_count =
 515       config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
 516   scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
 517       sample_count * sizeof(int16), encoder_->bits_per_sample() / 8);
 518   encoder_->Encode(*dummy_chunk.get());
 519   encoder_->Flush();
 520   scoped_refptr<AudioChunk> encoded_dummy_data =
 521       encoder_->GetEncodedDataAndClear();
 522   DCHECK(!encoded_dummy_data->IsEmpty());
 523   encoder_.reset();
 524
 525   UploadAudioChunk(encoded_dummy_data->AsString(),
 526                    FRAME_RECOGNITION_AUDIO,
 527                    true);
 528   got_last_definitive_result_ = false;
 529   return STATE_WAITING_DOWNSTREAM_RESULTS;
 530 }
 531
 532 GoogleStreamingRemoteEngine::FSMState
 533 GoogleStreamingRemoteEngine::CloseDownstream(const FSMEventArgs&) {
 534   DCHECK(!upstream_fetcher_.get());
 535   DCHECK(downstream_fetcher_.get());
 536
 537   DVLOG(1) <<  "Closing downstream.";
 538   downstream_fetcher_.reset();
 539   return STATE_IDLE;
 540 }
 541
 542 GoogleStreamingRemoteEngine::FSMState
 543 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) {
 544   return Abort(SPEECH_RECOGNITION_ERROR_NONE);
 545 }
 546
 547 GoogleStreamingRemoteEngine::FSMState
 548 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) {
 549   return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
 550 }
 551
 552 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort(
 553     SpeechRecognitionErrorCode error_code) {
 554   DVLOG(1) << "Aborting with error " << error_code;
 555
 556   if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
 557     delegate()->OnSpeechRecognitionEngineError(
 558         SpeechRecognitionError(error_code));
 559   }
 560   downstream_fetcher_.reset();
 561   upstream_fetcher_.reset();
 562   encoder_.reset();
 563   return STATE_IDLE;
 564 }
 565
 566 GoogleStreamingRemoteEngine::FSMState
 567 GoogleStreamingRemoteEngine::DoNothing(const FSMEventArgs&) {
 568   return state_;
 569 }
 570
 571 GoogleStreamingRemoteEngine::FSMState
 572 GoogleStreamingRemoteEngine::NotFeasible(const FSMEventArgs& event_args) {
 573   NOTREACHED() << "Unfeasible event " << event_args.event
 574                << " in state " << state_;
 575   return state_;
 576 }
 577
 578 std::string GoogleStreamingRemoteEngine::GetAcceptedLanguages() const {
 579   std::string langs = config_.language;
 580   if (langs.empty() && url_context_.get()) {
 581     // If no language is provided then we use the first from the accepted
 582     // language list. If this list is empty then it defaults to "en-US".
 583     // Example of the contents of this list: "es,en-GB;q=0.8", ""
 584     net::URLRequestContext* request_context =
 585         url_context_->GetURLRequestContext();
 586     DCHECK(request_context);
 587     // TODO(pauljensen): GoogleStreamingRemoteEngine should be constructed with
 588     // a reference to the HttpUserAgentSettings rather than accessing the
 589     // accept language through the URLRequestContext.
 590     if (request_context->http_user_agent_settings()) {
 591       std::string accepted_language_list =
 592           request_context->http_user_agent_settings()->GetAcceptLanguage();
 593       size_t separator = accepted_language_list.find_first_of(",;");
 594       if (separator != std::string::npos)
 595         langs = accepted_language_list.substr(0, separator);
 596     }
 597   }
 598   if (langs.empty())
 599     langs = "en-US";
 600   return langs;
 601 }
 602
 603 // TODO(primiano): Is there any utility in the codebase that already does this?
 604 std::string GoogleStreamingRemoteEngine::GenerateRequestKey() const {
 605   const int64 kKeepLowBytes = 0x00000000FFFFFFFFLL;
 606   const int64 kKeepHighBytes = 0xFFFFFFFF00000000LL;
 607
 608   // Just keep the least significant bits of timestamp, in order to reduce
 609   // probability of collisions.
 610   int64 key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
 611               (base::RandUint64() & kKeepHighBytes);
 612   return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
 613 }
 614
 615 void GoogleStreamingRemoteEngine::UploadAudioChunk(const std::string& data,
 616                                                    FrameType type,
 617                                                    bool is_final) {
 618   if (use_framed_post_data_) {
 619     std::string frame(data.size() + 8, 0);
 620     base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
 621     base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
 622     frame.replace(8, data.size(), data);
 623     upstream_fetcher_->AppendChunkToUpload(frame, is_final);
 624   } else {
 625     upstream_fetcher_->AppendChunkToUpload(data, is_final);
 626   }
 627 }
 628
 629 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
 630     : event(event_value) {
 631 }
 632
 633 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() {
 634 }
 635
 636 }  // namespace content