content/browser/speech/google_streaming_remote_engine.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/browser/speech/google_streaming_remote_engine.h"
   6
   7 #include <algorithm>
   8 #include <vector>
   9
  10 #include "base/big_endian.h"
  11 #include "base/bind.h"
  12 #include "base/rand_util.h"
  13 #include "base/strings/string_number_conversions.h"
  14 #include "base/strings/string_util.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "base/time/time.h"
  17 #include "content/browser/speech/audio_buffer.h"
  18 #include "content/browser/speech/proto/google_streaming_api.pb.h"
  19 #include "content/public/common/speech_recognition_error.h"
  20 #include "content/public/common/speech_recognition_result.h"
  21 #include "google_apis/google_api_keys.h"
  22 #include "net/base/escape.h"
  23 #include "net/base/load_flags.h"
  24 #include "net/url_request/http_user_agent_settings.h"
  25 #include "net/url_request/url_fetcher.h"
  26 #include "net/url_request/url_request_context.h"
  27 #include "net/url_request/url_request_context_getter.h"
  28 #include "net/url_request/url_request_status.h"
  29
  30 using net::URLFetcher;
  31
  32 namespace content {
  33 namespace {
  34
  35 const char kWebServiceBaseUrl[] =
  36     "https://www.google.com/speech-api/full-duplex/v1";
  37 const char kDownstreamUrl[] = "/down?";
  38 const char kUpstreamUrl[] = "/up?";
  39 const AudioEncoder::Codec kDefaultAudioCodec = AudioEncoder::CODEC_FLAC;
  40
  41 // This matches the maximum maxAlternatives value supported by the server.
  42 const uint32 kMaxMaxAlternatives = 30;
  43
  44 // TODO(hans): Remove this and other logging when we don't need it anymore.
  45 void DumpResponse(const std::string& response) {
  46   DVLOG(1) << "------------";
  47   proto::SpeechRecognitionEvent event;
  48   if (!event.ParseFromString(response)) {
  49     DVLOG(1) << "Parse failed!";
  50     return;
  51   }
  52   if (event.has_status())
  53     DVLOG(1) << "STATUS\t" << event.status();
  54   for (int i = 0; i < event.result_size(); ++i) {
  55     DVLOG(1) << "RESULT #" << i << ":";
  56     const proto::SpeechRecognitionResult& res = event.result(i);
  57     if (res.has_final())
  58       DVLOG(1) << "  final:\t" << res.final();
  59     if (res.has_stability())
  60       DVLOG(1) << "  STABILITY:\t" << res.stability();
  61     for (int j = 0; j < res.alternative_size(); ++j) {
  62       const proto::SpeechRecognitionAlternative& alt =
  63           res.alternative(j);
  64       if (alt.has_confidence())
  65         DVLOG(1) << "    CONFIDENCE:\t" << alt.confidence();
  66       if (alt.has_transcript())
  67         DVLOG(1) << "    TRANSCRIPT:\t" << alt.transcript();
  68     }
  69   }
  70 }
  71
  72 }  // namespace
  73
  74 const int GoogleStreamingRemoteEngine::kAudioPacketIntervalMs = 100;
  75 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting = 0;
  76 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting = 1;
  77 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0;
  78 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5;
  79
  80 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine(
  81     net::URLRequestContextGetter* context)
  82     : url_context_(context),
  83       previous_response_length_(0),
  84       got_last_definitive_result_(false),
  85       is_dispatching_event_(false),
  86       use_framed_post_data_(false),
  87       state_(STATE_IDLE) {}
  88
  89 GoogleStreamingRemoteEngine::~GoogleStreamingRemoteEngine() {}
  90
  91 void GoogleStreamingRemoteEngine::SetConfig(
  92     const SpeechRecognitionEngineConfig& config) {
  93   config_ = config;
  94 }
  95
  96 void GoogleStreamingRemoteEngine::StartRecognition() {
  97   FSMEventArgs event_args(EVENT_START_RECOGNITION);
  98   DispatchEvent(event_args);
  99 }
 100
 101 void GoogleStreamingRemoteEngine::EndRecognition() {
 102   FSMEventArgs event_args(EVENT_END_RECOGNITION);
 103   DispatchEvent(event_args);
 104 }
 105
 106 void GoogleStreamingRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
 107   FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
 108   event_args.audio_data = &data;
 109   DispatchEvent(event_args);
 110 }
 111
 112 void GoogleStreamingRemoteEngine::AudioChunksEnded() {
 113   FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
 114   DispatchEvent(event_args);
 115 }
 116
 117 void GoogleStreamingRemoteEngine::OnURLFetchComplete(const URLFetcher* source) {
 118   const bool kResponseComplete = true;
 119   DispatchHTTPResponse(source, kResponseComplete);
 120 }
 121
 122 void GoogleStreamingRemoteEngine::OnURLFetchDownloadProgress(
 123     const URLFetcher* source, int64 current, int64 total) {
 124   const bool kPartialResponse = false;
 125   DispatchHTTPResponse(source, kPartialResponse);
 126 }
 127
 128 void GoogleStreamingRemoteEngine::DispatchHTTPResponse(const URLFetcher* source,
 129                                                        bool end_of_response) {
 130   DCHECK(CalledOnValidThread());
 131   DCHECK(source);
 132   const bool response_is_good = source->GetStatus().is_success() &&
 133                                 source->GetResponseCode() == 200;
 134   std::string response;
 135   if (response_is_good)
 136     source->GetResponseAsString(&response);
 137   const size_t current_response_length = response.size();
 138
 139   DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
 140            << "HTTP, code: " << source->GetResponseCode()
 141            << "      length: " << current_response_length
 142            << "      eor: " << end_of_response;
 143
 144   // URLFetcher provides always the entire response buffer, but we are only
 145   // interested in the fresh data introduced by the last chunk. Therefore, we
 146   // drop the previous content we have already processed.
 147   if (current_response_length != 0) {
 148     DCHECK_GE(current_response_length, previous_response_length_);
 149     response.erase(0, previous_response_length_);
 150     previous_response_length_ = current_response_length;
 151   }
 152
 153   if (!response_is_good && source == downstream_fetcher_.get()) {
 154     DVLOG(1) << "Downstream error " << source->GetResponseCode();
 155     FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
 156     DispatchEvent(event_args);
 157     return;
 158   }
 159   if (!response_is_good && source == upstream_fetcher_.get()) {
 160     DVLOG(1) << "Upstream error " << source->GetResponseCode()
 161              << " EOR " << end_of_response;
 162     FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
 163     DispatchEvent(event_args);
 164     return;
 165   }
 166
 167   // Ignore incoming data on the upstream connection.
 168   if (source == upstream_fetcher_.get())
 169     return;
 170
 171   DCHECK(response_is_good && source == downstream_fetcher_.get());
 172
 173   // The downstream response is organized in chunks, whose size is determined
 174   // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
 175   // Such chunks are sent by the speech recognition webservice over the HTTP
 176   // downstream channel using HTTP chunked transfer (unrelated to our chunks).
 177   // This function is called every time an HTTP chunk is received by the
 178   // url fetcher. However there isn't any particular matching beween our
 179   // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
 180   // contain a portion of one chunk or even more chunks together.
 181   chunked_byte_buffer_.Append(response);
 182
 183   // A single HTTP chunk can contain more than one data chunk, thus the while.
 184   while (chunked_byte_buffer_.HasChunks()) {
 185     FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
 186     event_args.response = chunked_byte_buffer_.PopChunk();
 187     DCHECK(event_args.response.get());
 188     DumpResponse(std::string(event_args.response->begin(),
 189                              event_args.response->end()));
 190     DispatchEvent(event_args);
 191   }
 192   if (end_of_response) {
 193     FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
 194     DispatchEvent(event_args);
 195   }
 196 }
 197
 198 bool GoogleStreamingRemoteEngine::IsRecognitionPending() const {
 199   DCHECK(CalledOnValidThread());
 200   return state_ != STATE_IDLE;
 201 }
 202
 203 int GoogleStreamingRemoteEngine::GetDesiredAudioChunkDurationMs() const {
 204   return kAudioPacketIntervalMs;
 205 }
 206
 207 // -----------------------  Core FSM implementation ---------------------------
 208
 209 void GoogleStreamingRemoteEngine::DispatchEvent(
 210     const FSMEventArgs& event_args) {
 211   DCHECK(CalledOnValidThread());
 212   DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
 213   DCHECK_LE(state_, STATE_MAX_VALUE);
 214
 215   // Event dispatching must be sequential, otherwise it will break all the rules
 216   // and the assumptions of the finite state automata model.
 217   DCHECK(!is_dispatching_event_);
 218   is_dispatching_event_ = true;
 219
 220   state_ = ExecuteTransitionAndGetNextState(event_args);
 221
 222   is_dispatching_event_ = false;
 223 }
 224
 225 GoogleStreamingRemoteEngine::FSMState
 226 GoogleStreamingRemoteEngine::ExecuteTransitionAndGetNextState(
 227     const FSMEventArgs& event_args) {
 228   const FSMEvent event = event_args.event;
 229   switch (state_) {
 230     case STATE_IDLE:
 231       switch (event) {
 232         case EVENT_START_RECOGNITION:
 233           return ConnectBothStreams(event_args);
 234         case EVENT_END_RECOGNITION:
 235         // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
 236         // abort, so we just silently drop them here.
 237         case EVENT_AUDIO_CHUNK:
 238         case EVENT_AUDIO_CHUNKS_ENDED:
 239         // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
 240         case EVENT_DOWNSTREAM_CLOSED:
 241           return DoNothing(event_args);
 242         case EVENT_UPSTREAM_ERROR:
 243         case EVENT_DOWNSTREAM_ERROR:
 244         case EVENT_DOWNSTREAM_RESPONSE:
 245           return NotFeasible(event_args);
 246       }
 247       break;
 248     case STATE_BOTH_STREAMS_CONNECTED:
 249       switch (event) {
 250         case EVENT_AUDIO_CHUNK:
 251           return TransmitAudioUpstream(event_args);
 252         case EVENT_DOWNSTREAM_RESPONSE:
 253           return ProcessDownstreamResponse(event_args);
 254         case EVENT_AUDIO_CHUNKS_ENDED:
 255           return CloseUpstreamAndWaitForResults(event_args);
 256         case EVENT_END_RECOGNITION:
 257           return AbortSilently(event_args);
 258         case EVENT_UPSTREAM_ERROR:
 259         case EVENT_DOWNSTREAM_ERROR:
 260         case EVENT_DOWNSTREAM_CLOSED:
 261           return AbortWithError(event_args);
 262         case EVENT_START_RECOGNITION:
 263           return NotFeasible(event_args);
 264       }
 265       break;
 266     case STATE_WAITING_DOWNSTREAM_RESULTS:
 267       switch (event) {
 268         case EVENT_DOWNSTREAM_RESPONSE:
 269           return ProcessDownstreamResponse(event_args);
 270         case EVENT_DOWNSTREAM_CLOSED:
 271           return RaiseNoMatchErrorIfGotNoResults(event_args);
 272         case EVENT_END_RECOGNITION:
 273           return AbortSilently(event_args);
 274         case EVENT_UPSTREAM_ERROR:
 275         case EVENT_DOWNSTREAM_ERROR:
 276           return AbortWithError(event_args);
 277         case EVENT_START_RECOGNITION:
 278         case EVENT_AUDIO_CHUNK:
 279         case EVENT_AUDIO_CHUNKS_ENDED:
 280           return NotFeasible(event_args);
 281       }
 282       break;
 283   }
 284   return NotFeasible(event_args);
 285 }
 286
 287 // ----------- Contract for all the FSM evolution functions below -------------
 288 //  - Are guaranteed to be executed in the same thread (IO, except for tests);
 289 //  - Are guaranteed to be not reentrant (themselves and each other);
 290 //  - event_args members are guaranteed to be stable during the call;
 291
 292 GoogleStreamingRemoteEngine::FSMState
 293 GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
 294   DCHECK(!upstream_fetcher_.get());
 295   DCHECK(!downstream_fetcher_.get());
 296
 297   encoder_.reset(AudioEncoder::Create(kDefaultAudioCodec,
 298                                       config_.audio_sample_rate,
 299                                       config_.audio_num_bits_per_sample));
 300   DCHECK(encoder_.get());
 301   const std::string request_key = GenerateRequestKey();
 302
 303   // Only use the framed post data format when a preamble needs to be logged.
 304   use_framed_post_data_ = (config_.preamble &&
 305                            !config_.preamble->sample_data.empty() &&
 306                            !config_.auth_token.empty() &&
 307                            !config_.auth_scope.empty());
 308   if (use_framed_post_data_) {
 309     preamble_encoder_.reset(AudioEncoder::Create(
 310         kDefaultAudioCodec,
 311         config_.preamble->sample_rate,
 312         config_.preamble->sample_depth * 8));
 313   }
 314
 315   // Setup downstream fetcher.
 316   std::vector<std::string> downstream_args;
 317   downstream_args.push_back(
 318       "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
 319   downstream_args.push_back("pair=" + request_key);
 320   downstream_args.push_back("output=pb");
 321   GURL downstream_url(std::string(kWebServiceBaseUrl) +
 322                       std::string(kDownstreamUrl) +
 323                       JoinString(downstream_args, '&'));
 324
 325   downstream_fetcher_.reset(URLFetcher::Create(
 326       kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET,
 327       this));
 328   downstream_fetcher_->SetRequestContext(url_context_.get());
 329   downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
 330                                     net::LOAD_DO_NOT_SEND_COOKIES |
 331                                     net::LOAD_DO_NOT_SEND_AUTH_DATA);
 332   downstream_fetcher_->Start();
 333
 334   // Setup upstream fetcher.
 335   // TODO(hans): Support for user-selected grammars.
 336   std::vector<std::string> upstream_args;
 337   upstream_args.push_back("key=" +
 338       net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
 339   upstream_args.push_back("pair=" + request_key);
 340   upstream_args.push_back("output=pb");
 341   upstream_args.push_back(
 342       "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
 343   upstream_args.push_back(
 344       config_.filter_profanities ? "pFilter=2" : "pFilter=0");
 345   if (config_.max_hypotheses > 0U) {
 346     int max_alternatives = std::min(kMaxMaxAlternatives,
 347                                     config_.max_hypotheses);
 348     upstream_args.push_back("maxAlternatives=" +
 349                             base::UintToString(max_alternatives));
 350   }
 351   upstream_args.push_back("client=chromium");
 352   if (!config_.hardware_info.empty()) {
 353     upstream_args.push_back(
 354         "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
 355   }
 356   if (config_.continuous)
 357     upstream_args.push_back("continuous");
 358   if (config_.interim_results)
 359     upstream_args.push_back("interim");
 360   if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
 361     upstream_args.push_back(
 362         "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
 363     upstream_args.push_back(
 364         "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
 365   }
 366   if (use_framed_post_data_) {
 367     std::string audio_format;
 368     if (preamble_encoder_)
 369       audio_format = preamble_encoder_->mime_type() + ",";
 370     audio_format += encoder_->mime_type();
 371     upstream_args.push_back(
 372         "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
 373   }
 374   GURL upstream_url(std::string(kWebServiceBaseUrl) +
 375                     std::string(kUpstreamUrl) +
 376                     JoinString(upstream_args, '&'));
 377
 378   upstream_fetcher_.reset(URLFetcher::Create(
 379       kUpstreamUrlFetcherIdForTesting, upstream_url, URLFetcher::POST, this));
 380   if (use_framed_post_data_)
 381     upstream_fetcher_->SetChunkedUpload("application/octet-stream");
 382   else
 383     upstream_fetcher_->SetChunkedUpload(encoder_->mime_type());
 384   upstream_fetcher_->SetRequestContext(url_context_.get());
 385   upstream_fetcher_->SetReferrer(config_.origin_url);
 386   upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
 387                                   net::LOAD_DO_NOT_SEND_COOKIES |
 388                                   net::LOAD_DO_NOT_SEND_AUTH_DATA);
 389   upstream_fetcher_->Start();
 390   previous_response_length_ = 0;
 391
 392   if (preamble_encoder_) {
 393     // Encode and send preamble right away.
 394     scoped_refptr<AudioChunk> chunk = new AudioChunk(
 395         reinterpret_cast<const uint8*>(config_.preamble->sample_data.data()),
 396         config_.preamble->sample_data.size(),
 397         config_.preamble->sample_depth);
 398     preamble_encoder_->Encode(*chunk);
 399     preamble_encoder_->Flush();
 400     scoped_refptr<AudioChunk> encoded_data(
 401         preamble_encoder_->GetEncodedDataAndClear());
 402     UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
 403   }
 404   return STATE_BOTH_STREAMS_CONNECTED;
 405 }
 406
 407 GoogleStreamingRemoteEngine::FSMState
 408 GoogleStreamingRemoteEngine::TransmitAudioUpstream(
 409     const FSMEventArgs& event_args) {
 410   DCHECK(upstream_fetcher_.get());
 411   DCHECK(event_args.audio_data.get());
 412   const AudioChunk& audio = *(event_args.audio_data.get());
 413
 414   DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
 415   encoder_->Encode(audio);
 416   scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
 417   UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
 418   return state_;
 419 }
 420
 421 GoogleStreamingRemoteEngine::FSMState
 422 GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
 423     const FSMEventArgs& event_args) {
 424   DCHECK(event_args.response.get());
 425
 426   proto::SpeechRecognitionEvent ws_event;
 427   if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
 428                                             event_args.response->end())))
 429     return AbortWithError(event_args);
 430
 431   // An empty (default) event is used to notify us that the upstream has
 432   // been connected. Ignore.
 433   if (!ws_event.result_size() && (!ws_event.has_status() ||
 434       ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) {
 435     DVLOG(1) << "Received empty response";
 436     return state_;
 437   }
 438
 439   if (ws_event.has_status()) {
 440     switch (ws_event.status()) {
 441       case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
 442         break;
 443       case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
 444         return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
 445       case proto::SpeechRecognitionEvent::STATUS_ABORTED:
 446         return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
 447       case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
 448         return Abort(SPEECH_RECOGNITION_ERROR_AUDIO);
 449       case proto::SpeechRecognitionEvent::STATUS_NETWORK:
 450         return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
 451       case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
 452         // TODO(hans): We need a better error code for this.
 453         return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
 454       case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
 455         // TODO(hans): We need a better error code for this.
 456         return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
 457       case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
 458         return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
 459       case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
 460         // TODO(hans): We need a better error code for this.
 461         return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
 462     }
 463   }
 464
 465   SpeechRecognitionResults results;
 466   for (int i = 0; i < ws_event.result_size(); ++i) {
 467     const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
 468     results.push_back(SpeechRecognitionResult());
 469     SpeechRecognitionResult& result = results.back();
 470     result.is_provisional = !(ws_result.has_final() && ws_result.final());
 471
 472     if (!result.is_provisional)
 473       got_last_definitive_result_ = true;
 474
 475     for (int j = 0; j < ws_result.alternative_size(); ++j) {
 476       const proto::SpeechRecognitionAlternative& ws_alternative =
 477           ws_result.alternative(j);
 478       SpeechRecognitionHypothesis hypothesis;
 479       if (ws_alternative.has_confidence())
 480         hypothesis.confidence = ws_alternative.confidence();
 481       else if (ws_result.has_stability())
 482         hypothesis.confidence = ws_result.stability();
 483       DCHECK(ws_alternative.has_transcript());
 484       // TODO(hans): Perhaps the transcript should be required in the proto?
 485       if (ws_alternative.has_transcript())
 486         hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
 487
 488       result.hypotheses.push_back(hypothesis);
 489     }
 490   }
 491
 492   delegate()->OnSpeechRecognitionEngineResults(results);
 493
 494   return state_;
 495 }
 496
 497 GoogleStreamingRemoteEngine::FSMState
 498 GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
 499     const FSMEventArgs& event_args) {
 500   if (!got_last_definitive_result_) {
 501     // Provide an empty result to notify that recognition is ended with no
 502     // errors, yet neither any further results.
 503     delegate()->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
 504   }
 505   return AbortSilently(event_args);
 506 }
 507
 508 GoogleStreamingRemoteEngine::FSMState
 509 GoogleStreamingRemoteEngine::CloseUpstreamAndWaitForResults(
 510     const FSMEventArgs&) {
 511   DCHECK(upstream_fetcher_.get());
 512   DCHECK(encoder_.get());
 513
 514   DVLOG(1) <<  "Closing upstream.";
 515
 516   // The encoder requires a non-empty final buffer. So we encode a packet
 517   // of silence in case encoder had no data already.
 518   size_t sample_count =
 519       config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
 520   scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
 521       sample_count * sizeof(int16), encoder_->bits_per_sample() / 8);
 522   encoder_->Encode(*dummy_chunk.get());
 523   encoder_->Flush();
 524   scoped_refptr<AudioChunk> encoded_dummy_data =
 525       encoder_->GetEncodedDataAndClear();
 526   DCHECK(!encoded_dummy_data->IsEmpty());
 527   encoder_.reset();
 528
 529   UploadAudioChunk(encoded_dummy_data->AsString(),
 530                    FRAME_RECOGNITION_AUDIO,
 531                    true);
 532   got_last_definitive_result_ = false;
 533   return STATE_WAITING_DOWNSTREAM_RESULTS;
 534 }
 535
 536 GoogleStreamingRemoteEngine::FSMState
 537 GoogleStreamingRemoteEngine::CloseDownstream(const FSMEventArgs&) {
 538   DCHECK(!upstream_fetcher_.get());
 539   DCHECK(downstream_fetcher_.get());
 540
 541   DVLOG(1) <<  "Closing downstream.";
 542   downstream_fetcher_.reset();
 543   return STATE_IDLE;
 544 }
 545
 546 GoogleStreamingRemoteEngine::FSMState
 547 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) {
 548   return Abort(SPEECH_RECOGNITION_ERROR_NONE);
 549 }
 550
 551 GoogleStreamingRemoteEngine::FSMState
 552 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) {
 553   return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
 554 }
 555
 556 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort(
 557     SpeechRecognitionErrorCode error_code) {
 558   DVLOG(1) << "Aborting with error " << error_code;
 559
 560   if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
 561     delegate()->OnSpeechRecognitionEngineError(
 562         SpeechRecognitionError(error_code));
 563   }
 564   downstream_fetcher_.reset();
 565   upstream_fetcher_.reset();
 566   encoder_.reset();
 567   return STATE_IDLE;
 568 }
 569
 570 GoogleStreamingRemoteEngine::FSMState
 571 GoogleStreamingRemoteEngine::DoNothing(const FSMEventArgs&) {
 572   return state_;
 573 }
 574
 575 GoogleStreamingRemoteEngine::FSMState
 576 GoogleStreamingRemoteEngine::NotFeasible(const FSMEventArgs& event_args) {
 577   NOTREACHED() << "Unfeasible event " << event_args.event
 578                << " in state " << state_;
 579   return state_;
 580 }
 581
 582 std::string GoogleStreamingRemoteEngine::GetAcceptedLanguages() const {
 583   std::string langs = config_.language;
 584   if (langs.empty() && url_context_.get()) {
 585     // If no language is provided then we use the first from the accepted
 586     // language list. If this list is empty then it defaults to "en-US".
 587     // Example of the contents of this list: "es,en-GB;q=0.8", ""
 588     net::URLRequestContext* request_context =
 589         url_context_->GetURLRequestContext();
 590     DCHECK(request_context);
 591     // TODO(pauljensen): GoogleStreamingRemoteEngine should be constructed with
 592     // a reference to the HttpUserAgentSettings rather than accessing the
 593     // accept language through the URLRequestContext.
 594     if (request_context->http_user_agent_settings()) {
 595       std::string accepted_language_list =
 596           request_context->http_user_agent_settings()->GetAcceptLanguage();
 597       size_t separator = accepted_language_list.find_first_of(",;");
 598       if (separator != std::string::npos)
 599         langs = accepted_language_list.substr(0, separator);
 600     }
 601   }
 602   if (langs.empty())
 603     langs = "en-US";
 604   return langs;
 605 }
 606
 607 // TODO(primiano): Is there any utility in the codebase that already does this?
 608 std::string GoogleStreamingRemoteEngine::GenerateRequestKey() const {
 609   const int64 kKeepLowBytes = 0x00000000FFFFFFFFLL;
 610   const int64 kKeepHighBytes = 0xFFFFFFFF00000000LL;
 611
 612   // Just keep the least significant bits of timestamp, in order to reduce
 613   // probability of collisions.
 614   int64 key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
 615               (base::RandUint64() & kKeepHighBytes);
 616   return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
 617 }
 618
 619 void GoogleStreamingRemoteEngine::UploadAudioChunk(const std::string& data,
 620                                                    FrameType type,
 621                                                    bool is_final) {
 622   if (use_framed_post_data_) {
 623     std::string frame(data.size() + 8, 0);
 624     base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
 625     base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
 626     frame.replace(8, data.size(), data);
 627     upstream_fetcher_->AppendChunkToUpload(frame, is_final);
 628   } else {
 629     upstream_fetcher_->AppendChunkToUpload(data, is_final);
 630   }
 631 }
 632
 633 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
 634     : event(event_value) {
 635 }
 636
 637 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() {
 638 }
 639
 640 }  // namespace content