Update V8 to version 4.6.55.
[chromium-blink-merge.git] / content / browser / speech / google_one_shot_remote_engine.cc
blob5cc68e175ac4fe2b6cd1dd7d7656e24b9083a7ca
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/speech/google_one_shot_remote_engine.h"
7 #include <vector>
9 #include "base/json/json_reader.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "base/values.h"
13 #include "content/browser/speech/audio_buffer.h"
14 #include "content/public/common/speech_recognition_error.h"
15 #include "content/public/common/speech_recognition_result.h"
16 #include "google_apis/google_api_keys.h"
17 #include "net/base/escape.h"
18 #include "net/base/load_flags.h"
19 #include "net/url_request/http_user_agent_settings.h"
20 #include "net/url_request/url_fetcher.h"
21 #include "net/url_request/url_request_context.h"
22 #include "net/url_request/url_request_context_getter.h"
23 #include "net/url_request/url_request_status.h"
25 namespace content {
26 namespace {
// Base URL of the recognition web service. It ends with '&' so that the
// request-specific query parameters can be appended directly.
const char kDefaultSpeechRecognitionUrl[] =
    "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&";

// Keys looked up in the JSON response.
const char kStatusString[] = "status";
const char kHypothesesString[] = "hypotheses";
const char kUtteranceString[] = "utterance";
const char kConfidenceString[] = "confidence";

// Values of the "status" field that the response parser distinguishes.
const int kWebServiceStatusNoError = 0;
const int kWebServiceStatusNoSpeech = 4;
const int kWebServiceStatusNoMatch = 5;
38 bool ParseServerResponse(const std::string& response_body,
39 SpeechRecognitionResult* result,
40 SpeechRecognitionError* error) {
41 if (response_body.empty()) {
42 LOG(WARNING) << "ParseServerResponse: Response was empty.";
43 return false;
45 DVLOG(1) << "ParseServerResponse: Parsing response " << response_body;
47 // Parse the response, ignoring comments.
48 std::string error_msg;
49 scoped_ptr<base::Value> response_value(
50 base::JSONReader::DeprecatedReadAndReturnError(
51 response_body, base::JSON_PARSE_RFC, NULL, &error_msg));
52 if (response_value == NULL) {
53 LOG(WARNING) << "ParseServerResponse: JSONReader failed : " << error_msg;
54 return false;
57 if (!response_value->IsType(base::Value::TYPE_DICTIONARY)) {
58 DVLOG(1) << "ParseServerResponse: Unexpected response type "
59 << response_value->GetType();
60 return false;
62 const base::DictionaryValue* response_object =
63 static_cast<const base::DictionaryValue*>(response_value.get());
65 // Get the status.
66 int status;
67 if (!response_object->GetInteger(kStatusString, &status)) {
68 DVLOG(1) << "ParseServerResponse: " << kStatusString
69 << " is not a valid integer value.";
70 return false;
73 // Process the status.
74 switch (status) {
75 case kWebServiceStatusNoError:
76 break;
77 case kWebServiceStatusNoSpeech:
78 error->code = SPEECH_RECOGNITION_ERROR_NO_SPEECH;
79 return false;
80 case kWebServiceStatusNoMatch:
81 error->code = SPEECH_RECOGNITION_ERROR_NO_MATCH;
82 return false;
83 default:
84 error->code = SPEECH_RECOGNITION_ERROR_NETWORK;
85 // Other status codes should not be returned by the server.
86 DVLOG(1) << "ParseServerResponse: unexpected status code " << status;
87 return false;
90 // Get the hypotheses.
91 const base::Value* hypotheses_value = NULL;
92 if (!response_object->Get(kHypothesesString, &hypotheses_value)) {
93 DVLOG(1) << "ParseServerResponse: Missing hypotheses attribute.";
94 return false;
97 DCHECK(hypotheses_value);
98 if (!hypotheses_value->IsType(base::Value::TYPE_LIST)) {
99 DVLOG(1) << "ParseServerResponse: Unexpected hypotheses type "
100 << hypotheses_value->GetType();
101 return false;
104 const base::ListValue* hypotheses_list =
105 static_cast<const base::ListValue*>(hypotheses_value);
107 // For now we support only single shot recognition, so we are giving only a
108 // final result, consisting of one fragment (with one or more hypotheses).
109 size_t index = 0;
110 for (; index < hypotheses_list->GetSize(); ++index) {
111 const base::Value* hypothesis = NULL;
112 if (!hypotheses_list->Get(index, &hypothesis)) {
113 LOG(WARNING) << "ParseServerResponse: Unable to read hypothesis value.";
114 break;
116 DCHECK(hypothesis);
117 if (!hypothesis->IsType(base::Value::TYPE_DICTIONARY)) {
118 LOG(WARNING) << "ParseServerResponse: Unexpected value type "
119 << hypothesis->GetType();
120 break;
123 const base::DictionaryValue* hypothesis_value =
124 static_cast<const base::DictionaryValue*>(hypothesis);
125 base::string16 utterance;
127 if (!hypothesis_value->GetString(kUtteranceString, &utterance)) {
128 LOG(WARNING) << "ParseServerResponse: Missing utterance value.";
129 break;
132 // It is not an error if the 'confidence' field is missing.
133 double confidence = 0.0;
134 hypothesis_value->GetDouble(kConfidenceString, &confidence);
135 result->hypotheses.push_back(SpeechRecognitionHypothesis(utterance,
136 confidence));
139 if (index < hypotheses_list->GetSize()) {
140 result->hypotheses.clear();
141 return false;
143 return true;
146 } // namespace
148 const int GoogleOneShotRemoteEngine::kAudioPacketIntervalMs = 100;
149 int GoogleOneShotRemoteEngine::url_fetcher_id_for_tests = 0;
151 GoogleOneShotRemoteEngine::GoogleOneShotRemoteEngine(
152 net::URLRequestContextGetter* context)
153 : url_context_(context) {
156 GoogleOneShotRemoteEngine::~GoogleOneShotRemoteEngine() {}
158 void GoogleOneShotRemoteEngine::SetConfig(
159 const SpeechRecognitionEngineConfig& config) {
160 config_ = config;
163 void GoogleOneShotRemoteEngine::StartRecognition() {
164 DCHECK(delegate());
165 DCHECK(!url_fetcher_.get());
166 std::string lang_param = config_.language;
168 if (lang_param.empty() && url_context_.get()) {
169 // If no language is provided then we use the first from the accepted
170 // language list. If this list is empty then it defaults to "en-US".
171 // Example of the contents of this list: "es,en-GB;q=0.8", ""
172 net::URLRequestContext* request_context =
173 url_context_->GetURLRequestContext();
174 DCHECK(request_context);
175 // TODO(pauljensen): GoogleOneShotRemoteEngine should be constructed with
176 // a reference to the HttpUserAgentSettings rather than accessing the
177 // accept language through the URLRequestContext.
178 if (request_context->http_user_agent_settings()) {
179 std::string accepted_language_list =
180 request_context->http_user_agent_settings()->GetAcceptLanguage();
181 size_t separator = accepted_language_list.find_first_of(",;");
182 lang_param = accepted_language_list.substr(0, separator);
186 if (lang_param.empty())
187 lang_param = "en-US";
189 std::vector<std::string> parts;
190 parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true));
192 if (!config_.grammars.empty()) {
193 DCHECK_EQ(config_.grammars.size(), 1U);
194 parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammars[0].url,
195 true));
198 if (!config_.hardware_info.empty())
199 parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info,
200 true));
201 parts.push_back("maxresults=" + base::UintToString(config_.max_hypotheses));
202 parts.push_back(config_.filter_profanities ? "pfilter=2" : "pfilter=0");
204 std::string api_key = google_apis::GetAPIKey();
205 parts.push_back("key=" + net::EscapeQueryParamValue(api_key, true));
207 GURL url(std::string(kDefaultSpeechRecognitionUrl) +
208 base::JoinString(parts, "&"));
210 encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
211 config_.audio_num_bits_per_sample));
212 DCHECK(encoder_.get());
213 url_fetcher_ = net::URLFetcher::Create(url_fetcher_id_for_tests, url,
214 net::URLFetcher::POST, this);
215 url_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
216 url_fetcher_->SetRequestContext(url_context_.get());
217 url_fetcher_->SetReferrer(config_.origin_url);
219 // The speech recognition API does not require user identification as part
220 // of requests, so we don't send cookies or auth data for these requests to
221 // prevent any accidental connection between users who are logged into the
222 // domain for other services (e.g. bookmark sync) with the speech requests.
223 url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
224 net::LOAD_DO_NOT_SEND_COOKIES |
225 net::LOAD_DO_NOT_SEND_AUTH_DATA);
226 url_fetcher_->Start();
229 void GoogleOneShotRemoteEngine::EndRecognition() {
230 url_fetcher_.reset();
233 void GoogleOneShotRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
234 DCHECK(url_fetcher_.get());
235 DCHECK(encoder_.get());
236 DCHECK_EQ(data.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
237 encoder_->Encode(data);
238 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
239 url_fetcher_->AppendChunkToUpload(encoded_data->AsString(), false);
242 void GoogleOneShotRemoteEngine::AudioChunksEnded() {
243 DCHECK(url_fetcher_.get());
244 DCHECK(encoder_.get());
246 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet
247 // of silence in case encoder had no data already.
248 size_t sample_count =
249 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
250 scoped_refptr<AudioChunk> dummy_chunk(new AudioChunk(
251 sample_count * sizeof(int16), encoder_->GetBitsPerSample() / 8));
252 encoder_->Encode(*dummy_chunk.get());
253 encoder_->Flush();
254 scoped_refptr<AudioChunk> encoded_dummy_data(
255 encoder_->GetEncodedDataAndClear());
256 DCHECK(!encoded_dummy_data->IsEmpty());
257 encoder_.reset();
259 url_fetcher_->AppendChunkToUpload(encoded_dummy_data->AsString(), true);
262 void GoogleOneShotRemoteEngine::OnURLFetchComplete(
263 const net::URLFetcher* source) {
264 DCHECK_EQ(url_fetcher_.get(), source);
265 SpeechRecognitionResults results;
266 results.push_back(SpeechRecognitionResult());
267 SpeechRecognitionResult& result = results.back();
268 SpeechRecognitionError error(SPEECH_RECOGNITION_ERROR_NETWORK);
269 std::string data;
271 // The default error code in case of parse errors is NETWORK_FAILURE, however
272 // ParseServerResponse can change the error to a more appropriate one.
273 bool error_occurred = (!source->GetStatus().is_success() ||
274 source->GetResponseCode() != 200 ||
275 !source->GetResponseAsString(&data) ||
276 !ParseServerResponse(data, &result, &error));
277 url_fetcher_.reset();
278 if (error_occurred) {
279 DVLOG(1) << "GoogleOneShotRemoteEngine: Network Error " << error.code;
280 delegate()->OnSpeechRecognitionEngineError(error);
281 } else {
282 DVLOG(1) << "GoogleOneShotRemoteEngine: Invoking delegate with result.";
283 delegate()->OnSpeechRecognitionEngineResults(results);
287 bool GoogleOneShotRemoteEngine::IsRecognitionPending() const {
288 return url_fetcher_ != NULL;
291 int GoogleOneShotRemoteEngine::GetDesiredAudioChunkDurationMs() const {
292 return kAudioPacketIntervalMs;
295 } // namespace content