[sql] Remove _HAS_EXCEPTIONS=0 from build info.
[chromium-blink-merge.git] / chrome / browser / supervised_user / experimental / supervised_user_async_url_checker.cc
blob00417e96252da43a6df3fb7781fa946784597548
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/supervised_user/experimental/supervised_user_async_url_checker.h"
7 #include <string>
9 #include "base/callback.h"
10 #include "base/json/json_reader.h"
11 #include "base/metrics/histogram.h"
12 #include "base/stl_util.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/time/time.h"
17 #include "base/values.h"
18 #include "components/google/core/browser/google_util.h"
19 #include "google_apis/google_api_keys.h"
20 #include "net/base/escape.h"
21 #include "net/base/load_flags.h"
22 #include "net/url_request/url_fetcher.h"
23 #include "net/url_request/url_request_context.h"
24 #include "url/url_constants.h"
26 using net::URLFetcher;
27 using net::URLFetcherDelegate;
28 using net::URLRequestContextGetter;
29 using net::URLRequestStatus;
31 namespace {
// printf-style template for the Custom Search request URL. The first %s
// receives the API key and the second the escaped "inurl:" query term; each
// "%%3A" yields a literal "%3A" (an URL-encoded ':') in the formatted output.
const char kQueryFormat[] =
    "https://www.googleapis.com/customsearch/v1"
    "?cx=017993620680222980993%%3A1wdumejvx5i"
    "&key=%s"
    "&q=inurl%%3A%s";
// Query suffix appended to the search URL to request SafeSearch filtering.
const char kQuerySafeParam[] = "&safe=high";

// Keys read from the JSON search response.
const char kIdSearchInfo[] = "searchInformation";
const char kIdResultCount[] = "totalResults";
const char kIdResults[] = "items";
const char kIdResultURL[] = "link";

// Cache size used by the constructor that does not take an explicit size.
const size_t kDefaultCacheSize = 1000;
44 // Build a normalized version of |url| for comparisons. Sets the scheme to a
45 // common default and strips a leading "www." from the host.
46 GURL GetNormalizedURL(const GURL& url) {
47 GURL::Replacements replacements;
48 // Set scheme to http.
49 replacements.SetSchemeStr(url::kHttpScheme);
50 // Strip leading "www." (if any).
51 const std::string www("www.");
52 const std::string host(url.host());
53 if (base::StartsWith(host, www, base::CompareCase::SENSITIVE))
54 replacements.SetHostStr(base::StringPiece(host).substr(www.size()));
55 // Strip trailing slash (if any).
56 const std::string path(url.path());
57 if (base::EndsWith(path, "/", base::CompareCase::SENSITIVE))
58 replacements.SetPathStr(base::StringPiece(path).substr(0, path.size() - 1));
59 return url.ReplaceComponents(replacements);
62 // Builds a URL for a web search for |url| (using the "inurl:" query parameter
63 // and a Custom Search Engine, using the specified |api_key|). If |safe| is
64 // specified, enables the SafeSearch query parameter.
65 GURL BuildSearchURL(const std::string& api_key,
66 const GURL& url,
67 bool safe) {
68 // Normalize the URL and strip the scheme.
69 std::string query =
70 net::EscapeQueryParamValue(GetNormalizedURL(url).GetContent(), true);
71 std::string search_url = base::StringPrintf(
72 kQueryFormat,
73 api_key.c_str(),
74 query.c_str());
75 if (safe)
76 search_url.append(kQuerySafeParam);
77 return GURL(search_url);
80 // Creates a URLFetcher for a Google web search for |url|. If |safe| is
81 // specified, enables SafeSearch for this request.
82 scoped_ptr<net::URLFetcher> CreateFetcher(
83 URLFetcherDelegate* delegate,
84 URLRequestContextGetter* context,
85 const std::string& api_key,
86 const GURL& url,
87 bool safe) {
88 const int kSafeId = 0;
89 const int kUnsafeId = 1;
90 int id = safe ? kSafeId : kUnsafeId;
91 scoped_ptr<net::URLFetcher> fetcher = URLFetcher::Create(
92 id, BuildSearchURL(api_key, url, safe), URLFetcher::GET, delegate);
93 fetcher->SetRequestContext(context);
94 fetcher->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
95 net::LOAD_DO_NOT_SAVE_COOKIES);
96 return fetcher.Pass();
99 // Checks whether the search |response| (in JSON format) contains an entry for
100 // the given |url|.
101 bool ResponseContainsURL(const std::string& response, const GURL& url) {
102 scoped_ptr<base::Value> value = base::JSONReader::Read(response);
103 const base::DictionaryValue* dict = NULL;
104 if (!value || !value->GetAsDictionary(&dict)) {
105 DLOG(WARNING) << "ResponseContainsURL failed to parse global dictionary";
106 return false;
108 const base::DictionaryValue* search_info_dict = NULL;
109 if (!dict->GetDictionary(kIdSearchInfo, &search_info_dict)) {
110 DLOG(WARNING) << "ResponseContainsURL failed to parse search information";
111 return false;
113 std::string result_count;
114 if (!search_info_dict->GetString(kIdResultCount, &result_count)) {
115 DLOG(WARNING) << "ResponseContainsURL failed to parse result count";
116 return false;
118 if (result_count == "0")
119 return false;
120 const base::ListValue* results_list = NULL;
121 if (!dict->GetList(kIdResults, &results_list)) {
122 DLOG(WARNING) << "ResponseContainsURL failed to parse list of results";
123 return false;
125 GURL url_normalized = GetNormalizedURL(url);
126 for (const base::Value* entry : *results_list) {
127 const base::DictionaryValue* result_dict = NULL;
128 if (!entry->GetAsDictionary(&result_dict)) {
129 DLOG(WARNING) << "ResponseContainsURL failed to parse result dictionary";
130 return false;
132 std::string result_url;
133 if (!result_dict->GetString(kIdResultURL, &result_url)) {
134 DLOG(WARNING) << "ResponseContainsURL failed to parse URL from result";
135 return false;
137 if (url_normalized == GetNormalizedURL(GURL(result_url)))
138 return true;
140 return false;
143 } // namespace
145 struct SupervisedUserAsyncURLChecker::Check {
146 Check(const GURL& url,
147 scoped_ptr<net::URLFetcher> fetcher_safe,
148 scoped_ptr<net::URLFetcher> fetcher_unsafe,
149 const CheckCallback& callback);
150 ~Check();
152 GURL url;
153 scoped_ptr<net::URLFetcher> fetcher_safe;
154 scoped_ptr<net::URLFetcher> fetcher_unsafe;
155 std::vector<CheckCallback> callbacks;
156 bool safe_done;
157 bool unsafe_done;
158 base::Time start_time;
161 SupervisedUserAsyncURLChecker::Check::Check(
162 const GURL& url,
163 scoped_ptr<net::URLFetcher> fetcher_safe,
164 scoped_ptr<net::URLFetcher> fetcher_unsafe,
165 const CheckCallback& callback)
166 : url(url),
167 fetcher_safe(fetcher_safe.Pass()),
168 fetcher_unsafe(fetcher_unsafe.Pass()),
169 callbacks(1, callback),
170 safe_done(false),
171 unsafe_done(false),
172 start_time(base::Time::Now()) {
175 SupervisedUserAsyncURLChecker::Check::~Check() {}
177 SupervisedUserAsyncURLChecker::CheckResult::CheckResult(
178 SupervisedUserURLFilter::FilteringBehavior behavior, bool uncertain)
179 : behavior(behavior), uncertain(uncertain) {
182 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
183 URLRequestContextGetter* context)
184 : context_(context), cache_(kDefaultCacheSize) {
187 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
188 URLRequestContextGetter* context,
189 size_t cache_size)
190 : context_(context), cache_(cache_size) {
193 SupervisedUserAsyncURLChecker::~SupervisedUserAsyncURLChecker() {}
195 bool SupervisedUserAsyncURLChecker::CheckURL(const GURL& url,
196 const CheckCallback& callback) {
197 // TODO(treib): Hack: For now, allow all Google URLs to save search QPS. If we
198 // ever remove this, we should find a way to allow at least the NTP.
199 if (google_util::IsGoogleDomainUrl(url,
200 google_util::ALLOW_SUBDOMAIN,
201 google_util::ALLOW_NON_STANDARD_PORTS)) {
202 callback.Run(url, SupervisedUserURLFilter::ALLOW, false);
203 return true;
205 // TODO(treib): Hack: For now, allow all YouTube URLs since YouTube has its
206 // own Safety Mode anyway.
207 if (google_util::IsYoutubeDomainUrl(url,
208 google_util::ALLOW_SUBDOMAIN,
209 google_util::ALLOW_NON_STANDARD_PORTS)) {
210 callback.Run(url, SupervisedUserURLFilter::ALLOW, false);
211 return true;
214 auto cache_it = cache_.Get(url);
215 if (cache_it != cache_.end()) {
216 const CheckResult& result = cache_it->second;
217 DVLOG(1) << "Cache hit! " << url.spec() << " is "
218 << (result.behavior == SupervisedUserURLFilter::BLOCK ? "NOT" : "")
219 << " safe; certain: " << !result.uncertain;
220 callback.Run(url, result.behavior, result.uncertain);
221 return true;
224 // See if we already have a check in progress for this URL.
225 for (Check* check : checks_in_progress_) {
226 if (check->url == url) {
227 DVLOG(1) << "Adding to pending check for " << url.spec();
228 check->callbacks.push_back(callback);
229 return false;
233 DVLOG(1) << "Checking URL " << url;
234 std::string api_key = google_apis::GetSafeSitesAPIKey();
235 scoped_ptr<URLFetcher> fetcher_safe(
236 CreateFetcher(this, context_, api_key, url, true));
237 scoped_ptr<URLFetcher> fetcher_unsafe(
238 CreateFetcher(this, context_, api_key, url, false));
239 fetcher_safe->Start();
240 fetcher_unsafe->Start();
241 checks_in_progress_.push_back(
242 new Check(url, fetcher_safe.Pass(), fetcher_unsafe.Pass(), callback));
243 return false;
246 void SupervisedUserAsyncURLChecker::OnURLFetchComplete(
247 const net::URLFetcher* source) {
248 ScopedVector<Check>::iterator it = checks_in_progress_.begin();
249 bool is_safe_search_request = false;
250 while (it != checks_in_progress_.end()) {
251 if (source == (*it)->fetcher_safe.get()) {
252 is_safe_search_request = true;
253 (*it)->safe_done = true;
254 break;
255 } else if (source == (*it)->fetcher_unsafe.get()) {
256 (*it)->unsafe_done = true;
257 break;
259 ++it;
261 DCHECK(it != checks_in_progress_.end());
262 Check* check = *it;
264 const URLRequestStatus& status = source->GetStatus();
265 if (!status.is_success()) {
266 DLOG(WARNING) << "URL request failed! Letting through...";
267 for (size_t i = 0; i < check->callbacks.size(); i++)
268 check->callbacks[i].Run(check->url, SupervisedUserURLFilter::ALLOW, true);
269 checks_in_progress_.erase(it);
270 return;
273 std::string response_body;
274 source->GetResponseAsString(&response_body);
275 bool url_in_search_result = ResponseContainsURL(response_body, check->url);
277 // We consider a URL as safe if it turns up in a safesearch query. To handle
278 // URLs that aren't in the search index at all, we also allows URLS that don't
279 // turn up even in a non-safesearch query.
280 SupervisedUserURLFilter::FilteringBehavior behavior =
281 SupervisedUserURLFilter::ALLOW;
282 bool uncertain = true;
283 if (is_safe_search_request) {
284 if (url_in_search_result) {
285 // Found the URL with safesearch, don't block.
286 DVLOG(1) << check->url.spec() << " is safe, allowing.";
287 behavior = SupervisedUserURLFilter::ALLOW;
288 uncertain = false;
289 } else if (check->unsafe_done) {
290 // Found the URL only without safesearch, block.
291 DVLOG(1) << check->url.spec() << " is NOT safe, blocking.";
292 behavior = SupervisedUserURLFilter::BLOCK;
293 uncertain = false;
294 } else {
295 // Didn't find the URL with safesearch, have to wait for non-safe result.
296 return;
298 } else {
299 if (!url_in_search_result) {
300 // Didn't find the URL even without safesearch, have to let through.
301 DVLOG(1) << check->url.spec() << " is unknown, allowing.";
302 behavior = SupervisedUserURLFilter::ALLOW;
303 uncertain = true;
304 } else if (check->safe_done) {
305 // Found the URL only without safesearch, block.
306 DVLOG(1) << check->url.spec() << " is NOT safe, blocking.";
307 behavior = SupervisedUserURLFilter::BLOCK;
308 uncertain = false;
309 } else {
310 // Found the URL without safesearch, wait for safe result.
311 return;
315 UMA_HISTOGRAM_TIMES("ManagedUsers.SafeSitesDelay",
316 base::Time::Now() - check->start_time);
318 cache_.Put(check->url, CheckResult(behavior, uncertain));
320 for (size_t i = 0; i < check->callbacks.size(); i++)
321 check->callbacks[i].Run(check->url, behavior, uncertain);
322 checks_in_progress_.erase(it);