Move StartsWith[ASCII] to base namespace.
[chromium-blink-merge.git] / chrome / browser / supervised_user / experimental / supervised_user_async_url_checker.cc
blob f7eb8ae8ad8ec30d10f613b487b0ae4935e64c8a
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/supervised_user/experimental/supervised_user_async_url_checker.h"
7 #include <string>
9 #include "base/callback.h"
10 #include "base/json/json_reader.h"
11 #include "base/metrics/histogram.h"
12 #include "base/stl_util.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/time/time.h"
17 #include "base/values.h"
18 #include "components/google/core/browser/google_util.h"
19 #include "google_apis/google_api_keys.h"
20 #include "net/base/escape.h"
21 #include "net/base/load_flags.h"
22 #include "net/url_request/url_fetcher.h"
23 #include "net/url_request/url_request_context.h"
24 #include "url/url_constants.h"
26 using net::URLFetcher;
27 using net::URLFetcherDelegate;
28 using net::URLRequestContextGetter;
29 using net::URLRequestStatus;
31 namespace {
// Query template for the Google Custom Search API. The two "%s" slots take
// the API key and the (already-escaped) URL to look up via the "inurl:"
// operator; the "%%" pairs are printf escapes for literal '%' characters.
const char kQueryFormat[] = "https://www.googleapis.com/customsearch/v1"
    "?cx=017993620680222980993%%3A1wdumejvx5i&key=%s&q=inurl%%3A%s";
// Appended to the query to request SafeSearch-filtered results.
const char kQuerySafeParam[] = "&safe=high";

// Keys of the JSON fields we read from the search response.
const char kIdSearchInfo[] = "searchInformation";
const char kIdResultCount[] = "totalResults";
const char kIdResults[] = "items";
const char kIdResultURL[] = "link";

// Maximum number of entries kept in the URL result cache.
const size_t kDefaultCacheSize = 1000;
44 // Build a normalized version of |url| for comparisons. Sets the scheme to a
45 // common default and strips a leading "www." from the host.
46 GURL GetNormalizedURL(const GURL& url) {
47 GURL::Replacements replacements;
48 // Set scheme to http.
49 replacements.SetSchemeStr(url::kHttpScheme);
50 // Strip leading "www." (if any).
51 const std::string www("www.");
52 const std::string host(url.host());
53 if (base::StartsWithASCII(host, www, true))
54 replacements.SetHostStr(base::StringPiece(host).substr(www.size()));
55 // Strip trailing slash (if any).
56 const std::string path(url.path());
57 if (EndsWith(path, "/", true))
58 replacements.SetPathStr(base::StringPiece(path).substr(0, path.size() - 1));
59 return url.ReplaceComponents(replacements);
62 // Builds a URL for a web search for |url| (using the "inurl:" query parameter
63 // and a Custom Search Engine, using the specified |api_key|). If |safe| is
64 // specified, enables the SafeSearch query parameter.
65 GURL BuildSearchURL(const std::string& api_key,
66 const GURL& url,
67 bool safe) {
68 // Strip the scheme, so that we'll match any scheme.
69 std::string query = net::EscapeQueryParamValue(url.GetContent(), true);
70 std::string search_url = base::StringPrintf(
71 kQueryFormat,
72 api_key.c_str(),
73 query.c_str());
74 if (safe)
75 search_url.append(kQuerySafeParam);
76 return GURL(search_url);
79 // Creates a URLFetcher for a Google web search for |url|. If |safe| is
80 // specified, enables SafeSearch for this request.
81 scoped_ptr<net::URLFetcher> CreateFetcher(
82 URLFetcherDelegate* delegate,
83 URLRequestContextGetter* context,
84 const std::string& api_key,
85 const GURL& url,
86 bool safe) {
87 const int kSafeId = 0;
88 const int kUnsafeId = 1;
89 int id = safe ? kSafeId : kUnsafeId;
90 scoped_ptr<net::URLFetcher> fetcher = URLFetcher::Create(
91 id, BuildSearchURL(api_key, url, safe), URLFetcher::GET, delegate);
92 fetcher->SetRequestContext(context);
93 fetcher->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
94 net::LOAD_DO_NOT_SAVE_COOKIES);
95 return fetcher.Pass();
// Checks whether the search |response| (in JSON format) contains an entry for
// the given |url|. Returns false on any parse failure (logged in debug
// builds) as well as on an empty result set.
bool ResponseContainsURL(const std::string& response, const GURL& url) {
  scoped_ptr<base::Value> value = base::JSONReader::Read(response);
  const base::DictionaryValue* dict = NULL;
  if (!value || !value->GetAsDictionary(&dict)) {
    DLOG(WARNING) << "ResponseContainsURL failed to parse global dictionary";
    return false;
  }
  // "searchInformation" holds metadata about the query, including the total
  // number of hits.
  const base::DictionaryValue* search_info_dict = NULL;
  if (!dict->GetDictionary(kIdSearchInfo, &search_info_dict)) {
    DLOG(WARNING) << "ResponseContainsURL failed to parse search information";
    return false;
  }
  // Note: the result count is delivered as a string, not a number.
  std::string result_count;
  if (!search_info_dict->GetString(kIdResultCount, &result_count)) {
    DLOG(WARNING) << "ResponseContainsURL failed to parse result count";
    return false;
  }
  if (result_count == "0")
    return false;
  const base::ListValue* results_list = NULL;
  if (!dict->GetList(kIdResults, &results_list)) {
    DLOG(WARNING) << "ResponseContainsURL failed to parse list of results";
    return false;
  }
  // Compare normalized forms so that trivial differences (scheme, "www.",
  // trailing slash) between |url| and the indexed link don't cause a miss.
  GURL url_normalized = GetNormalizedURL(url);
  for (const base::Value* entry : *results_list) {
    const base::DictionaryValue* result_dict = NULL;
    if (!entry->GetAsDictionary(&result_dict)) {
      DLOG(WARNING) << "ResponseContainsURL failed to parse result dictionary";
      return false;
    }
    std::string result_url;
    if (!result_dict->GetString(kIdResultURL, &result_url)) {
      DLOG(WARNING) << "ResponseContainsURL failed to parse URL from result";
      return false;
    }
    if (url_normalized == GetNormalizedURL(GURL(result_url)))
      return true;
  }
  return false;
}
142 } // namespace
// State for one in-flight URL check: the two concurrent search requests
// (with and without SafeSearch) plus the callbacks waiting on the verdict.
struct SupervisedUserAsyncURLChecker::Check {
  Check(const GURL& url,
        scoped_ptr<net::URLFetcher> fetcher_safe,
        scoped_ptr<net::URLFetcher> fetcher_unsafe,
        const CheckCallback& callback);
  ~Check();

  // The URL being checked.
  GURL url;
  scoped_ptr<net::URLFetcher> fetcher_safe;
  scoped_ptr<net::URLFetcher> fetcher_unsafe;
  // All callbacks for this URL; more are appended if the same URL is checked
  // again while this check is still in flight.
  std::vector<CheckCallback> callbacks;
  // Whether the respective fetcher has completed.
  bool safe_done;
  bool unsafe_done;
  // When the check started; used for the latency histogram.
  base::Time start_time;
};
// Takes ownership of both fetchers and registers |callback| as the first
// waiter for this URL.
SupervisedUserAsyncURLChecker::Check::Check(
    const GURL& url,
    scoped_ptr<net::URLFetcher> fetcher_safe,
    scoped_ptr<net::URLFetcher> fetcher_unsafe,
    const CheckCallback& callback)
    : url(url),
      fetcher_safe(fetcher_safe.Pass()),
      fetcher_unsafe(fetcher_unsafe.Pass()),
      callbacks(1, callback),
      safe_done(false),
      unsafe_done(false),
      start_time(base::Time::Now()) {
}

SupervisedUserAsyncURLChecker::Check::~Check() {}
// A cached verdict: the filtering behavior plus whether it was uncertain.
SupervisedUserAsyncURLChecker::CheckResult::CheckResult(
    SupervisedUserURLFilter::FilteringBehavior behavior, bool uncertain)
    : behavior(behavior), uncertain(uncertain) {
}
181 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
182 URLRequestContextGetter* context)
183 : context_(context), cache_(kDefaultCacheSize) {
186 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
187 URLRequestContextGetter* context,
188 size_t cache_size)
189 : context_(context), cache_(cache_size) {
192 SupervisedUserAsyncURLChecker::~SupervisedUserAsyncURLChecker() {}
194 bool SupervisedUserAsyncURLChecker::CheckURL(const GURL& url,
195 const CheckCallback& callback) {
196 // TODO(treib): Hack: For now, allow all Google URLs to save search QPS. If we
197 // ever remove this, we should find a way to allow at least the NTP.
198 if (google_util::IsGoogleDomainUrl(url,
199 google_util::ALLOW_SUBDOMAIN,
200 google_util::ALLOW_NON_STANDARD_PORTS)) {
201 callback.Run(url, SupervisedUserURLFilter::ALLOW, false);
202 return true;
204 // TODO(treib): Hack: For now, allow all YouTube URLs since YouTube has its
205 // own Safety Mode anyway.
206 if (google_util::IsYoutubeDomainUrl(url,
207 google_util::ALLOW_SUBDOMAIN,
208 google_util::ALLOW_NON_STANDARD_PORTS)) {
209 callback.Run(url, SupervisedUserURLFilter::ALLOW, false);
210 return true;
213 auto cache_it = cache_.Get(url);
214 if (cache_it != cache_.end()) {
215 const CheckResult& result = cache_it->second;
216 DVLOG(1) << "Cache hit! " << url.spec() << " is "
217 << (result.behavior == SupervisedUserURLFilter::BLOCK ? "NOT" : "")
218 << " safe; certain: " << !result.uncertain;
219 callback.Run(url, result.behavior, result.uncertain);
220 return true;
223 // See if we already have a check in progress for this URL.
224 for (Check* check : checks_in_progress_) {
225 if (check->url == url) {
226 DVLOG(1) << "Adding to pending check for " << url.spec();
227 check->callbacks.push_back(callback);
228 return false;
232 DVLOG(1) << "Checking URL " << url;
233 std::string api_key = google_apis::GetSafeSitesAPIKey();
234 scoped_ptr<URLFetcher> fetcher_safe(
235 CreateFetcher(this, context_, api_key, url, true));
236 scoped_ptr<URLFetcher> fetcher_unsafe(
237 CreateFetcher(this, context_, api_key, url, false));
238 fetcher_safe->Start();
239 fetcher_unsafe->Start();
240 checks_in_progress_.push_back(
241 new Check(url, fetcher_safe.Pass(), fetcher_unsafe.Pass(), callback));
242 return false;
245 void SupervisedUserAsyncURLChecker::OnURLFetchComplete(
246 const net::URLFetcher* source) {
247 ScopedVector<Check>::iterator it = checks_in_progress_.begin();
248 bool is_safe_search_request = false;
249 while (it != checks_in_progress_.end()) {
250 if (source == (*it)->fetcher_safe.get()) {
251 is_safe_search_request = true;
252 (*it)->safe_done = true;
253 break;
254 } else if (source == (*it)->fetcher_unsafe.get()) {
255 (*it)->unsafe_done = true;
256 break;
258 ++it;
260 DCHECK(it != checks_in_progress_.end());
261 Check* check = *it;
263 const URLRequestStatus& status = source->GetStatus();
264 if (!status.is_success()) {
265 DLOG(WARNING) << "URL request failed! Letting through...";
266 for (size_t i = 0; i < check->callbacks.size(); i++)
267 check->callbacks[i].Run(check->url, SupervisedUserURLFilter::ALLOW, true);
268 checks_in_progress_.erase(it);
269 return;
272 std::string response_body;
273 source->GetResponseAsString(&response_body);
274 bool url_in_search_result = ResponseContainsURL(response_body, check->url);
276 // We consider a URL as safe if it turns up in a safesearch query. To handle
277 // URLs that aren't in the search index at all, we also allows URLS that don't
278 // turn up even in a non-safesearch query.
279 SupervisedUserURLFilter::FilteringBehavior behavior =
280 SupervisedUserURLFilter::ALLOW;
281 bool uncertain = true;
282 if (is_safe_search_request) {
283 if (url_in_search_result) {
284 // Found the URL with safesearch, don't block.
285 DVLOG(1) << check->url.spec() << " is safe, allowing.";
286 behavior = SupervisedUserURLFilter::ALLOW;
287 uncertain = false;
288 } else if (check->unsafe_done) {
289 // Found the URL only without safesearch, block.
290 DVLOG(1) << check->url.spec() << " is NOT safe, blocking.";
291 behavior = SupervisedUserURLFilter::BLOCK;
292 uncertain = false;
293 } else {
294 // Didn't find the URL with safesearch, have to wait for non-safe result.
295 return;
297 } else {
298 if (!url_in_search_result) {
299 // Didn't find the URL even without safesearch, have to let through.
300 DVLOG(1) << check->url.spec() << " is unknown, allowing.";
301 behavior = SupervisedUserURLFilter::ALLOW;
302 uncertain = true;
303 } else if (check->safe_done) {
304 // Found the URL only without safesearch, block.
305 DVLOG(1) << check->url.spec() << " is NOT safe, blocking.";
306 behavior = SupervisedUserURLFilter::BLOCK;
307 uncertain = false;
308 } else {
309 // Found the URL without safesearch, wait for safe result.
310 return;
314 UMA_HISTOGRAM_TIMES("ManagedUsers.SafeSitesDelay",
315 base::Time::Now() - check->start_time);
317 cache_.Put(check->url, CheckResult(behavior, uncertain));
319 for (size_t i = 0; i < check->callbacks.size(); i++)
320 check->callbacks[i].Run(check->url, behavior, uncertain);
321 checks_in_progress_.erase(it);