1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/supervised_user/experimental/supervised_user_async_url_checker.h"
9 #include "base/callback.h"
10 #include "base/json/json_reader.h"
11 #include "base/metrics/histogram.h"
12 #include "base/stl_util.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/time/time.h"
17 #include "base/values.h"
18 #include "components/google/core/browser/google_util.h"
19 #include "google_apis/google_api_keys.h"
20 #include "net/base/escape.h"
21 #include "net/base/load_flags.h"
22 #include "net/url_request/url_fetcher.h"
23 #include "net/url_request/url_request_context.h"
24 #include "url/url_constants.h"
26 using net::URLFetcher
;
27 using net::URLFetcherDelegate
;
28 using net::URLRequestContextGetter
;
29 using net::URLRequestStatus
;
// Query template for the Custom Search Engine. The two %s placeholders take
// the API key and the (escaped) URL to look up; "%%" yields a literal '%'
// when run through base::StringPrintf.
const char kQueryFormat[] = "https://www.googleapis.com/customsearch/v1"
    "?cx=017993620680222980993%%3A1wdumejvx5i&key=%s&q=inurl%%3A%s";
// Appended to the query URL to request SafeSearch-filtered results.
const char kQuerySafeParam[] = "&safe=high";

// Keys in the JSON search response.
const char kIdSearchInfo[] = "searchInformation";
const char kIdResultCount[] = "totalResults";
const char kIdResults[] = "items";
const char kIdResultURL[] = "link";

// Maximum number of entries kept in the result cache.
const size_t kDefaultCacheSize = 1000;
44 // Build a normalized version of |url| for comparisons. Sets the scheme to a
45 // common default and strips a leading "www." from the host.
46 GURL
GetNormalizedURL(const GURL
& url
) {
47 GURL::Replacements replacements
;
48 // Set scheme to http.
49 replacements
.SetSchemeStr(url::kHttpScheme
);
50 // Strip leading "www." (if any).
51 const std::string
www("www.");
52 const std::string
host(url
.host());
53 if (base::StartsWithASCII(host
, www
, true))
54 replacements
.SetHostStr(base::StringPiece(host
).substr(www
.size()));
55 // Strip trailing slash (if any).
56 const std::string
path(url
.path());
57 if (EndsWith(path
, "/", true))
58 replacements
.SetPathStr(base::StringPiece(path
).substr(0, path
.size() - 1));
59 return url
.ReplaceComponents(replacements
);
62 // Builds a URL for a web search for |url| (using the "inurl:" query parameter
63 // and a Custom Search Engine, using the specified |api_key|). If |safe| is
64 // specified, enables the SafeSearch query parameter.
65 GURL
BuildSearchURL(const std::string
& api_key
,
68 // Strip the scheme, so that we'll match any scheme.
69 std::string query
= net::EscapeQueryParamValue(url
.GetContent(), true);
70 std::string search_url
= base::StringPrintf(
75 search_url
.append(kQuerySafeParam
);
76 return GURL(search_url
);
79 // Creates a URLFetcher for a Google web search for |url|. If |safe| is
80 // specified, enables SafeSearch for this request.
81 scoped_ptr
<net::URLFetcher
> CreateFetcher(
82 URLFetcherDelegate
* delegate
,
83 URLRequestContextGetter
* context
,
84 const std::string
& api_key
,
87 const int kSafeId
= 0;
88 const int kUnsafeId
= 1;
89 int id
= safe
? kSafeId
: kUnsafeId
;
90 scoped_ptr
<net::URLFetcher
> fetcher
= URLFetcher::Create(
91 id
, BuildSearchURL(api_key
, url
, safe
), URLFetcher::GET
, delegate
);
92 fetcher
->SetRequestContext(context
);
93 fetcher
->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES
|
94 net::LOAD_DO_NOT_SAVE_COOKIES
);
95 return fetcher
.Pass();
98 // Checks whether the search |response| (in JSON format) contains an entry for
100 bool ResponseContainsURL(const std::string
& response
, const GURL
& url
) {
101 scoped_ptr
<base::Value
> value
= base::JSONReader::Read(response
);
102 const base::DictionaryValue
* dict
= NULL
;
103 if (!value
|| !value
->GetAsDictionary(&dict
)) {
104 DLOG(WARNING
) << "ResponseContainsURL failed to parse global dictionary";
107 const base::DictionaryValue
* search_info_dict
= NULL
;
108 if (!dict
->GetDictionary(kIdSearchInfo
, &search_info_dict
)) {
109 DLOG(WARNING
) << "ResponseContainsURL failed to parse search information";
112 std::string result_count
;
113 if (!search_info_dict
->GetString(kIdResultCount
, &result_count
)) {
114 DLOG(WARNING
) << "ResponseContainsURL failed to parse result count";
117 if (result_count
== "0")
119 const base::ListValue
* results_list
= NULL
;
120 if (!dict
->GetList(kIdResults
, &results_list
)) {
121 DLOG(WARNING
) << "ResponseContainsURL failed to parse list of results";
124 GURL url_normalized
= GetNormalizedURL(url
);
125 for (const base::Value
* entry
: *results_list
) {
126 const base::DictionaryValue
* result_dict
= NULL
;
127 if (!entry
->GetAsDictionary(&result_dict
)) {
128 DLOG(WARNING
) << "ResponseContainsURL failed to parse result dictionary";
131 std::string result_url
;
132 if (!result_dict
->GetString(kIdResultURL
, &result_url
)) {
133 DLOG(WARNING
) << "ResponseContainsURL failed to parse URL from result";
136 if (url_normalized
== GetNormalizedURL(GURL(result_url
)))
144 struct SupervisedUserAsyncURLChecker::Check
{
145 Check(const GURL
& url
,
146 scoped_ptr
<net::URLFetcher
> fetcher_safe
,
147 scoped_ptr
<net::URLFetcher
> fetcher_unsafe
,
148 const CheckCallback
& callback
);
152 scoped_ptr
<net::URLFetcher
> fetcher_safe
;
153 scoped_ptr
<net::URLFetcher
> fetcher_unsafe
;
154 std::vector
<CheckCallback
> callbacks
;
157 base::Time start_time
;
160 SupervisedUserAsyncURLChecker::Check::Check(
162 scoped_ptr
<net::URLFetcher
> fetcher_safe
,
163 scoped_ptr
<net::URLFetcher
> fetcher_unsafe
,
164 const CheckCallback
& callback
)
166 fetcher_safe(fetcher_safe
.Pass()),
167 fetcher_unsafe(fetcher_unsafe
.Pass()),
168 callbacks(1, callback
),
171 start_time(base::Time::Now()) {
174 SupervisedUserAsyncURLChecker::Check::~Check() {}
176 SupervisedUserAsyncURLChecker::CheckResult::CheckResult(
177 SupervisedUserURLFilter::FilteringBehavior behavior
, bool uncertain
)
178 : behavior(behavior
), uncertain(uncertain
) {
181 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
182 URLRequestContextGetter
* context
)
183 : context_(context
), cache_(kDefaultCacheSize
) {
186 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
187 URLRequestContextGetter
* context
,
189 : context_(context
), cache_(cache_size
) {
192 SupervisedUserAsyncURLChecker::~SupervisedUserAsyncURLChecker() {}
194 bool SupervisedUserAsyncURLChecker::CheckURL(const GURL
& url
,
195 const CheckCallback
& callback
) {
196 // TODO(treib): Hack: For now, allow all Google URLs to save search QPS. If we
197 // ever remove this, we should find a way to allow at least the NTP.
198 if (google_util::IsGoogleDomainUrl(url
,
199 google_util::ALLOW_SUBDOMAIN
,
200 google_util::ALLOW_NON_STANDARD_PORTS
)) {
201 callback
.Run(url
, SupervisedUserURLFilter::ALLOW
, false);
204 // TODO(treib): Hack: For now, allow all YouTube URLs since YouTube has its
205 // own Safety Mode anyway.
206 if (google_util::IsYoutubeDomainUrl(url
,
207 google_util::ALLOW_SUBDOMAIN
,
208 google_util::ALLOW_NON_STANDARD_PORTS
)) {
209 callback
.Run(url
, SupervisedUserURLFilter::ALLOW
, false);
213 auto cache_it
= cache_
.Get(url
);
214 if (cache_it
!= cache_
.end()) {
215 const CheckResult
& result
= cache_it
->second
;
216 DVLOG(1) << "Cache hit! " << url
.spec() << " is "
217 << (result
.behavior
== SupervisedUserURLFilter::BLOCK
? "NOT" : "")
218 << " safe; certain: " << !result
.uncertain
;
219 callback
.Run(url
, result
.behavior
, result
.uncertain
);
223 // See if we already have a check in progress for this URL.
224 for (Check
* check
: checks_in_progress_
) {
225 if (check
->url
== url
) {
226 DVLOG(1) << "Adding to pending check for " << url
.spec();
227 check
->callbacks
.push_back(callback
);
232 DVLOG(1) << "Checking URL " << url
;
233 std::string api_key
= google_apis::GetSafeSitesAPIKey();
234 scoped_ptr
<URLFetcher
> fetcher_safe(
235 CreateFetcher(this, context_
, api_key
, url
, true));
236 scoped_ptr
<URLFetcher
> fetcher_unsafe(
237 CreateFetcher(this, context_
, api_key
, url
, false));
238 fetcher_safe
->Start();
239 fetcher_unsafe
->Start();
240 checks_in_progress_
.push_back(
241 new Check(url
, fetcher_safe
.Pass(), fetcher_unsafe
.Pass(), callback
));
245 void SupervisedUserAsyncURLChecker::OnURLFetchComplete(
246 const net::URLFetcher
* source
) {
247 ScopedVector
<Check
>::iterator it
= checks_in_progress_
.begin();
248 bool is_safe_search_request
= false;
249 while (it
!= checks_in_progress_
.end()) {
250 if (source
== (*it
)->fetcher_safe
.get()) {
251 is_safe_search_request
= true;
252 (*it
)->safe_done
= true;
254 } else if (source
== (*it
)->fetcher_unsafe
.get()) {
255 (*it
)->unsafe_done
= true;
260 DCHECK(it
!= checks_in_progress_
.end());
263 const URLRequestStatus
& status
= source
->GetStatus();
264 if (!status
.is_success()) {
265 DLOG(WARNING
) << "URL request failed! Letting through...";
266 for (size_t i
= 0; i
< check
->callbacks
.size(); i
++)
267 check
->callbacks
[i
].Run(check
->url
, SupervisedUserURLFilter::ALLOW
, true);
268 checks_in_progress_
.erase(it
);
272 std::string response_body
;
273 source
->GetResponseAsString(&response_body
);
274 bool url_in_search_result
= ResponseContainsURL(response_body
, check
->url
);
276 // We consider a URL as safe if it turns up in a safesearch query. To handle
277 // URLs that aren't in the search index at all, we also allows URLS that don't
278 // turn up even in a non-safesearch query.
279 SupervisedUserURLFilter::FilteringBehavior behavior
=
280 SupervisedUserURLFilter::ALLOW
;
281 bool uncertain
= true;
282 if (is_safe_search_request
) {
283 if (url_in_search_result
) {
284 // Found the URL with safesearch, don't block.
285 DVLOG(1) << check
->url
.spec() << " is safe, allowing.";
286 behavior
= SupervisedUserURLFilter::ALLOW
;
288 } else if (check
->unsafe_done
) {
289 // Found the URL only without safesearch, block.
290 DVLOG(1) << check
->url
.spec() << " is NOT safe, blocking.";
291 behavior
= SupervisedUserURLFilter::BLOCK
;
294 // Didn't find the URL with safesearch, have to wait for non-safe result.
298 if (!url_in_search_result
) {
299 // Didn't find the URL even without safesearch, have to let through.
300 DVLOG(1) << check
->url
.spec() << " is unknown, allowing.";
301 behavior
= SupervisedUserURLFilter::ALLOW
;
303 } else if (check
->safe_done
) {
304 // Found the URL only without safesearch, block.
305 DVLOG(1) << check
->url
.spec() << " is NOT safe, blocking.";
306 behavior
= SupervisedUserURLFilter::BLOCK
;
309 // Found the URL without safesearch, wait for safe result.
314 UMA_HISTOGRAM_TIMES("ManagedUsers.SafeSitesDelay",
315 base::Time::Now() - check
->start_time
);
317 cache_
.Put(check
->url
, CheckResult(behavior
, uncertain
));
319 for (size_t i
= 0; i
< check
->callbacks
.size(); i
++)
320 check
->callbacks
[i
].Run(check
->url
, behavior
, uncertain
);
321 checks_in_progress_
.erase(it
);