1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/supervised_user/experimental/supervised_user_async_url_checker.h"
9 #include "base/callback.h"
10 #include "base/json/json_reader.h"
11 #include "base/metrics/histogram.h"
12 #include "base/stl_util.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/time/time.h"
17 #include "base/values.h"
18 #include "components/google/core/browser/google_util.h"
19 #include "google_apis/google_api_keys.h"
20 #include "net/base/escape.h"
21 #include "net/base/load_flags.h"
22 #include "net/url_request/url_fetcher.h"
23 #include "net/url_request/url_request_context.h"
24 #include "url/url_constants.h"
26 using net::URLFetcher
;
27 using net::URLFetcherDelegate
;
28 using net::URLRequestContextGetter
;
29 using net::URLRequestStatus
;
// printf-style template for the Custom Search request URL. The two "%s"
// placeholders follow "key=" and "q=inurl%3A" respectively; every "%%" is an
// escaped percent sign, so "%%3A" becomes the URL-encoded colon "%3A" once the
// string has been formatted.
33 const char kQueryFormat
[] = "https://www.googleapis.com/customsearch/v1"
34 "?cx=017993620680222980993%%3A1wdumejvx5i&key=%s&q=inurl%%3A%s";
// Query-string suffix that BuildSearchURL() appends to the search URL.
35 const char kQuerySafeParam
[] = "&safe=high";
// Keys read from the JSON search response by ResponseContainsURL():
//  - kIdSearchInfo:  dictionary inside the top-level dictionary.
//  - kIdResultCount: string inside the kIdSearchInfo dictionary.
//  - kIdResults:     list inside the top-level dictionary.
//  - kIdResultURL:   string inside each dictionary of the kIdResults list.
37 const char kIdSearchInfo
[] = "searchInformation";
38 const char kIdResultCount
[] = "totalResults";
39 const char kIdResults
[] = "items";
40 const char kIdResultURL
[] = "link";
// Value handed to |cache_| by the SupervisedUserAsyncURLChecker constructor
// that takes no explicit cache size.
42 const size_t kDefaultCacheSize
= 1000;
44 // Build a normalized version of |url| for comparisons. Sets the scheme to a
45 // common default and strips a leading "www." from the host.
46 GURL
GetNormalizedURL(const GURL
& url
) {
47 GURL::Replacements replacements
;
48 // Set scheme to http.
49 replacements
.SetSchemeStr(url::kHttpScheme
);
50 // Strip leading "www." (if any).
51 const std::string
www("www.");
52 const std::string
host(url
.host());
53 if (base::StartsWith(host
, www
, base::CompareCase::SENSITIVE
))
54 replacements
.SetHostStr(base::StringPiece(host
).substr(www
.size()));
55 // Strip trailing slash (if any).
56 const std::string
path(url
.path());
57 if (base::EndsWith(path
, "/", base::CompareCase::SENSITIVE
))
58 replacements
.SetPathStr(base::StringPiece(path
).substr(0, path
.size() - 1));
59 return url
.ReplaceComponents(replacements
);
62 // Builds a URL for a web search for |url| (using the "inurl:" query parameter
63 // and a Custom Search Engine, using the specified |api_key|). If |safe| is
64 // specified, enables the SafeSearch query parameter.
// Returns the assembled search URL as a GURL.
65 GURL
BuildSearchURL(const std::string
& api_key
,
68 // Normalize the URL and strip the scheme.
// The GetContent() value of the normalized URL is escaped so that it can be
// embedded as a query parameter value.
70 net::EscapeQueryParamValue(GetNormalizedURL(url
).GetContent(), true);
// The URL is first assembled as a plain string via base::StringPrintf.
71 std::string search_url
= base::StringPrintf(
// Appends kQuerySafeParam ("&safe=high") to the query string.
76 search_url
.append(kQuerySafeParam
);
77 return GURL(search_url
);
80 // Creates a URLFetcher for a Google web search for |url|. If |safe| is
81 // specified, enables SafeSearch for this request.
// |delegate| is handed to URLFetcher::Create() and |context| is installed as
// the fetcher's request context; |api_key| is forwarded to BuildSearchURL().
// Ownership of the new fetcher is passed to the caller.
82 scoped_ptr
<net::URLFetcher
> CreateFetcher(
83 URLFetcherDelegate
* delegate
,
84 URLRequestContextGetter
* context
,
85 const std::string
& api_key
,
// Fetcher IDs given to URLFetcher::Create(): kSafeId for the SafeSearch
// request, kUnsafeId for the request without SafeSearch.
88 const int kSafeId
= 0;
89 const int kUnsafeId
= 1;
90 int id
= safe
? kSafeId
: kUnsafeId
;
// A plain GET request against the URL produced by BuildSearchURL().
91 scoped_ptr
<net::URLFetcher
> fetcher
= URLFetcher::Create(
92 id
, BuildSearchURL(api_key
, url
, safe
), URLFetcher::GET
, delegate
);
93 fetcher
->SetRequestContext(context
);
// The request neither sends nor saves cookies.
94 fetcher
->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES
|
95 net::LOAD_DO_NOT_SAVE_COOKIES
);
96 return fetcher
.Pass();
99 // Checks whether the search |response| (in JSON format) contains an entry for
// |url|. URLs are compared in normalized form (see GetNormalizedURL), so
// differences in scheme, a leading "www." or a trailing slash do not matter.
// Each parsing step that fails logs a debug warning.
101 bool ResponseContainsURL(const std::string
& response
, const GURL
& url
) {
// The response must parse as JSON whose root is a dictionary.
102 scoped_ptr
<base::Value
> value
= base::JSONReader::Read(response
);
103 const base::DictionaryValue
* dict
= NULL
;
104 if (!value
|| !value
->GetAsDictionary(&dict
)) {
105 DLOG(WARNING
) << "ResponseContainsURL failed to parse global dictionary";
// The root dictionary must contain the kIdSearchInfo dictionary.
108 const base::DictionaryValue
* search_info_dict
= NULL
;
109 if (!dict
->GetDictionary(kIdSearchInfo
, &search_info_dict
)) {
110 DLOG(WARNING
) << "ResponseContainsURL failed to parse search information";
// The result count is read as a string, not as a number.
113 std::string result_count
;
114 if (!search_info_dict
->GetString(kIdResultCount
, &result_count
)) {
115 DLOG(WARNING
) << "ResponseContainsURL failed to parse result count";
// A count of "0" is special-cased before the result list is looked up.
118 if (result_count
== "0")
120 const base::ListValue
* results_list
= NULL
;
121 if (!dict
->GetList(kIdResults
, &results_list
)) {
122 DLOG(WARNING
) << "ResponseContainsURL failed to parse list of results";
// Normalize |url| once, outside the loop; every result URL is normalized
// inside the loop before being compared against it.
125 GURL url_normalized
= GetNormalizedURL(url
);
126 for (const base::Value
* entry
: *results_list
) {
// Each list entry is expected to be a dictionary carrying a kIdResultURL
// string.
127 const base::DictionaryValue
* result_dict
= NULL
;
128 if (!entry
->GetAsDictionary(&result_dict
)) {
129 DLOG(WARNING
) << "ResponseContainsURL failed to parse result dictionary";
132 std::string result_url
;
133 if (!result_dict
->GetString(kIdResultURL
, &result_url
)) {
134 DLOG(WARNING
) << "ResponseContainsURL failed to parse URL from result";
137 if (url_normalized
== GetNormalizedURL(GURL(result_url
)))
// Bookkeeping for a single URL check that is still in flight: the two search
// requests issued for the URL, the callbacks waiting for the verdict, and the
// time at which the check was created.
145 struct SupervisedUserAsyncURLChecker::Check
{
146 Check(const GURL
& url
,
147 scoped_ptr
<net::URLFetcher
> fetcher_safe
,
148 scoped_ptr
<net::URLFetcher
> fetcher_unsafe
,
149 const CheckCallback
& callback
);
// Request issued with SafeSearch enabled.
153 scoped_ptr
<net::URLFetcher
> fetcher_safe
;
// Request issued without SafeSearch.
154 scoped_ptr
<net::URLFetcher
> fetcher_unsafe
;
// Every callback to run once the check finishes; CheckURL() adds to this list
// when the same URL is requested again while the check is pending.
155 std::vector
<CheckCallback
> callbacks
;
// Set to the current time by the constructor; used to report the
// "ManagedUsers.SafeSitesDelay" histogram.
158 base::Time start_time
;
// Constructs a Check that takes ownership of both fetchers, starts |callbacks|
// with the single |callback| supplied, and records the current time in
// |start_time|.
161 SupervisedUserAsyncURLChecker::Check::Check(
163 scoped_ptr
<net::URLFetcher
> fetcher_safe
,
164 scoped_ptr
<net::URLFetcher
> fetcher_unsafe
,
165 const CheckCallback
& callback
)
// Pass() moves each scoped_ptr parameter into the member of the same name.
167 fetcher_safe(fetcher_safe
.Pass()),
168 fetcher_unsafe(fetcher_unsafe
.Pass()),
// A vector holding exactly one element: |callback|.
169 callbacks(1, callback
),
172 start_time(base::Time::Now()) {
175 SupervisedUserAsyncURLChecker::Check::~Check() {}
// Stores a filtering verdict: the |behavior| to apply and whether that verdict
// is |uncertain|. Instances are what |cache_| holds per URL.
177 SupervisedUserAsyncURLChecker::CheckResult::CheckResult(
178 SupervisedUserURLFilter::FilteringBehavior behavior
, bool uncertain
)
179 : behavior(behavior
), uncertain(uncertain
) {
// Creates a checker whose search requests use |context| as their request
// context and whose |cache_| is constructed with kDefaultCacheSize.
182 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
183 URLRequestContextGetter
* context
)
184 : context_(context
), cache_(kDefaultCacheSize
) {
// Same as the single-argument constructor, except that |cache_| is constructed
// with the caller-supplied |cache_size| instead of kDefaultCacheSize.
187 SupervisedUserAsyncURLChecker::SupervisedUserAsyncURLChecker(
188 URLRequestContextGetter
* context
,
190 : context_(context
), cache_(cache_size
) {
193 SupervisedUserAsyncURLChecker::~SupervisedUserAsyncURLChecker() {}
// Determines the filtering behavior for |url| and reports it through
// |callback|. In order:
//  1. Google and YouTube URLs invoke |callback| immediately with ALLOW.
//  2. A cached verdict invokes |callback| immediately with the cached values.
//  3. If a check for the same URL is already pending, |callback| is added to
//     that check.
//  4. Otherwise two search requests (with and without SafeSearch) are started
//     and a new pending Check is recorded.
// NOTE(review): confirm the meaning of the bool return value against the
// header.
195 bool SupervisedUserAsyncURLChecker::CheckURL(const GURL
& url
,
196 const CheckCallback
& callback
) {
197 // TODO(treib): Hack: For now, allow all Google URLs to save search QPS. If we
198 // ever remove this, we should find a way to allow at least the NTP.
199 if (google_util::IsGoogleDomainUrl(url
,
200 google_util::ALLOW_SUBDOMAIN
,
201 google_util::ALLOW_NON_STANDARD_PORTS
)) {
202 callback
.Run(url
, SupervisedUserURLFilter::ALLOW
, false);
205 // TODO(treib): Hack: For now, allow all YouTube URLs since YouTube has its
206 // own Safety Mode anyway.
207 if (google_util::IsYoutubeDomainUrl(url
,
208 google_util::ALLOW_SUBDOMAIN
,
209 google_util::ALLOW_NON_STANDARD_PORTS
)) {
210 callback
.Run(url
, SupervisedUserURLFilter::ALLOW
, false);
// Look for a previously stored verdict for this exact URL.
214 auto cache_it
= cache_
.Get(url
);
215 if (cache_it
!= cache_
.end()) {
216 const CheckResult
& result
= cache_it
->second
;
217 DVLOG(1) << "Cache hit! " << url
.spec() << " is "
218 << (result
.behavior
== SupervisedUserURLFilter::BLOCK
? "NOT" : "")
219 << " safe; certain: " << !result
.uncertain
;
220 callback
.Run(url
, result
.behavior
, result
.uncertain
);
224 // See if we already have a check in progress for this URL.
225 for (Check
* check
: checks_in_progress_
) {
226 if (check
->url
== url
) {
227 DVLOG(1) << "Adding to pending check for " << url
.spec();
228 check
->callbacks
.push_back(callback
);
// No shortcut applied: issue both search requests, with this object as the
// fetcher delegate.
233 DVLOG(1) << "Checking URL " << url
;
234 std::string api_key
= google_apis::GetSafeSitesAPIKey();
235 scoped_ptr
<URLFetcher
> fetcher_safe(
236 CreateFetcher(this, context_
, api_key
, url
, true));
237 scoped_ptr
<URLFetcher
> fetcher_unsafe(
238 CreateFetcher(this, context_
, api_key
, url
, false));
239 fetcher_safe
->Start();
240 fetcher_unsafe
->Start();
// The new Check takes ownership of both fetchers and is tracked in
// |checks_in_progress_| so that OnURLFetchComplete() can find it.
241 checks_in_progress_
.push_back(
242 new Check(url
, fetcher_safe
.Pass(), fetcher_unsafe
.Pass(), callback
));
// Called with the fetcher (|source|) whose request has finished. Finds the
// pending Check that owns |source|, marks the matching request as done and,
// once a verdict is available, caches it, runs every callback registered on
// the Check and removes the Check from |checks_in_progress_|.
246 void SupervisedUserAsyncURLChecker::OnURLFetchComplete(
247 const net::URLFetcher
* source
) {
// Locate the Check owning |source| and note which of its two requests this is.
248 ScopedVector
<Check
>::iterator it
= checks_in_progress_
.begin();
249 bool is_safe_search_request
= false;
250 while (it
!= checks_in_progress_
.end()) {
251 if (source
== (*it
)->fetcher_safe
.get()) {
252 is_safe_search_request
= true;
253 (*it
)->safe_done
= true;
255 } else if (source
== (*it
)->fetcher_unsafe
.get()) {
256 (*it
)->unsafe_done
= true;
// |source| is expected to belong to one of the pending checks.
261 DCHECK(it
!= checks_in_progress_
.end());
// A failed request is reported to every callback as ALLOW with the uncertain
// flag set, and the Check is dropped.
// NOTE(review): |check| presumably refers to the Check found above (*it) --
// confirm.
264 const URLRequestStatus
& status
= source
->GetStatus();
265 if (!status
.is_success()) {
266 DLOG(WARNING
) << "URL request failed! Letting through...";
267 for (size_t i
= 0; i
< check
->callbacks
.size(); i
++)
268 check
->callbacks
[i
].Run(check
->url
, SupervisedUserURLFilter::ALLOW
, true);
269 checks_in_progress_
.erase(it
);
// Inspect the response body for an entry matching the checked URL.
273 std::string response_body
;
274 source
->GetResponseAsString(&response_body
);
275 bool url_in_search_result
= ResponseContainsURL(response_body
, check
->url
);
277 // We consider a URL as safe if it turns up in a safesearch query. To handle
278 // URLs that aren't in the search index at all, we also allow URLs that don't
279 // turn up even in a non-safesearch query.
// Defaults: ALLOW, flagged as uncertain.
280 SupervisedUserURLFilter::FilteringBehavior behavior
=
281 SupervisedUserURLFilter::ALLOW
;
282 bool uncertain
= true;
// Case analysis for the response of the SafeSearch request.
283 if (is_safe_search_request
) {
284 if (url_in_search_result
) {
285 // Found the URL with safesearch, don't block.
286 DVLOG(1) << check
->url
.spec() << " is safe, allowing.";
287 behavior
= SupervisedUserURLFilter::ALLOW
;
289 } else if (check
->unsafe_done
) {
290 // Found the URL only without safesearch, block.
291 DVLOG(1) << check
->url
.spec() << " is NOT safe, blocking.";
292 behavior
= SupervisedUserURLFilter::BLOCK
;
295 // Didn't find the URL with safesearch, have to wait for non-safe result.
// Case analysis for the response of the request without SafeSearch.
299 if (!url_in_search_result
) {
300 // Didn't find the URL even without safesearch, have to let through.
301 DVLOG(1) << check
->url
.spec() << " is unknown, allowing.";
302 behavior
= SupervisedUserURLFilter::ALLOW
;
304 } else if (check
->safe_done
) {
305 // Found the URL only without safesearch, block.
306 DVLOG(1) << check
->url
.spec() << " is NOT safe, blocking.";
307 behavior
= SupervisedUserURLFilter::BLOCK
;
310 // Found the URL without safesearch, wait for safe result.
// Record the time elapsed since the Check was created.
315 UMA_HISTOGRAM_TIMES("ManagedUsers.SafeSitesDelay",
316 base::Time::Now() - check
->start_time
);
// Remember the verdict so that CheckURL() can answer from |cache_| next time.
318 cache_
.Put(check
->url
, CheckResult(behavior
, uncertain
));
// Deliver the verdict to every waiting callback, then drop the Check.
320 for (size_t i
= 0; i
< check
->callbacks
.size(); i
++)
321 check
->callbacks
[i
].Run(check
->url
, behavior
, uncertain
);
322 checks_in_progress_
.erase(it
);