1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "base/time/default_clock.h"
13 #include "base/values.h"
14 #include "crypto/sha2.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/base/sdch_observer.h"
17 #include "net/url_request/url_request_http_job.h"
// Rewrites |*gurl| in place so that its host loses its final character; the
// visible test on that character compares it against '.'.  The new host is
// applied through GURL::Replacements / ReplaceComponents().
// NOTE(review): this listing skips original lines between the host copy and
// the '.' test, and right after the test, so any empty-host guard or early
// return is not visible here -- confirm against the full file before relying
// on *host.rbegin() being safe for an empty host.
21 void StripTrailingDot(GURL
* gurl
) {
22 std::string
host(gurl
->host());
27 if (*host
.rbegin() != '.')
30 host
.resize(host
.size() - 1);
32 GURL::Replacements replacements
;
33 replacements
.SetHostStr(host
);
34 *gurl
= gurl
->ReplaceComponents(replacements
);
42 // Workaround for http://crbug.com/437794; remove when fixed.
// Definitions of SdchManager's static flags.
// NOTE(review): |g_sdch_enabled_| is defined twice below with different
// initializers.  Presumably the two definitions live in alternative
// preprocessor branches tied to the workaround mentioned above, but the
// directives are not visible in this listing -- TODO confirm.
45 bool SdchManager::g_sdch_enabled_
= false;
48 bool SdchManager::g_sdch_enabled_
= true;
52 bool SdchManager::g_secure_scheme_supported_
= true;
// Constructs a Dictionary from its text and parsed header attributes.
// |text_| is built from |dictionary_text| and |offset|; the client/server
// hashes and the expiration time are stored as given; |clock_| is a freshly
// allocated base::DefaultClock.
// NOTE(review): several parameter lines and member initializers are missing
// from this listing (e.g. |offset| is used below but its declaration is not
// shown, and no initializer that consumes |domain|, |path| or |ports| is
// visible) -- confirm against the full file.
54 SdchManager::Dictionary::Dictionary(const std::string
& dictionary_text
,
56 const std::string
& client_hash
,
57 const std::string
& server_hash
,
59 const std::string
& domain
,
60 const std::string
& path
,
61 const base::Time
& expiration
,
62 const std::set
<int>& ports
)
63 : text_(dictionary_text
, offset
),
64 client_hash_(client_hash
),
65 server_hash_(server_hash
),
69 expiration_(expiration
),
71 clock_(new base::DefaultClock
) {
// Copy constructor.  The visible initializers copy the client hash, server
// hash and expiration from |rhs|; |clock_| is NOT copied -- the new object
// gets its own base::DefaultClock, so a clock installed on |rhs| via
// SetClockForTesting() does not carry over.
// NOTE(review): the start of the initializer list and several initializers
// are missing from this listing -- confirm against the full file.
74 SdchManager::Dictionary::Dictionary(const SdchManager::Dictionary
& rhs
)
76 client_hash_(rhs
.client_hash_
),
77 server_hash_(rhs
.server_hash_
),
81 expiration_(rhs
.expiration_
),
83 clock_(new base::DefaultClock
) {
86 SdchManager::Dictionary::~Dictionary() {}
88 // Security functions restricting loads and use of dictionaries.
// Decides whether a dictionary fetched from |dictionary_url| and declaring
// the given |domain|, |path| and |ports| attributes may be stored.  The
// visible checks return, in order:
//   - SDCH_DICTIONARY_MISSING_DOMAIN_SPECIFIER,
//   - SDCH_DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN (guarded by a
//     registry_controlled_domains::GetDomainAndRegistry() test),
//   - SDCH_DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL when DomainMatch() fails,
//   - SDCH_DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX when |domain| is a postfix
//     of the URL host and a '.' occurs in the part of the host before it,
//   - SDCH_DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL when |ports| is non-empty
//     and does not contain the URL's effective port.
// |path| is not referenced by any visible line.
// NOTE(review): the rule text below originally sat inside a block comment
// whose delimiters are not in this listing; the condition guarding the first
// return, the tail of the GetDomainAndRegistry() test and the final return
// are also missing -- confirm against the full file.
91 SdchProblemCode
SdchManager::Dictionary::CanSet(const std::string
& domain
,
92 const std::string
& path
,
93 const std::set
<int>& ports
,
94 const GURL
& dictionary_url
) {
96 A dictionary is invalid and must not be stored if any of the following are
98 1. The dictionary has no Domain attribute.
99 2. The effective host name that derives from the referer URL host name does
100 not domain-match the Domain attribute.
101 3. The Domain attribute is a top level domain.
102 4. The referer URL host is a host domain name (not IP address) and has the
103 form HD, where D is the value of the Domain attribute, and H is a string
104 that contains one or more dots.
105 5. If the dictionary has a Port attribute and the referer URL's port was not
109 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
110 // and hence the conservative approach is to not allow any redirects (if there
111 // were any... then don't allow the dictionary to be set).
114 return SDCH_DICTIONARY_MISSING_DOMAIN_SPECIFIER
; // Domain is required.
116 if (registry_controlled_domains::GetDomainAndRegistry(
117 domain
, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES
)
119 return SDCH_DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN
; // domain was a TLD.
122 if (!Dictionary::DomainMatch(dictionary_url
, domain
))
123 return SDCH_DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL
;
125 std::string referrer_url_host
= dictionary_url
.host();
126 size_t postfix_domain_index
= referrer_url_host
.rfind(domain
);
127 // See if it is indeed a postfix, or just an internal string.
128 if (referrer_url_host
.size() == postfix_domain_index
+ domain
.size()) {
129 // It is a postfix... so check to see if there's a dot in the prefix.
130 size_t end_of_host_index
= referrer_url_host
.find_first_of('.');
131 if (referrer_url_host
.npos
!= end_of_host_index
&&
132 end_of_host_index
< postfix_domain_index
) {
133 return SDCH_DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX
;
137 if (!ports
.empty() && 0 == ports
.count(dictionary_url
.EffectiveIntPort()))
138 return SDCH_DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL
;
// Decides whether this dictionary may be used for a request to |target_url|.
// The visible checks return, in order:
//   - SDCH_DICTIONARY_FOUND_HAS_WRONG_DOMAIN when DomainMatch() against
//     |domain_| fails,
//   - SDCH_DICTIONARY_FOUND_HAS_WRONG_PORT_LIST when |ports_| is non-empty and
//     lacks the URL's effective port,
//   - SDCH_DICTIONARY_FOUND_HAS_WRONG_PATH when |path_| is non-empty and
//     PathMatch() fails (an empty |path_| never reaches PathMatch()),
//   - SDCH_DICTIONARY_FOUND_HAS_WRONG_SCHEME when the target is secure but
//     secure-scheme support is off, or when the target and |url_| disagree on
//     SchemeIsSecure(),
//   - SDCH_ATTEMPT_TO_DECODE_NON_HTTP_DATA when the target is neither HTTP
//     nor HTTPS.
// NOTE(review): the rule text below originally sat inside a block comment
// whose delimiters are not in this listing, and the final return is missing
// too -- confirm against the full file.
143 SdchProblemCode
SdchManager::Dictionary::CanUse(
144 const GURL
& target_url
) const {
146 1. The request URL's host name domain-matches the Domain attribute of the
148 2. If the dictionary has a Port attribute, the request port is one of the
149 ports listed in the Port attribute.
150 3. The request URL path-matches the path attribute of the dictionary.
151 4. The request is not an HTTPS request.
152 We can override (ignore) item (4) only when we have explicitly enabled
153 HTTPS support AND the dictionary acquisition scheme matches the target
156 if (!DomainMatch(target_url
, domain_
))
157 return SDCH_DICTIONARY_FOUND_HAS_WRONG_DOMAIN
;
159 if (!ports_
.empty() && 0 == ports_
.count(target_url
.EffectiveIntPort()))
160 return SDCH_DICTIONARY_FOUND_HAS_WRONG_PORT_LIST
;
162 if (path_
.size() && !PathMatch(target_url
.path(), path_
))
163 return SDCH_DICTIONARY_FOUND_HAS_WRONG_PATH
;
165 if (!SdchManager::secure_scheme_supported() && target_url
.SchemeIsSecure())
166 return SDCH_DICTIONARY_FOUND_HAS_WRONG_SCHEME
;
168 if (target_url
.SchemeIsSecure() != url_
.SchemeIsSecure())
169 return SDCH_DICTIONARY_FOUND_HAS_WRONG_SCHEME
;
171 // TODO(jar): Remove overly restrictive failsafe test (added per security
172 // review) when we have a need to be more general.
173 if (!target_url
.SchemeIsHTTPOrHTTPS())
174 return SDCH_ATTEMPT_TO_DECODE_NON_HTTP_DATA
;
180 bool SdchManager::Dictionary::PathMatch(const std::string
& path
,
181 const std::string
& restriction
) {
184 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
185 character following P2 in P1 is "/".
187 if (path
== restriction
)
189 size_t prefix_length
= restriction
.size();
190 if (prefix_length
> path
.size())
191 return false; // Can't be a prefix.
192 if (0 != path
.compare(0, prefix_length
, restriction
))
194 return restriction
[prefix_length
- 1] == '/' || path
[prefix_length
] == '/';
198 bool SdchManager::Dictionary::DomainMatch(const GURL
& gurl
,
199 const std::string
& restriction
) {
200 // TODO(jar): This is not precisely a domain match definition.
201 return gurl
.DomainIs(restriction
.data(), restriction
.size());
204 bool SdchManager::Dictionary::Expired() const {
205 return clock_
->Now() > expiration_
;
208 void SdchManager::Dictionary::SetClockForTesting(
209 scoped_ptr
<base::Clock
> clock
) {
210 clock_
= clock
.Pass();
213 SdchManager::DictionarySet::DictionarySet() {}
215 SdchManager::DictionarySet::~DictionarySet() {}
// Builds a string out of the client hashes of the dictionaries in this set,
// appending each entry's client_hash() to |result| while iterating over
// |dictionaries_|.
// NOTE(review): the declaration of |result|, any separator handling between
// hashes and the return statement are not visible in this listing -- confirm
// against the full file.
217 std::string
SdchManager::DictionarySet::GetDictionaryClientHashList() const {
220 for (const auto& entry
: dictionaries_
) {
224 result
.append(entry
.second
->data
.client_hash());
// Looks |hash| up in |dictionaries_| and returns a pointer to the Dictionary
// stored in the matching entry.  The pointer refers to data held by the map
// entry, not to a copy.
// NOTE(review): the statement executed when |hash| is absent is not visible
// in this listing; presumably a null pointer is returned -- TODO confirm.
230 const SdchManager::Dictionary
* SdchManager::DictionarySet::GetDictionary(
231 const std::string
& hash
) const {
232 auto it
= dictionaries_
.find(hash
);
233 if (it
== dictionaries_
.end())
236 return &it
->second
->data
;
239 bool SdchManager::DictionarySet::Empty() const {
240 return dictionaries_
.empty();
// Stores |dictionary| in this set under the key |server_hash|.  A DCHECK
// asserts that no entry with that key exists yet; in builds where DCHECK is
// compiled out, the operator[] assignment replaces any existing entry.
// NOTE(review): the line naming the second parameter (|dictionary|) is
// missing from this listing.
243 void SdchManager::DictionarySet::AddDictionary(
244 const std::string
& server_hash
,
245 const scoped_refptr
<base::RefCountedData
<SdchManager::Dictionary
>>&
247 DCHECK(dictionaries_
.end() == dictionaries_
.find(server_hash
));
249 dictionaries_
[server_hash
] = dictionary
;
252 SdchManager::SdchManager() {
253 DCHECK(thread_checker_
.CalledOnValidThread());
// DCHECKs the calling thread, then empties |dictionaries_| one entry at a
// time (erasing by the key of the first element until the map is empty).
// Under OS_CHROMEOS a CHECK additionally asserts that |observers_| reports
// no remaining observers.
// NOTE(review): the loop's closing brace, at least one line inside the
// OS_CHROMEOS section and the matching #endif are not visible in this
// listing -- confirm against the full file.
256 SdchManager::~SdchManager() {
257 DCHECK(thread_checker_
.CalledOnValidThread());
258 while (!dictionaries_
.empty()) {
259 auto it
= dictionaries_
.begin();
260 dictionaries_
.erase(it
->first
);
262 #if defined(OS_CHROMEOS)
263 // For debugging http://crbug.com/454198; remove when resolved.
265 // Explicitly confirm that we can't notify any observers anymore.
266 CHECK(!observers_
.might_have_observers());
270 void SdchManager::ClearData() {
271 blacklisted_domains_
.clear();
272 allow_latency_experiment_
.clear();
273 dictionaries_
.clear();
274 FOR_EACH_OBSERVER(SdchObserver
, observers_
, OnClearDictionaries(this));
278 void SdchManager::SdchErrorRecovery(SdchProblemCode problem
) {
279 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_5", problem
,
280 SDCH_MAX_PROBLEM_CODE
);
284 void SdchManager::EnableSdchSupport(bool enabled
) {
285 g_sdch_enabled_
= enabled
;
289 void SdchManager::EnableSecureSchemeSupport(bool enabled
) {
290 g_secure_scheme_supported_
= enabled
;
// Temporarily blacklists the (lower-cased) host of |url| for |blacklist_reason|.
// Latency experiments are first disallowed for |url|.  If the host's entry
// already has a positive |count| nothing else changes.  Otherwise
// |exponential_count| is advanced -- the visible lines show both a saturating
// assignment to INT_MAX (taken when doubling would overflow) and the
// "* 2 + 1" growth step -- and then |count| is set to the new
// |exponential_count| and |reason| to |blacklist_reason|.
// operator[] is used for the lookup, so an entry is created on first use.
// NOTE(review): the lines joining the two |exponential_count| assignments
// (presumably "} else {" and the closing brace) are not visible in this
// listing -- confirm against the full file.
293 void SdchManager::BlacklistDomain(const GURL
& url
,
294 SdchProblemCode blacklist_reason
) {
295 SetAllowLatencyExperiment(url
, false);
297 BlacklistInfo
* blacklist_info
=
298 &blacklisted_domains_
[base::StringToLowerASCII(url
.host())];
300 if (blacklist_info
->count
> 0)
301 return; // Domain is already blacklisted.
303 if (blacklist_info
->exponential_count
> (INT_MAX
- 1) / 2) {
304 blacklist_info
->exponential_count
= INT_MAX
;
306 blacklist_info
->exponential_count
=
307 blacklist_info
->exponential_count
* 2 + 1;
310 blacklist_info
->count
= blacklist_info
->exponential_count
;
311 blacklist_info
->reason
= blacklist_reason
;
314 void SdchManager::BlacklistDomainForever(const GURL
& url
,
315 SdchProblemCode blacklist_reason
) {
316 SetAllowLatencyExperiment(url
, false);
318 BlacklistInfo
* blacklist_info
=
319 &blacklisted_domains_
[base::StringToLowerASCII(url
.host())];
320 blacklist_info
->count
= INT_MAX
;
321 blacklist_info
->exponential_count
= INT_MAX
;
322 blacklist_info
->reason
= blacklist_reason
;
325 void SdchManager::ClearBlacklistings() {
326 blacklisted_domains_
.clear();
329 void SdchManager::ClearDomainBlacklisting(const std::string
& domain
) {
330 BlacklistInfo
* blacklist_info
= &blacklisted_domains_
[
331 base::StringToLowerASCII(domain
)];
332 blacklist_info
->count
= 0;
333 blacklist_info
->reason
= SDCH_OK
;
// Returns the |count| field of the blacklist entry for |domain| (matched
// lower-cased).
// NOTE(review): the statement taken when the domain has no entry is not
// visible in this listing (presumably "return 0;") -- TODO confirm.
// NOTE(review): the entry is looked up twice (find(), then operator[]);
// keeping the iterator from find() would avoid the second lookup.
336 int SdchManager::BlackListDomainCount(const std::string
& domain
) {
337 std::string
domain_lower(base::StringToLowerASCII(domain
));
339 if (blacklisted_domains_
.end() == blacklisted_domains_
.find(domain_lower
))
341 return blacklisted_domains_
[domain_lower
].count
;
// Returns the |exponential_count| field of the blacklist entry for |domain|
// (matched lower-cased).
// NOTE(review): the statement taken when the domain has no entry is not
// visible in this listing (presumably "return 0;") -- TODO confirm.
// NOTE(review): the entry is looked up twice (find(), then operator[]);
// keeping the iterator from find() would avoid the second lookup.
344 int SdchManager::BlacklistDomainExponential(const std::string
& domain
) {
345 std::string
domain_lower(base::StringToLowerASCII(domain
));
347 if (blacklisted_domains_
.end() == blacklisted_domains_
.find(domain_lower
))
349 return blacklisted_domains_
[domain_lower
].exponential_count
;
// Reports whether SDCH may currently be used for |url|.  Visible outcomes:
//   - SDCH_DISABLED when |g_sdch_enabled_| is false,
//   - SDCH_SECURE_SCHEME_NOT_SUPPORTED for a secure URL while secure-scheme
//     support is off,
//   - SDCH_DOMAIN_BLACKLIST_INCLUDES_TARGET when the lower-cased host has a
//     blacklist entry with a non-zero |count|.
// This call is not read-only for blacklisted hosts: it logs the entry's
// reason to the "Sdch3.BlacklistReason" histogram and writes a decremented
// |count| back (the visible lines also show a path that resets |count| to 0
// and |reason| to SDCH_OK).
// NOTE(review): the returns for the "blacklist empty" and "host not
// blacklisted" cases, and the condition choosing between the two |count|
// updates, are not visible in this listing -- confirm against the full file.
352 SdchProblemCode
SdchManager::IsInSupportedDomain(const GURL
& url
) {
353 DCHECK(thread_checker_
.CalledOnValidThread());
354 if (!g_sdch_enabled_
)
355 return SDCH_DISABLED
;
357 if (!secure_scheme_supported() && url
.SchemeIsSecure())
358 return SDCH_SECURE_SCHEME_NOT_SUPPORTED
;
360 if (blacklisted_domains_
.empty())
363 DomainBlacklistInfo::iterator it
=
364 blacklisted_domains_
.find(base::StringToLowerASCII(url
.host()));
365 if (blacklisted_domains_
.end() == it
|| it
->second
.count
== 0)
368 UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it
->second
.reason
,
369 SDCH_MAX_PROBLEM_CODE
);
371 int count
= it
->second
.count
- 1;
373 it
->second
.count
= count
;
375 it
->second
.count
= 0;
376 it
->second
.reason
= SDCH_OK
;
379 return SDCH_DOMAIN_BLACKLIST_INCLUDES_TARGET
;
// Handles a request to fetch the dictionary at |dictionary_url| on behalf of
// |request_url|: runs CanFetchDictionary() on the pair and invokes
// OnGetDictionary(this, request_url, dictionary_url) on observers.
// NOTE(review): the handling of a failing |rv|, the observer-list argument of
// the macro and the final return are not visible in this listing -- confirm
// against the full file.
382 SdchProblemCode
SdchManager::OnGetDictionary(const GURL
& request_url
,
383 const GURL
& dictionary_url
) {
384 DCHECK(thread_checker_
.CalledOnValidThread());
385 SdchProblemCode rv
= CanFetchDictionary(request_url
, dictionary_url
);
389 FOR_EACH_OBSERVER(SdchObserver
,
391 OnGetDictionary(this, request_url
, dictionary_url
));
396 void SdchManager::OnDictionaryUsed(const std::string
& server_hash
) {
397 FOR_EACH_OBSERVER(SdchObserver
, observers_
,
398 OnDictionaryUsed(this, server_hash
));
// Decides whether a dictionary at |dictionary_url| may be fetched for a page
// at |referring_url|.  The visible checks return, in order:
//   - SDCH_DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST when host or scheme of
//     the two URLs differ (hosts are compared with plain string inequality),
//   - SDCH_DICTIONARY_SELECTED_FOR_SSL for a secure referrer while
//     secure-scheme support is off,
//   - SDCH_DICTIONARY_SELECTED_FROM_NON_HTTP when the referrer is neither
//     HTTP nor HTTPS.
// NOTE(review): the end of the block comment below and the final return are
// not visible in this listing -- confirm against the full file.
401 SdchProblemCode
SdchManager::CanFetchDictionary(
402 const GURL
& referring_url
,
403 const GURL
& dictionary_url
) const {
404 DCHECK(thread_checker_
.CalledOnValidThread());
405 /* The user agent may retrieve a dictionary from the dictionary URL if all of
406 the following are true:
407 1 The dictionary URL host name matches the referrer URL host name and
409 2 The dictionary URL host name domain matches the parent domain of the
410 referrer URL host name
411 3 The parent domain of the referrer URL host name is not a top level
414 // Item (1) above implies item (2). Spec should be updated.
415 // I take "host name match" to be "is identical to"
416 if (referring_url
.host() != dictionary_url
.host() ||
417 referring_url
.scheme() != dictionary_url
.scheme())
418 return SDCH_DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST
;
420 if (!secure_scheme_supported() && referring_url
.SchemeIsSecure())
421 return SDCH_DICTIONARY_SELECTED_FOR_SSL
;
423 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
424 // than current SDCH spec when needed, and justified by security audit.
425 if (!referring_url
.SchemeIsHTTPOrHTTPS())
426 return SDCH_DICTIONARY_SELECTED_FROM_NON_HTTP
;
// Collects, into a newly allocated DictionarySet, the stored dictionaries
// relevant to |target_url|.  The visible lines test IsInSupportedDomain() for
// the URL and, per dictionary, CanUse(target_url) and Expired(), then add the
// entry to |result|.  A count is logged to "Sdch3.Advertisement_Count" and
// ownership of |result| is passed to the caller.
// Because IsInSupportedDomain() is called, this can modify blacklist counts.
// NOTE(review): the statements taken when those three tests fire, the
// declaration/update of |count| and any special handling of an empty result
// are not visible in this listing -- confirm against the full file.
431 scoped_ptr
<SdchManager::DictionarySet
>
432 SdchManager::GetDictionarySet(const GURL
& target_url
) {
433 if (IsInSupportedDomain(target_url
) != SDCH_OK
)
437 scoped_ptr
<SdchManager::DictionarySet
> result(new DictionarySet
);
438 for (const auto& entry
: dictionaries_
) {
439 if (entry
.second
->data
.CanUse(target_url
) != SDCH_OK
)
441 if (entry
.second
->data
.Expired())
444 result
->AddDictionary(entry
.first
, entry
.second
);
450 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count
);
452 return result
.Pass();
455 scoped_ptr
<SdchManager::DictionarySet
>
456 SdchManager::GetDictionarySetByHash(
457 const GURL
& target_url
,
458 const std::string
& server_hash
,
459 SdchProblemCode
* problem_code
) {
460 scoped_ptr
<SdchManager::DictionarySet
> result
;
462 *problem_code
= SDCH_DICTIONARY_HASH_NOT_FOUND
;
463 const auto& it
= dictionaries_
.find(server_hash
);
464 if (it
== dictionaries_
.end())
465 return result
.Pass();
467 *problem_code
= it
->second
->data
.CanUse(target_url
);
468 if (*problem_code
!= SDCH_OK
)
469 return result
.Pass();
471 result
.reset(new DictionarySet
);
472 result
->AddDictionary(it
->first
, it
->second
);
473 return result
.Pass();
477 void SdchManager::GenerateHash(const std::string
& dictionary_text
,
478 std::string
* client_hash
, std::string
* server_hash
) {
479 char binary_hash
[32];
480 crypto::SHA256HashString(dictionary_text
, binary_hash
, sizeof(binary_hash
));
482 std::string
first_48_bits(&binary_hash
[0], 6);
483 std::string
second_48_bits(&binary_hash
[6], 6);
484 UrlSafeBase64Encode(first_48_bits
, client_hash
);
485 UrlSafeBase64Encode(second_48_bits
, server_hash
);
487 DCHECK_EQ(server_hash
->length(), 8u);
488 DCHECK_EQ(client_hash
->length(), 8u);
491 // Methods for supporting latency experiments.
493 bool SdchManager::AllowLatencyExperiment(const GURL
& url
) const {
494 DCHECK(thread_checker_
.CalledOnValidThread());
495 return allow_latency_experiment_
.end() !=
496 allow_latency_experiment_
.find(url
.host());
// Adds the host of |url| to, or removes it from, |allow_latency_experiment_|.
// The visible removal path looks the host up, returns if it is absent, and
// otherwise reports SDCH_LATENCY_TEST_DISALLOWED through SdchErrorRecovery()
// before erasing the entry.
// NOTE(review): the branch on |enable| that separates the insert from the
// removal path (presumably "if (enable) { ...; return; }") is not visible in
// this listing -- confirm against the full file.
499 void SdchManager::SetAllowLatencyExperiment(const GURL
& url
, bool enable
) {
500 DCHECK(thread_checker_
.CalledOnValidThread());
502 allow_latency_experiment_
.insert(url
.host());
505 ExperimentSet::iterator it
= allow_latency_experiment_
.find(url
.host());
506 if (allow_latency_experiment_
.end() == it
)
507 return; // It was already erased, or never allowed.
508 SdchErrorRecovery(SDCH_LATENCY_TEST_DISALLOWED
);
509 allow_latency_experiment_
.erase(it
);
512 void SdchManager::AddObserver(SdchObserver
* observer
) {
513 observers_
.AddObserver(observer
);
516 void SdchManager::RemoveObserver(SdchObserver
* observer
) {
517 observers_
.RemoveObserver(observer
);
// Parses |dictionary_text| (header lines, a blank line, then the payload),
// validates it against |dictionary_url| and stores it in |dictionaries_|
// keyed by its server hash.  Visible steps:
//   1. GenerateHash() computes the client/server hashes; an already stored
//      server hash yields SDCH_DICTIONARY_ALREADY_LOADED.
//   2. The default expiration is now + 30 days.
//   3. Empty text yields SDCH_DICTIONARY_HAS_NO_TEXT; text without "\n\n"
//      yields SDCH_DICTIONARY_HAS_NO_HEADER.
//   4. Header lines are scanned as "name: value".  Names are lower-cased
//      before comparison; "domain", "path", "format-version", "max-age" and
//      "port" are recognised.  A line without ':' anywhere after its start
//      yields SDCH_DICTIONARY_HEADER_LINE_MISSING_COLON; a rejected
//      format-version yields SDCH_DICTIONARY_UNSUPPORTED_VERSION.
//   5. The URL is normalised with StripTrailingDot(), then checked with
//      IsInSupportedDomain() and Dictionary::CanSet().
//   6. On success the size is logged to "Sdch3.Dictionary size loaded", a
//      Dictionary whose payload starts at header_end + 2 is stored, and
//      |*server_hash_p| receives the server hash.
// NOTE(review): the return values of base::StringToInt64() and
// base::StringToInt() are not checked in the visible lines, so an unparsable
// "max-age" or "port" value appears to be used as-is -- TODO confirm what
// |seconds| / |port| hold in that case.
// NOTE(review): |server_hash_p| is dereferenced without a visible null check.
// NOTE(review): this listing omits many original lines (the loop header,
// the assignments for "domain"/"path", the format-version test, the |ports|,
// |seconds| and |port| declarations, the checks on |rv| and the final
// return) -- confirm all of the above against the full file.
520 SdchProblemCode
SdchManager::AddSdchDictionary(
521 const std::string
& dictionary_text
,
522 const GURL
& dictionary_url
,
523 std::string
* server_hash_p
) {
524 DCHECK(thread_checker_
.CalledOnValidThread());
525 std::string client_hash
;
526 std::string server_hash
;
527 GenerateHash(dictionary_text
, &client_hash
, &server_hash
);
528 if (dictionaries_
.find(server_hash
) != dictionaries_
.end())
529 return SDCH_DICTIONARY_ALREADY_LOADED
; // Already loaded.
531 std::string domain
, path
;
533 base::Time
expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
535 if (dictionary_text
.empty())
536 return SDCH_DICTIONARY_HAS_NO_TEXT
; // Missing header.
538 size_t header_end
= dictionary_text
.find("\n\n");
539 if (std::string::npos
== header_end
)
540 return SDCH_DICTIONARY_HAS_NO_HEADER
; // Missing header.
542 size_t line_start
= 0; // Start of line being parsed.
544 size_t line_end
= dictionary_text
.find('\n', line_start
);
545 DCHECK(std::string::npos
!= line_end
);
546 DCHECK_LE(line_end
, header_end
);
548 size_t colon_index
= dictionary_text
.find(':', line_start
);
549 if (std::string::npos
== colon_index
)
550 return SDCH_DICTIONARY_HEADER_LINE_MISSING_COLON
; // Illegal line missing
553 if (colon_index
> line_end
)
556 size_t value_start
= dictionary_text
.find_first_not_of(" \t",
558 if (std::string::npos
!= value_start
) {
559 if (value_start
>= line_end
)
561 std::string
name(dictionary_text
, line_start
, colon_index
- line_start
);
562 std::string
value(dictionary_text
, value_start
, line_end
- value_start
);
563 name
= base::StringToLowerASCII(name
);
564 if (name
== "domain") {
566 } else if (name
== "path") {
568 } else if (name
== "format-version") {
570 return SDCH_DICTIONARY_UNSUPPORTED_VERSION
;
571 } else if (name
== "max-age") {
573 base::StringToInt64(value
, &seconds
);
574 expiration
= base::Time::Now() + base::TimeDelta::FromSeconds(seconds
);
575 } else if (name
== "port") {
577 base::StringToInt(value
, &port
);
583 if (line_end
>= header_end
)
585 line_start
= line_end
+ 1;
588 // Narrow fix for http://crbug.com/389451.
589 GURL
dictionary_url_normalized(dictionary_url
);
590 StripTrailingDot(&dictionary_url_normalized
);
592 SdchProblemCode rv
= IsInSupportedDomain(dictionary_url_normalized
);
596 rv
= Dictionary::CanSet(domain
, path
, ports
, dictionary_url_normalized
);
600 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text
.size());
601 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
602 << " and server hash " << server_hash
;
603 Dictionary
dictionary(dictionary_text
, header_end
+ 2, client_hash
,
604 server_hash
, dictionary_url_normalized
, domain
, path
,
606 dictionaries_
[server_hash
] =
607 new base::RefCountedData
<Dictionary
>(dictionary
);
609 *server_hash_p
= server_hash
;
// Removes the dictionary stored under |server_hash| from |dictionaries_|.
// Returns SDCH_DICTIONARY_HASH_NOT_FOUND when no such dictionary is stored.
// NOTE(review): the return statement for the success path is not visible in
// this listing (presumably SDCH_OK) -- TODO confirm.
614 SdchProblemCode
SdchManager::RemoveSdchDictionary(
615 const std::string
& server_hash
) {
616 if (dictionaries_
.find(server_hash
) == dictionaries_
.end())
617 return SDCH_DICTIONARY_HASH_NOT_FOUND
;
619 dictionaries_
.erase(server_hash
);
624 scoped_ptr
<SdchManager::DictionarySet
>
625 SdchManager::CreateEmptyDictionarySetForTesting() {
626 return scoped_ptr
<DictionarySet
>(new DictionarySet
).Pass();
630 void SdchManager::UrlSafeBase64Encode(const std::string
& input
,
631 std::string
* output
) {
632 // Since this is only done during a dictionary load, and hashes are only 8
633 // characters, we just do the simple fixup, rather than rewriting the encoder.
634 base::Base64Encode(input
, output
);
635 std::replace(output
->begin(), output
->end(), '+', '-');
636 std::replace(output
->begin(), output
->end(), '/', '_');
639 base::Value
* SdchManager::SdchInfoToValue() const {
640 base::DictionaryValue
* value
= new base::DictionaryValue();
642 value
->SetBoolean("sdch_enabled", sdch_enabled());
643 value
->SetBoolean("secure_scheme_support", secure_scheme_supported());
645 base::ListValue
* entry_list
= new base::ListValue();
646 for (const auto& entry
: dictionaries_
) {
647 base::DictionaryValue
* entry_dict
= new base::DictionaryValue();
648 entry_dict
->SetString("url", entry
.second
->data
.url().spec());
649 entry_dict
->SetString("client_hash", entry
.second
->data
.client_hash());
650 entry_dict
->SetString("domain", entry
.second
->data
.domain());
651 entry_dict
->SetString("path", entry
.second
->data
.path());
652 base::ListValue
* port_list
= new base::ListValue();
653 for (std::set
<int>::const_iterator port_it
=
654 entry
.second
->data
.ports().begin();
655 port_it
!= entry
.second
->data
.ports().end(); ++port_it
) {
656 port_list
->AppendInteger(*port_it
);
658 entry_dict
->Set("ports", port_list
);
659 entry_dict
->SetString("server_hash", entry
.first
);
660 entry_list
->Append(entry_dict
);
662 value
->Set("dictionaries", entry_list
);
664 entry_list
= new base::ListValue();
665 for (DomainBlacklistInfo::const_iterator it
= blacklisted_domains_
.begin();
666 it
!= blacklisted_domains_
.end(); ++it
) {
667 if (it
->second
.count
== 0)
669 base::DictionaryValue
* entry_dict
= new base::DictionaryValue();
670 entry_dict
->SetString("domain", it
->first
);
671 if (it
->second
.count
!= INT_MAX
)
672 entry_dict
->SetInteger("tries", it
->second
.count
);
673 entry_dict
->SetInteger("reason", it
->second
.reason
);
674 entry_list
->Append(entry_dict
);
676 value
->Set("blacklisted", entry_list
);