1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "base/time/default_clock.h"
13 #include "base/values.h"
14 #include "crypto/sha2.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/base/sdch_observer.h"
17 #include "net/url_request/url_request_http_job.h"
21 void StripTrailingDot(GURL
* gurl
) {
22 std::string
host(gurl
->host());
27 if (*host
.rbegin() != '.')
30 host
.resize(host
.size() - 1);
32 GURL::Replacements replacements
;
33 replacements
.SetHostStr(host
);
34 *gurl
= gurl
->ReplaceComponents(replacements
);
42 // Workaround for http://crbug.com/437794; remove when fixed.
45 bool SdchManager::g_sdch_enabled_
= false;
48 bool SdchManager::g_sdch_enabled_
= true;
52 bool SdchManager::g_secure_scheme_supported_
= true;
54 SdchManager::Dictionary::Dictionary(const std::string
& dictionary_text
,
56 const std::string
& client_hash
,
57 const std::string
& server_hash
,
59 const std::string
& domain
,
60 const std::string
& path
,
61 const base::Time
& expiration
,
62 const std::set
<int>& ports
)
63 : text_(dictionary_text
, offset
),
64 client_hash_(client_hash
),
65 server_hash_(server_hash
),
69 expiration_(expiration
),
71 clock_(new base::DefaultClock
) {
74 SdchManager::Dictionary::Dictionary(const SdchManager::Dictionary
& rhs
)
76 client_hash_(rhs
.client_hash_
),
77 server_hash_(rhs
.server_hash_
),
81 expiration_(rhs
.expiration_
),
83 clock_(new base::DefaultClock
) {
86 SdchManager::Dictionary::~Dictionary() {}
88 // Security functions restricting loads and use of dictionaries.
91 SdchProblemCode
SdchManager::Dictionary::CanSet(const std::string
& domain
,
92 const std::string
& path
,
93 const std::set
<int>& ports
,
94 const GURL
& dictionary_url
) {
96 A dictionary is invalid and must not be stored if any of the following are
98 1. The dictionary has no Domain attribute.
99 2. The effective host name that derives from the referer URL host name does
100 not domain-match the Domain attribute.
101 3. The Domain attribute is a top level domain.
102 4. The referer URL host is a host domain name (not IP address) and has the
103 form HD, where D is the value of the Domain attribute, and H is a string
104 that contains one or more dots.
105 5. If the dictionary has a Port attribute and the referer URL's port was not
109 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
110 // and hence the conservative approach is to not allow any redirects (if there
111 // were any... then don't allow the dictionary to be set).
114 return SDCH_DICTIONARY_MISSING_DOMAIN_SPECIFIER
; // Domain is required.
116 if (registry_controlled_domains::GetDomainAndRegistry(
117 domain
, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES
)
119 return SDCH_DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN
; // domain was a TLD.
122 if (!Dictionary::DomainMatch(dictionary_url
, domain
))
123 return SDCH_DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL
;
125 std::string referrer_url_host
= dictionary_url
.host();
126 size_t postfix_domain_index
= referrer_url_host
.rfind(domain
);
127 // See if it is indeed a postfix, or just an internal string.
128 if (referrer_url_host
.size() == postfix_domain_index
+ domain
.size()) {
129 // It is a postfix... so check to see if there's a dot in the prefix.
130 size_t end_of_host_index
= referrer_url_host
.find_first_of('.');
131 if (referrer_url_host
.npos
!= end_of_host_index
&&
132 end_of_host_index
< postfix_domain_index
) {
133 return SDCH_DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX
;
137 if (!ports
.empty() && 0 == ports
.count(dictionary_url
.EffectiveIntPort()))
138 return SDCH_DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL
;
143 SdchProblemCode
SdchManager::Dictionary::CanUse(
144 const GURL
& target_url
) const {
146 1. The request URL's host name domain-matches the Domain attribute of the
148 2. If the dictionary has a Port attribute, the request port is one of the
149 ports listed in the Port attribute.
150 3. The request URL path-matches the path attribute of the dictionary.
151 4. The request is not an HTTPS request.
152 We can override (ignore) item (4) only when we have explicitly enabled
153 HTTPS support AND the dictionary acquisition scheme matches the target
156 if (!DomainMatch(target_url
, domain_
))
157 return SDCH_DICTIONARY_FOUND_HAS_WRONG_DOMAIN
;
159 if (!ports_
.empty() && 0 == ports_
.count(target_url
.EffectiveIntPort()))
160 return SDCH_DICTIONARY_FOUND_HAS_WRONG_PORT_LIST
;
162 if (path_
.size() && !PathMatch(target_url
.path(), path_
))
163 return SDCH_DICTIONARY_FOUND_HAS_WRONG_PATH
;
165 if (!SdchManager::secure_scheme_supported() && target_url
.SchemeIsSecure())
166 return SDCH_DICTIONARY_FOUND_HAS_WRONG_SCHEME
;
168 if (target_url
.SchemeIsSecure() != url_
.SchemeIsSecure())
169 return SDCH_DICTIONARY_FOUND_HAS_WRONG_SCHEME
;
171 // TODO(jar): Remove overly restrictive failsafe test (added per security
172 // review) when we have a need to be more general.
173 if (!target_url
.SchemeIsHTTPOrHTTPS())
174 return SDCH_ATTEMPT_TO_DECODE_NON_HTTP_DATA
;
180 bool SdchManager::Dictionary::PathMatch(const std::string
& path
,
181 const std::string
& restriction
) {
184 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
185 character following P2 in P1 is "/".
187 if (path
== restriction
)
189 size_t prefix_length
= restriction
.size();
190 if (prefix_length
> path
.size())
191 return false; // Can't be a prefix.
192 if (0 != path
.compare(0, prefix_length
, restriction
))
194 return restriction
[prefix_length
- 1] == '/' || path
[prefix_length
] == '/';
198 bool SdchManager::Dictionary::DomainMatch(const GURL
& gurl
,
199 const std::string
& restriction
) {
200 // TODO(jar): This is not precisely a domain match definition.
201 return gurl
.DomainIs(restriction
.data(), restriction
.size());
204 bool SdchManager::Dictionary::Expired() const {
205 return clock_
->Now() > expiration_
;
208 void SdchManager::Dictionary::SetClockForTesting(
209 scoped_ptr
<base::Clock
> clock
) {
210 clock_
= clock
.Pass();
213 SdchManager::DictionarySet::DictionarySet() {}
215 SdchManager::DictionarySet::~DictionarySet() {}
217 std::string
SdchManager::DictionarySet::GetDictionaryClientHashList() const {
220 for (const auto& entry
: dictionaries_
) {
224 result
.append(entry
.second
->data
.client_hash());
230 const SdchManager::Dictionary
* SdchManager::DictionarySet::GetDictionary(
231 const std::string
& hash
) const {
232 auto it
= dictionaries_
.find(hash
);
233 if (it
== dictionaries_
.end())
236 return &it
->second
->data
;
239 bool SdchManager::DictionarySet::Empty() const {
240 return dictionaries_
.empty();
243 void SdchManager::DictionarySet::AddDictionary(
244 const std::string
& server_hash
,
245 const scoped_refptr
<base::RefCountedData
<SdchManager::Dictionary
>>&
247 DCHECK(dictionaries_
.end() == dictionaries_
.find(server_hash
));
249 dictionaries_
[server_hash
] = dictionary
;
252 SdchManager::SdchManager() {
253 DCHECK(thread_checker_
.CalledOnValidThread());
256 SdchManager::~SdchManager() {
257 DCHECK(thread_checker_
.CalledOnValidThread());
258 while (!dictionaries_
.empty()) {
259 auto it
= dictionaries_
.begin();
260 dictionaries_
.erase(it
->first
);
264 void SdchManager::ClearData() {
265 blacklisted_domains_
.clear();
266 allow_latency_experiment_
.clear();
267 dictionaries_
.clear();
268 FOR_EACH_OBSERVER(SdchObserver
, observers_
, OnClearDictionaries(this));
272 void SdchManager::SdchErrorRecovery(SdchProblemCode problem
) {
273 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_5", problem
,
274 SDCH_MAX_PROBLEM_CODE
);
278 void SdchManager::EnableSdchSupport(bool enabled
) {
279 g_sdch_enabled_
= enabled
;
283 void SdchManager::EnableSecureSchemeSupport(bool enabled
) {
284 g_secure_scheme_supported_
= enabled
;
287 void SdchManager::BlacklistDomain(const GURL
& url
,
288 SdchProblemCode blacklist_reason
) {
289 SetAllowLatencyExperiment(url
, false);
291 BlacklistInfo
* blacklist_info
=
292 &blacklisted_domains_
[base::StringToLowerASCII(url
.host())];
294 if (blacklist_info
->count
> 0)
295 return; // Domain is already blacklisted.
297 if (blacklist_info
->exponential_count
> (INT_MAX
- 1) / 2) {
298 blacklist_info
->exponential_count
= INT_MAX
;
300 blacklist_info
->exponential_count
=
301 blacklist_info
->exponential_count
* 2 + 1;
304 blacklist_info
->count
= blacklist_info
->exponential_count
;
305 blacklist_info
->reason
= blacklist_reason
;
308 void SdchManager::BlacklistDomainForever(const GURL
& url
,
309 SdchProblemCode blacklist_reason
) {
310 SetAllowLatencyExperiment(url
, false);
312 BlacklistInfo
* blacklist_info
=
313 &blacklisted_domains_
[base::StringToLowerASCII(url
.host())];
314 blacklist_info
->count
= INT_MAX
;
315 blacklist_info
->exponential_count
= INT_MAX
;
316 blacklist_info
->reason
= blacklist_reason
;
319 void SdchManager::ClearBlacklistings() {
320 blacklisted_domains_
.clear();
323 void SdchManager::ClearDomainBlacklisting(const std::string
& domain
) {
324 BlacklistInfo
* blacklist_info
= &blacklisted_domains_
[
325 base::StringToLowerASCII(domain
)];
326 blacklist_info
->count
= 0;
327 blacklist_info
->reason
= SDCH_OK
;
330 int SdchManager::BlackListDomainCount(const std::string
& domain
) {
331 std::string
domain_lower(base::StringToLowerASCII(domain
));
333 if (blacklisted_domains_
.end() == blacklisted_domains_
.find(domain_lower
))
335 return blacklisted_domains_
[domain_lower
].count
;
338 int SdchManager::BlacklistDomainExponential(const std::string
& domain
) {
339 std::string
domain_lower(base::StringToLowerASCII(domain
));
341 if (blacklisted_domains_
.end() == blacklisted_domains_
.find(domain_lower
))
343 return blacklisted_domains_
[domain_lower
].exponential_count
;
346 SdchProblemCode
SdchManager::IsInSupportedDomain(const GURL
& url
) {
347 DCHECK(thread_checker_
.CalledOnValidThread());
348 if (!g_sdch_enabled_
)
349 return SDCH_DISABLED
;
351 if (!secure_scheme_supported() && url
.SchemeIsSecure())
352 return SDCH_SECURE_SCHEME_NOT_SUPPORTED
;
354 if (blacklisted_domains_
.empty())
357 DomainBlacklistInfo::iterator it
=
358 blacklisted_domains_
.find(base::StringToLowerASCII(url
.host()));
359 if (blacklisted_domains_
.end() == it
|| it
->second
.count
== 0)
362 UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it
->second
.reason
,
363 SDCH_MAX_PROBLEM_CODE
);
365 int count
= it
->second
.count
- 1;
367 it
->second
.count
= count
;
369 it
->second
.count
= 0;
370 it
->second
.reason
= SDCH_OK
;
373 return SDCH_DOMAIN_BLACKLIST_INCLUDES_TARGET
;
376 SdchProblemCode
SdchManager::OnGetDictionary(const GURL
& request_url
,
377 const GURL
& dictionary_url
) {
378 DCHECK(thread_checker_
.CalledOnValidThread());
379 SdchProblemCode rv
= CanFetchDictionary(request_url
, dictionary_url
);
383 FOR_EACH_OBSERVER(SdchObserver
,
385 OnGetDictionary(this, request_url
, dictionary_url
));
390 void SdchManager::OnDictionaryUsed(const std::string
& server_hash
) {
391 FOR_EACH_OBSERVER(SdchObserver
, observers_
,
392 OnDictionaryUsed(this, server_hash
));
395 SdchProblemCode
SdchManager::CanFetchDictionary(
396 const GURL
& referring_url
,
397 const GURL
& dictionary_url
) const {
398 DCHECK(thread_checker_
.CalledOnValidThread());
399 /* The user agent may retrieve a dictionary from the dictionary URL if all of
400 the following are true:
401 1 The dictionary URL host name matches the referrer URL host name and
403 2 The dictionary URL host name domain matches the parent domain of the
404 referrer URL host name
405 3 The parent domain of the referrer URL host name is not a top level
408 // Item (1) above implies item (2). Spec should be updated.
409 // I take "host name match" to be "is identical to"
410 if (referring_url
.host() != dictionary_url
.host() ||
411 referring_url
.scheme() != dictionary_url
.scheme())
412 return SDCH_DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST
;
414 if (!secure_scheme_supported() && referring_url
.SchemeIsSecure())
415 return SDCH_DICTIONARY_SELECTED_FOR_SSL
;
417 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
418 // than current SDCH spec when needed, and justified by security audit.
419 if (!referring_url
.SchemeIsHTTPOrHTTPS())
420 return SDCH_DICTIONARY_SELECTED_FROM_NON_HTTP
;
425 scoped_ptr
<SdchManager::DictionarySet
>
426 SdchManager::GetDictionarySet(const GURL
& target_url
) {
427 if (IsInSupportedDomain(target_url
) != SDCH_OK
)
431 scoped_ptr
<SdchManager::DictionarySet
> result(new DictionarySet
);
432 for (const auto& entry
: dictionaries_
) {
433 if (entry
.second
->data
.CanUse(target_url
) != SDCH_OK
)
435 if (entry
.second
->data
.Expired())
438 result
->AddDictionary(entry
.first
, entry
.second
);
444 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count
);
446 return result
.Pass();
449 scoped_ptr
<SdchManager::DictionarySet
>
450 SdchManager::GetDictionarySetByHash(
451 const GURL
& target_url
,
452 const std::string
& server_hash
,
453 SdchProblemCode
* problem_code
) {
454 scoped_ptr
<SdchManager::DictionarySet
> result
;
456 *problem_code
= SDCH_DICTIONARY_HASH_NOT_FOUND
;
457 const auto& it
= dictionaries_
.find(server_hash
);
458 if (it
== dictionaries_
.end())
459 return result
.Pass();
461 *problem_code
= it
->second
->data
.CanUse(target_url
);
462 if (*problem_code
!= SDCH_OK
)
463 return result
.Pass();
465 result
.reset(new DictionarySet
);
466 result
->AddDictionary(it
->first
, it
->second
);
467 return result
.Pass();
471 void SdchManager::GenerateHash(const std::string
& dictionary_text
,
472 std::string
* client_hash
, std::string
* server_hash
) {
473 char binary_hash
[32];
474 crypto::SHA256HashString(dictionary_text
, binary_hash
, sizeof(binary_hash
));
476 std::string
first_48_bits(&binary_hash
[0], 6);
477 std::string
second_48_bits(&binary_hash
[6], 6);
478 UrlSafeBase64Encode(first_48_bits
, client_hash
);
479 UrlSafeBase64Encode(second_48_bits
, server_hash
);
481 DCHECK_EQ(server_hash
->length(), 8u);
482 DCHECK_EQ(client_hash
->length(), 8u);
485 // Methods for supporting latency experiments.
487 bool SdchManager::AllowLatencyExperiment(const GURL
& url
) const {
488 DCHECK(thread_checker_
.CalledOnValidThread());
489 return allow_latency_experiment_
.end() !=
490 allow_latency_experiment_
.find(url
.host());
493 void SdchManager::SetAllowLatencyExperiment(const GURL
& url
, bool enable
) {
494 DCHECK(thread_checker_
.CalledOnValidThread());
496 allow_latency_experiment_
.insert(url
.host());
499 ExperimentSet::iterator it
= allow_latency_experiment_
.find(url
.host());
500 if (allow_latency_experiment_
.end() == it
)
501 return; // It was already erased, or never allowed.
502 SdchErrorRecovery(SDCH_LATENCY_TEST_DISALLOWED
);
503 allow_latency_experiment_
.erase(it
);
506 void SdchManager::AddObserver(SdchObserver
* observer
) {
507 observers_
.AddObserver(observer
);
510 void SdchManager::RemoveObserver(SdchObserver
* observer
) {
511 observers_
.RemoveObserver(observer
);
514 SdchProblemCode
SdchManager::AddSdchDictionary(
515 const std::string
& dictionary_text
,
516 const GURL
& dictionary_url
,
517 std::string
* server_hash_p
) {
518 DCHECK(thread_checker_
.CalledOnValidThread());
519 std::string client_hash
;
520 std::string server_hash
;
521 GenerateHash(dictionary_text
, &client_hash
, &server_hash
);
522 if (dictionaries_
.find(server_hash
) != dictionaries_
.end())
523 return SDCH_DICTIONARY_ALREADY_LOADED
; // Already loaded.
525 std::string domain
, path
;
527 base::Time
expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
529 if (dictionary_text
.empty())
530 return SDCH_DICTIONARY_HAS_NO_TEXT
; // Missing header.
532 size_t header_end
= dictionary_text
.find("\n\n");
533 if (std::string::npos
== header_end
)
534 return SDCH_DICTIONARY_HAS_NO_HEADER
; // Missing header.
536 size_t line_start
= 0; // Start of line being parsed.
538 size_t line_end
= dictionary_text
.find('\n', line_start
);
539 DCHECK(std::string::npos
!= line_end
);
540 DCHECK_LE(line_end
, header_end
);
542 size_t colon_index
= dictionary_text
.find(':', line_start
);
543 if (std::string::npos
== colon_index
)
544 return SDCH_DICTIONARY_HEADER_LINE_MISSING_COLON
; // Illegal line missing
547 if (colon_index
> line_end
)
550 size_t value_start
= dictionary_text
.find_first_not_of(" \t",
552 if (std::string::npos
!= value_start
) {
553 if (value_start
>= line_end
)
555 std::string
name(dictionary_text
, line_start
, colon_index
- line_start
);
556 std::string
value(dictionary_text
, value_start
, line_end
- value_start
);
557 name
= base::StringToLowerASCII(name
);
558 if (name
== "domain") {
560 } else if (name
== "path") {
562 } else if (name
== "format-version") {
564 return SDCH_DICTIONARY_UNSUPPORTED_VERSION
;
565 } else if (name
== "max-age") {
567 base::StringToInt64(value
, &seconds
);
568 expiration
= base::Time::Now() + base::TimeDelta::FromSeconds(seconds
);
569 } else if (name
== "port") {
571 base::StringToInt(value
, &port
);
577 if (line_end
>= header_end
)
579 line_start
= line_end
+ 1;
582 // Narrow fix for http://crbug.com/389451.
583 GURL
dictionary_url_normalized(dictionary_url
);
584 StripTrailingDot(&dictionary_url_normalized
);
586 SdchProblemCode rv
= IsInSupportedDomain(dictionary_url_normalized
);
590 rv
= Dictionary::CanSet(domain
, path
, ports
, dictionary_url_normalized
);
594 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text
.size());
595 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
596 << " and server hash " << server_hash
;
597 Dictionary
dictionary(dictionary_text
, header_end
+ 2, client_hash
,
598 server_hash
, dictionary_url_normalized
, domain
, path
,
600 dictionaries_
[server_hash
] =
601 new base::RefCountedData
<Dictionary
>(dictionary
);
603 *server_hash_p
= server_hash
;
608 SdchProblemCode
SdchManager::RemoveSdchDictionary(
609 const std::string
& server_hash
) {
610 if (dictionaries_
.find(server_hash
) == dictionaries_
.end())
611 return SDCH_DICTIONARY_HASH_NOT_FOUND
;
613 dictionaries_
.erase(server_hash
);
618 scoped_ptr
<SdchManager::DictionarySet
>
619 SdchManager::CreateEmptyDictionarySetForTesting() {
620 return scoped_ptr
<DictionarySet
>(new DictionarySet
).Pass();
624 void SdchManager::UrlSafeBase64Encode(const std::string
& input
,
625 std::string
* output
) {
626 // Since this is only done during a dictionary load, and hashes are only 8
627 // characters, we just do the simple fixup, rather than rewriting the encoder.
628 base::Base64Encode(input
, output
);
629 std::replace(output
->begin(), output
->end(), '+', '-');
630 std::replace(output
->begin(), output
->end(), '/', '_');
633 base::Value
* SdchManager::SdchInfoToValue() const {
634 base::DictionaryValue
* value
= new base::DictionaryValue();
636 value
->SetBoolean("sdch_enabled", sdch_enabled());
637 value
->SetBoolean("secure_scheme_support", secure_scheme_supported());
639 base::ListValue
* entry_list
= new base::ListValue();
640 for (const auto& entry
: dictionaries_
) {
641 base::DictionaryValue
* entry_dict
= new base::DictionaryValue();
642 entry_dict
->SetString("url", entry
.second
->data
.url().spec());
643 entry_dict
->SetString("client_hash", entry
.second
->data
.client_hash());
644 entry_dict
->SetString("domain", entry
.second
->data
.domain());
645 entry_dict
->SetString("path", entry
.second
->data
.path());
646 base::ListValue
* port_list
= new base::ListValue();
647 for (std::set
<int>::const_iterator port_it
=
648 entry
.second
->data
.ports().begin();
649 port_it
!= entry
.second
->data
.ports().end(); ++port_it
) {
650 port_list
->AppendInteger(*port_it
);
652 entry_dict
->Set("ports", port_list
);
653 entry_dict
->SetString("server_hash", entry
.first
);
654 entry_list
->Append(entry_dict
);
656 value
->Set("dictionaries", entry_list
);
658 entry_list
= new base::ListValue();
659 for (DomainBlacklistInfo::const_iterator it
= blacklisted_domains_
.begin();
660 it
!= blacklisted_domains_
.end(); ++it
) {
661 if (it
->second
.count
== 0)
663 base::DictionaryValue
* entry_dict
= new base::DictionaryValue();
664 entry_dict
->SetString("domain", it
->first
);
665 if (it
->second
.count
!= INT_MAX
)
666 entry_dict
->SetInteger("tries", it
->second
.count
);
667 entry_dict
->SetInteger("reason", it
->second
.reason
);
668 entry_list
->Append(entry_dict
);
670 value
->Set("blacklisted", entry_list
);