// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "base/time/default_clock.h"
13 #include "base/values.h"
14 #include "crypto/sha2.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/base/sdch_observer.h"
17 #include "net/url_request/url_request_http_job.h"
21 void StripTrailingDot(GURL
* gurl
) {
22 std::string
host(gurl
->host());
27 if (*host
.rbegin() != '.')
30 host
.resize(host
.size() - 1);
32 GURL::Replacements replacements
;
33 replacements
.SetHostStr(host
);
34 *gurl
= gurl
->ReplaceComponents(replacements
);
43 bool SdchManager::g_sdch_enabled_
= true;
46 bool SdchManager::g_secure_scheme_supported_
= true;
48 SdchManager::Dictionary::Dictionary(const std::string
& dictionary_text
,
50 const std::string
& client_hash
,
51 const std::string
& server_hash
,
53 const std::string
& domain
,
54 const std::string
& path
,
55 const base::Time
& expiration
,
56 const std::set
<int>& ports
)
57 : text_(dictionary_text
, offset
),
58 client_hash_(client_hash
),
59 server_hash_(server_hash
),
63 expiration_(expiration
),
65 clock_(new base::DefaultClock
) {
68 SdchManager::Dictionary::Dictionary(const SdchManager::Dictionary
& rhs
)
70 client_hash_(rhs
.client_hash_
),
71 server_hash_(rhs
.server_hash_
),
75 expiration_(rhs
.expiration_
),
77 clock_(new base::DefaultClock
) {
80 SdchManager::Dictionary::~Dictionary() {}
// Security functions restricting loads and use of dictionaries.
85 SdchProblemCode
SdchManager::Dictionary::CanSet(const std::string
& domain
,
86 const std::string
& path
,
87 const std::set
<int>& ports
,
88 const GURL
& dictionary_url
) {
90 A dictionary is invalid and must not be stored if any of the following are
92 1. The dictionary has no Domain attribute.
93 2. The effective host name that derives from the referer URL host name does
94 not domain-match the Domain attribute.
95 3. The Domain attribute is a top level domain.
96 4. The referer URL host is a host domain name (not IP address) and has the
97 form HD, where D is the value of the Domain attribute, and H is a string
98 that contains one or more dots.
99 5. If the dictionary has a Port attribute and the referer URL's port was not
103 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
104 // and hence the conservative approach is to not allow any redirects (if there
105 // were any... then don't allow the dictionary to be set).
108 return SDCH_DICTIONARY_MISSING_DOMAIN_SPECIFIER
; // Domain is required.
110 if (registry_controlled_domains::GetDomainAndRegistry(
111 domain
, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES
)
113 return SDCH_DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN
; // domain was a TLD.
116 if (!Dictionary::DomainMatch(dictionary_url
, domain
))
117 return SDCH_DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL
;
119 std::string referrer_url_host
= dictionary_url
.host();
120 size_t postfix_domain_index
= referrer_url_host
.rfind(domain
);
121 // See if it is indeed a postfix, or just an internal string.
122 if (referrer_url_host
.size() == postfix_domain_index
+ domain
.size()) {
123 // It is a postfix... so check to see if there's a dot in the prefix.
124 size_t end_of_host_index
= referrer_url_host
.find_first_of('.');
125 if (referrer_url_host
.npos
!= end_of_host_index
&&
126 end_of_host_index
< postfix_domain_index
) {
127 return SDCH_DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX
;
131 if (!ports
.empty() && 0 == ports
.count(dictionary_url
.EffectiveIntPort()))
132 return SDCH_DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL
;
137 SdchProblemCode
SdchManager::Dictionary::CanUse(
138 const GURL
& target_url
) const {
140 1. The request URL's host name domain-matches the Domain attribute of the
142 2. If the dictionary has a Port attribute, the request port is one of the
143 ports listed in the Port attribute.
144 3. The request URL path-matches the path attribute of the dictionary.
145 4. The request is not an HTTPS request.
146 We can override (ignore) item (4) only when we have explicitly enabled
147 HTTPS support AND the dictionary acquisition scheme matches the target
150 if (!DomainMatch(target_url
, domain_
))
151 return SDCH_DICTIONARY_FOUND_HAS_WRONG_DOMAIN
;
153 if (!ports_
.empty() && 0 == ports_
.count(target_url
.EffectiveIntPort()))
154 return SDCH_DICTIONARY_FOUND_HAS_WRONG_PORT_LIST
;
156 if (path_
.size() && !PathMatch(target_url
.path(), path_
))
157 return SDCH_DICTIONARY_FOUND_HAS_WRONG_PATH
;
159 if (!SdchManager::secure_scheme_supported() && target_url
.SchemeIsSecure())
160 return SDCH_DICTIONARY_FOUND_HAS_WRONG_SCHEME
;
162 if (target_url
.SchemeIsSecure() != url_
.SchemeIsSecure())
163 return SDCH_DICTIONARY_FOUND_HAS_WRONG_SCHEME
;
165 // TODO(jar): Remove overly restrictive failsafe test (added per security
166 // review) when we have a need to be more general.
167 if (!target_url
.SchemeIsHTTPOrHTTPS())
168 return SDCH_ATTEMPT_TO_DECODE_NON_HTTP_DATA
;
174 bool SdchManager::Dictionary::PathMatch(const std::string
& path
,
175 const std::string
& restriction
) {
178 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
179 character following P2 in P1 is "/".
181 if (path
== restriction
)
183 size_t prefix_length
= restriction
.size();
184 if (prefix_length
> path
.size())
185 return false; // Can't be a prefix.
186 if (0 != path
.compare(0, prefix_length
, restriction
))
188 return restriction
[prefix_length
- 1] == '/' || path
[prefix_length
] == '/';
192 bool SdchManager::Dictionary::DomainMatch(const GURL
& gurl
,
193 const std::string
& restriction
) {
194 // TODO(jar): This is not precisely a domain match definition.
195 return gurl
.DomainIs(restriction
.data(), restriction
.size());
198 bool SdchManager::Dictionary::Expired() const {
199 return clock_
->Now() > expiration_
;
202 void SdchManager::Dictionary::SetClockForTesting(
203 scoped_ptr
<base::Clock
> clock
) {
204 clock_
= clock
.Pass();
207 SdchManager::DictionarySet::DictionarySet() {}
209 SdchManager::DictionarySet::~DictionarySet() {}
211 std::string
SdchManager::DictionarySet::GetDictionaryClientHashList() const {
214 for (const auto& entry
: dictionaries_
) {
218 result
.append(entry
.second
->data
.client_hash());
224 const SdchManager::Dictionary
* SdchManager::DictionarySet::GetDictionary(
225 const std::string
& hash
) const {
226 auto it
= dictionaries_
.find(hash
);
227 if (it
== dictionaries_
.end())
230 return &it
->second
->data
;
233 bool SdchManager::DictionarySet::Empty() const {
234 return dictionaries_
.empty();
237 void SdchManager::DictionarySet::AddDictionary(
238 const std::string
& server_hash
,
239 const scoped_refptr
<base::RefCountedData
<SdchManager::Dictionary
>>&
241 DCHECK(dictionaries_
.end() == dictionaries_
.find(server_hash
));
243 dictionaries_
[server_hash
] = dictionary
;
246 SdchManager::SdchManager() : factory_(this) {
247 DCHECK(thread_checker_
.CalledOnValidThread());
250 SdchManager::~SdchManager() {
251 DCHECK(thread_checker_
.CalledOnValidThread());
252 while (!dictionaries_
.empty()) {
253 auto it
= dictionaries_
.begin();
254 dictionaries_
.erase(it
->first
);
256 #if defined(OS_CHROMEOS)
257 // For debugging http://crbug.com/454198; remove when resolved.
259 // Explicitly confirm that we can't notify any observers anymore.
260 CHECK(!observers_
.might_have_observers());
264 void SdchManager::ClearData() {
265 blacklisted_domains_
.clear();
266 allow_latency_experiment_
.clear();
267 dictionaries_
.clear();
268 FOR_EACH_OBSERVER(SdchObserver
, observers_
, OnClearDictionaries(this));
272 void SdchManager::SdchErrorRecovery(SdchProblemCode problem
) {
273 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_5", problem
,
274 SDCH_MAX_PROBLEM_CODE
);
278 void SdchManager::EnableSdchSupport(bool enabled
) {
279 g_sdch_enabled_
= enabled
;
283 void SdchManager::EnableSecureSchemeSupport(bool enabled
) {
284 g_secure_scheme_supported_
= enabled
;
287 void SdchManager::BlacklistDomain(const GURL
& url
,
288 SdchProblemCode blacklist_reason
) {
289 SetAllowLatencyExperiment(url
, false);
291 BlacklistInfo
* blacklist_info
=
292 &blacklisted_domains_
[base::StringToLowerASCII(url
.host())];
294 if (blacklist_info
->count
> 0)
295 return; // Domain is already blacklisted.
297 if (blacklist_info
->exponential_count
> (INT_MAX
- 1) / 2) {
298 blacklist_info
->exponential_count
= INT_MAX
;
300 blacklist_info
->exponential_count
=
301 blacklist_info
->exponential_count
* 2 + 1;
304 blacklist_info
->count
= blacklist_info
->exponential_count
;
305 blacklist_info
->reason
= blacklist_reason
;
308 void SdchManager::BlacklistDomainForever(const GURL
& url
,
309 SdchProblemCode blacklist_reason
) {
310 SetAllowLatencyExperiment(url
, false);
312 BlacklistInfo
* blacklist_info
=
313 &blacklisted_domains_
[base::StringToLowerASCII(url
.host())];
314 blacklist_info
->count
= INT_MAX
;
315 blacklist_info
->exponential_count
= INT_MAX
;
316 blacklist_info
->reason
= blacklist_reason
;
319 void SdchManager::ClearBlacklistings() {
320 blacklisted_domains_
.clear();
323 void SdchManager::ClearDomainBlacklisting(const std::string
& domain
) {
324 BlacklistInfo
* blacklist_info
= &blacklisted_domains_
[
325 base::StringToLowerASCII(domain
)];
326 blacklist_info
->count
= 0;
327 blacklist_info
->reason
= SDCH_OK
;
330 int SdchManager::BlackListDomainCount(const std::string
& domain
) {
331 std::string
domain_lower(base::StringToLowerASCII(domain
));
333 if (blacklisted_domains_
.end() == blacklisted_domains_
.find(domain_lower
))
335 return blacklisted_domains_
[domain_lower
].count
;
338 int SdchManager::BlacklistDomainExponential(const std::string
& domain
) {
339 std::string
domain_lower(base::StringToLowerASCII(domain
));
341 if (blacklisted_domains_
.end() == blacklisted_domains_
.find(domain_lower
))
343 return blacklisted_domains_
[domain_lower
].exponential_count
;
346 SdchProblemCode
SdchManager::IsInSupportedDomain(const GURL
& url
) {
347 DCHECK(thread_checker_
.CalledOnValidThread());
348 if (!g_sdch_enabled_
)
349 return SDCH_DISABLED
;
351 if (!secure_scheme_supported() && url
.SchemeIsSecure())
352 return SDCH_SECURE_SCHEME_NOT_SUPPORTED
;
354 if (blacklisted_domains_
.empty())
357 DomainBlacklistInfo::iterator it
=
358 blacklisted_domains_
.find(base::StringToLowerASCII(url
.host()));
359 if (blacklisted_domains_
.end() == it
|| it
->second
.count
== 0)
362 UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it
->second
.reason
,
363 SDCH_MAX_PROBLEM_CODE
);
365 int count
= it
->second
.count
- 1;
367 it
->second
.count
= count
;
369 it
->second
.count
= 0;
370 it
->second
.reason
= SDCH_OK
;
373 return SDCH_DOMAIN_BLACKLIST_INCLUDES_TARGET
;
376 SdchProblemCode
SdchManager::OnGetDictionary(const GURL
& request_url
,
377 const GURL
& dictionary_url
) {
378 DCHECK(thread_checker_
.CalledOnValidThread());
379 SdchProblemCode rv
= CanFetchDictionary(request_url
, dictionary_url
);
383 FOR_EACH_OBSERVER(SdchObserver
,
385 OnGetDictionary(this, request_url
, dictionary_url
));
390 void SdchManager::OnDictionaryUsed(const std::string
& server_hash
) {
391 FOR_EACH_OBSERVER(SdchObserver
, observers_
,
392 OnDictionaryUsed(this, server_hash
));
395 SdchProblemCode
SdchManager::CanFetchDictionary(
396 const GURL
& referring_url
,
397 const GURL
& dictionary_url
) const {
398 DCHECK(thread_checker_
.CalledOnValidThread());
399 /* The user agent may retrieve a dictionary from the dictionary URL if all of
400 the following are true:
401 1 The dictionary URL host name matches the referrer URL host name and
403 2 The dictionary URL host name domain matches the parent domain of the
404 referrer URL host name
405 3 The parent domain of the referrer URL host name is not a top level
408 // Item (1) above implies item (2). Spec should be updated.
409 // I take "host name match" to be "is identical to"
410 if (referring_url
.host() != dictionary_url
.host() ||
411 referring_url
.scheme() != dictionary_url
.scheme())
412 return SDCH_DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST
;
414 if (!secure_scheme_supported() && referring_url
.SchemeIsSecure())
415 return SDCH_DICTIONARY_SELECTED_FOR_SSL
;
417 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
418 // than current SDCH spec when needed, and justified by security audit.
419 if (!referring_url
.SchemeIsHTTPOrHTTPS())
420 return SDCH_DICTIONARY_SELECTED_FROM_NON_HTTP
;
425 scoped_ptr
<SdchManager::DictionarySet
>
426 SdchManager::GetDictionarySet(const GURL
& target_url
) {
427 if (IsInSupportedDomain(target_url
) != SDCH_OK
)
431 scoped_ptr
<SdchManager::DictionarySet
> result(new DictionarySet
);
432 for (const auto& entry
: dictionaries_
) {
433 if (entry
.second
->data
.CanUse(target_url
) != SDCH_OK
)
435 if (entry
.second
->data
.Expired())
438 result
->AddDictionary(entry
.first
, entry
.second
);
444 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count
);
446 return result
.Pass();
449 scoped_ptr
<SdchManager::DictionarySet
>
450 SdchManager::GetDictionarySetByHash(
451 const GURL
& target_url
,
452 const std::string
& server_hash
,
453 SdchProblemCode
* problem_code
) {
454 scoped_ptr
<SdchManager::DictionarySet
> result
;
456 *problem_code
= SDCH_DICTIONARY_HASH_NOT_FOUND
;
457 const auto& it
= dictionaries_
.find(server_hash
);
458 if (it
== dictionaries_
.end())
459 return result
.Pass();
461 *problem_code
= it
->second
->data
.CanUse(target_url
);
462 if (*problem_code
!= SDCH_OK
)
463 return result
.Pass();
465 result
.reset(new DictionarySet
);
466 result
->AddDictionary(it
->first
, it
->second
);
467 return result
.Pass();
471 void SdchManager::GenerateHash(const std::string
& dictionary_text
,
472 std::string
* client_hash
, std::string
* server_hash
) {
473 char binary_hash
[32];
474 crypto::SHA256HashString(dictionary_text
, binary_hash
, sizeof(binary_hash
));
476 std::string
first_48_bits(&binary_hash
[0], 6);
477 std::string
second_48_bits(&binary_hash
[6], 6);
478 UrlSafeBase64Encode(first_48_bits
, client_hash
);
479 UrlSafeBase64Encode(second_48_bits
, server_hash
);
481 DCHECK_EQ(server_hash
->length(), 8u);
482 DCHECK_EQ(client_hash
->length(), 8u);
// Methods for supporting latency experiments.
487 bool SdchManager::AllowLatencyExperiment(const GURL
& url
) const {
488 DCHECK(thread_checker_
.CalledOnValidThread());
489 return allow_latency_experiment_
.end() !=
490 allow_latency_experiment_
.find(url
.host());
493 void SdchManager::SetAllowLatencyExperiment(const GURL
& url
, bool enable
) {
494 DCHECK(thread_checker_
.CalledOnValidThread());
496 allow_latency_experiment_
.insert(url
.host());
499 ExperimentSet::iterator it
= allow_latency_experiment_
.find(url
.host());
500 if (allow_latency_experiment_
.end() == it
)
501 return; // It was already erased, or never allowed.
502 SdchErrorRecovery(SDCH_LATENCY_TEST_DISALLOWED
);
503 allow_latency_experiment_
.erase(it
);
506 void SdchManager::AddObserver(SdchObserver
* observer
) {
507 observers_
.AddObserver(observer
);
510 void SdchManager::RemoveObserver(SdchObserver
* observer
) {
511 observers_
.RemoveObserver(observer
);
514 SdchProblemCode
SdchManager::AddSdchDictionary(
515 const std::string
& dictionary_text
,
516 const GURL
& dictionary_url
,
517 std::string
* server_hash_p
) {
518 DCHECK(thread_checker_
.CalledOnValidThread());
519 std::string client_hash
;
520 std::string server_hash
;
521 GenerateHash(dictionary_text
, &client_hash
, &server_hash
);
522 if (dictionaries_
.find(server_hash
) != dictionaries_
.end())
523 return SDCH_DICTIONARY_ALREADY_LOADED
; // Already loaded.
525 std::string domain
, path
;
527 base::Time
expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
529 if (dictionary_text
.empty())
530 return SDCH_DICTIONARY_HAS_NO_TEXT
; // Missing header.
532 size_t header_end
= dictionary_text
.find("\n\n");
533 if (std::string::npos
== header_end
)
534 return SDCH_DICTIONARY_HAS_NO_HEADER
; // Missing header.
536 size_t line_start
= 0; // Start of line being parsed.
538 size_t line_end
= dictionary_text
.find('\n', line_start
);
539 DCHECK(std::string::npos
!= line_end
);
540 DCHECK_LE(line_end
, header_end
);
542 size_t colon_index
= dictionary_text
.find(':', line_start
);
543 if (std::string::npos
== colon_index
)
544 return SDCH_DICTIONARY_HEADER_LINE_MISSING_COLON
; // Illegal line missing
547 if (colon_index
> line_end
)
550 size_t value_start
= dictionary_text
.find_first_not_of(" \t",
552 if (std::string::npos
!= value_start
) {
553 if (value_start
>= line_end
)
555 std::string
name(dictionary_text
, line_start
, colon_index
- line_start
);
556 std::string
value(dictionary_text
, value_start
, line_end
- value_start
);
557 name
= base::StringToLowerASCII(name
);
558 if (name
== "domain") {
560 } else if (name
== "path") {
562 } else if (name
== "format-version") {
564 return SDCH_DICTIONARY_UNSUPPORTED_VERSION
;
565 } else if (name
== "max-age") {
567 base::StringToInt64(value
, &seconds
);
568 expiration
= base::Time::Now() + base::TimeDelta::FromSeconds(seconds
);
569 } else if (name
== "port") {
571 base::StringToInt(value
, &port
);
577 if (line_end
>= header_end
)
579 line_start
= line_end
+ 1;
582 // Narrow fix for http://crbug.com/389451.
583 GURL
dictionary_url_normalized(dictionary_url
);
584 StripTrailingDot(&dictionary_url_normalized
);
586 SdchProblemCode rv
= IsInSupportedDomain(dictionary_url_normalized
);
590 rv
= Dictionary::CanSet(domain
, path
, ports
, dictionary_url_normalized
);
594 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text
.size());
595 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
596 << " and server hash " << server_hash
;
597 Dictionary
dictionary(dictionary_text
, header_end
+ 2, client_hash
,
598 server_hash
, dictionary_url_normalized
, domain
, path
,
600 dictionaries_
[server_hash
] =
601 new base::RefCountedData
<Dictionary
>(dictionary
);
603 *server_hash_p
= server_hash
;
608 SdchProblemCode
SdchManager::RemoveSdchDictionary(
609 const std::string
& server_hash
) {
610 if (dictionaries_
.find(server_hash
) == dictionaries_
.end())
611 return SDCH_DICTIONARY_HASH_NOT_FOUND
;
613 dictionaries_
.erase(server_hash
);
618 scoped_ptr
<SdchManager::DictionarySet
>
619 SdchManager::CreateEmptyDictionarySetForTesting() {
620 return scoped_ptr
<DictionarySet
>(new DictionarySet
).Pass();
623 // For investigation of http://crbug.com/454198; remove when resolved.
624 base::WeakPtr
<SdchManager
> SdchManager::GetWeakPtr() {
625 return factory_
.GetWeakPtr();
629 void SdchManager::UrlSafeBase64Encode(const std::string
& input
,
630 std::string
* output
) {
631 // Since this is only done during a dictionary load, and hashes are only 8
632 // characters, we just do the simple fixup, rather than rewriting the encoder.
633 base::Base64Encode(input
, output
);
634 std::replace(output
->begin(), output
->end(), '+', '-');
635 std::replace(output
->begin(), output
->end(), '/', '_');
638 base::Value
* SdchManager::SdchInfoToValue() const {
639 base::DictionaryValue
* value
= new base::DictionaryValue();
641 value
->SetBoolean("sdch_enabled", sdch_enabled());
642 value
->SetBoolean("secure_scheme_support", secure_scheme_supported());
644 base::ListValue
* entry_list
= new base::ListValue();
645 for (const auto& entry
: dictionaries_
) {
646 base::DictionaryValue
* entry_dict
= new base::DictionaryValue();
647 entry_dict
->SetString("url", entry
.second
->data
.url().spec());
648 entry_dict
->SetString("client_hash", entry
.second
->data
.client_hash());
649 entry_dict
->SetString("domain", entry
.second
->data
.domain());
650 entry_dict
->SetString("path", entry
.second
->data
.path());
651 base::ListValue
* port_list
= new base::ListValue();
652 for (std::set
<int>::const_iterator port_it
=
653 entry
.second
->data
.ports().begin();
654 port_it
!= entry
.second
->data
.ports().end(); ++port_it
) {
655 port_list
->AppendInteger(*port_it
);
657 entry_dict
->Set("ports", port_list
);
658 entry_dict
->SetString("server_hash", entry
.first
);
659 entry_list
->Append(entry_dict
);
661 value
->Set("dictionaries", entry_list
);
663 entry_list
= new base::ListValue();
664 for (DomainBlacklistInfo::const_iterator it
= blacklisted_domains_
.begin();
665 it
!= blacklisted_domains_
.end(); ++it
) {
666 if (it
->second
.count
== 0)
668 base::DictionaryValue
* entry_dict
= new base::DictionaryValue();
669 entry_dict
->SetString("domain", it
->first
);
670 if (it
->second
.count
!= INT_MAX
)
671 entry_dict
->SetInteger("tries", it
->second
.count
);
672 entry_dict
->SetInteger("reason", it
->second
.reason
);
673 entry_list
->Append(entry_dict
);
675 value
->Set("blacklisted", entry_list
);