1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
18 //------------------------------------------------------------------------------
20 const size_t SdchManager::kMaxDictionarySize
= 1000000;
23 const size_t SdchManager::kMaxDictionaryCount
= 20;
26 SdchManager
* SdchManager::global_
= NULL
;
29 bool SdchManager::g_sdch_enabled_
= true;
31 //------------------------------------------------------------------------------
// Builds a Dictionary from its already-parsed attributes. Of the member
// initializers visible here, text_ is constructed from |dictionary_text| and
// an |offset| argument, and client_hash_ and expiration_ are copied from the
// parameters of the same name.
// NOTE(review): lines are missing from this definition (the declaration of
// |offset| and the remaining initializers/body are not shown) -- confirm
// against the complete file before editing.
32 SdchManager::Dictionary::Dictionary(const std::string
& dictionary_text
,
34 const std::string
& client_hash
,
36 const std::string
& domain
,
37 const std::string
& path
,
38 const base::Time
& expiration
,
39 const std::set
<int>& ports
)
40 : text_(dictionary_text
, offset
),
41 client_hash_(client_hash
),
45 expiration_(expiration
),
49 SdchManager::Dictionary::~Dictionary() {
52 bool SdchManager::Dictionary::CanAdvertise(const GURL
& target_url
) {
53 if (!SdchManager::Global()->IsInSupportedDomain(target_url
))
55 /* The specific rules of when a dictionary should be advertised in an
56 Avail-Dictionary header are modeled after the rules for cookie scoping. The
57 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
58 dictionary may be advertised in the Avail-Dictionaries header exactly when
59 all of the following are true:
60 1. The server's effective host name domain-matches the Domain attribute of
62 2. If the dictionary has a Port attribute, the request port is one of the
63 ports listed in the Port attribute.
64 3. The request URI path-matches the path header of the dictionary.
65 4. The request is not an HTTPS request.
67 if (!DomainMatch(target_url
, domain_
))
69 if (!ports_
.empty() && 0 == ports_
.count(target_url
.EffectiveIntPort()))
71 if (path_
.size() && !PathMatch(target_url
.path(), path_
))
73 if (target_url
.SchemeIsSecure())
75 if (base::Time::Now() > expiration_
)
80 //------------------------------------------------------------------------------
81 // Security functions restricting loads and use of dictionaries.
84 bool SdchManager::Dictionary::CanSet(const std::string
& domain
,
85 const std::string
& path
,
86 const std::set
<int>& ports
,
87 const GURL
& dictionary_url
) {
88 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url
))
91 A dictionary is invalid and must not be stored if any of the following are
93 1. The dictionary has no Domain attribute.
94 2. The effective host name that derives from the referer URL host name does
95 not domain-match the Domain attribute.
96 3. The Domain attribute is a top level domain.
97 4. The referer URL host is a host domain name (not IP address) and has the
98 form HD, where D is the value of the Domain attribute, and H is a string
99 that contains one or more dots.
100 5. If the dictionary has a Port attribute and the referer URL's port was not
104 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
105 // and hence the conservative approach is to not allow any redirects (if there
106 // were any... then don't allow the dictionary to be set).
108 if (domain
.empty()) {
109 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER
);
110 return false; // Domain is required.
112 if (registry_controlled_domains::GetDomainAndRegistry(
114 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
).empty()) {
115 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN
);
116 return false; // domain was a TLD.
118 if (!Dictionary::DomainMatch(dictionary_url
, domain
)) {
119 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL
);
123 std::string referrer_url_host
= dictionary_url
.host();
124 size_t postfix_domain_index
= referrer_url_host
.rfind(domain
);
125 // See if it is indeed a postfix, or just an internal string.
126 if (referrer_url_host
.size() == postfix_domain_index
+ domain
.size()) {
127 // It is a postfix... so check to see if there's a dot in the prefix.
128 size_t end_of_host_index
= referrer_url_host
.find_first_of('.');
129 if (referrer_url_host
.npos
!= end_of_host_index
&&
130 end_of_host_index
< postfix_domain_index
) {
131 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX
);
137 && 0 == ports
.count(dictionary_url
.EffectiveIntPort())) {
138 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL
);
145 bool SdchManager::Dictionary::CanUse(const GURL
& referring_url
) {
146 if (!SdchManager::Global()->IsInSupportedDomain(referring_url
))
149 1. The request URL's host name domain-matches the Domain attribute of the
151 2. If the dictionary has a Port attribute, the request port is one of the
152 ports listed in the Port attribute.
153 3. The request URL path-matches the path attribute of the dictionary.
154 4. The request is not an HTTPS request.
156 if (!DomainMatch(referring_url
, domain_
)) {
157 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN
);
161 && 0 == ports_
.count(referring_url
.EffectiveIntPort())) {
162 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST
);
165 if (path_
.size() && !PathMatch(referring_url
.path(), path_
)) {
166 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH
);
169 if (referring_url
.SchemeIsSecure()) {
170 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME
);
174 // TODO(jar): Remove overly restrictive failsafe test (added per security
175 // review) when we have a need to be more general.
176 if (!referring_url
.SchemeIs("http")) {
177 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA
);
184 bool SdchManager::Dictionary::PathMatch(const std::string
& path
,
185 const std::string
& restriction
) {
188 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
189 character following P2 in P1 is "/".
191 if (path
== restriction
)
193 size_t prefix_length
= restriction
.size();
194 if (prefix_length
> path
.size())
195 return false; // Can't be a prefix.
196 if (0 != path
.compare(0, prefix_length
, restriction
))
198 return restriction
[prefix_length
- 1] == '/' || path
[prefix_length
] == '/';
202 bool SdchManager::Dictionary::DomainMatch(const GURL
& gurl
,
203 const std::string
& restriction
) {
204 // TODO(jar): This is not precisely a domain match definition.
205 return gurl
.DomainIs(restriction
.data(), restriction
.size());
208 //------------------------------------------------------------------------------
// Constructs the manager; must run on the thread that CalledOnValidThread()
// accepts.
// NOTE(review): lines are missing from this definition here (whatever
// registers the instance in global_, and the closing brace, are not shown) --
// confirm against the complete file.
209 SdchManager::SdchManager() {
211 DCHECK(CalledOnValidThread());
// Tears the manager down. Expects to be the instance recorded in global_ and
// to run on the valid thread, then drains dictionaries_ one entry at a time:
// each stored Dictionary is Release()d (balancing the AddRef() taken when it
// was stored by AddSdchDictionary) before its map entry is erased by key.
// NOTE(review): the end of this definition is missing here (loop/function
// closing braces and any reset of global_ are not shown) -- confirm against
// the complete file.
215 SdchManager::~SdchManager() {
216 DCHECK_EQ(this, global_
);
217 DCHECK(CalledOnValidThread());
218 while (!dictionaries_
.empty()) {
219 DictionaryMap::iterator it
= dictionaries_
.begin();
220 it
->second
->Release();
221 dictionaries_
.erase(it
->first
);
// Turns SDCH support off and then clears the global manager's fetcher by
// passing NULL to set_sdch_fetcher().
// NOTE(review): two lines are missing between the calls below; presumably
// they guard against global_ being NULL before it is dereferenced -- confirm
// against the complete file.
227 void SdchManager::Shutdown() {
228 EnableSdchSupport(false);
231 global_
->set_sdch_fetcher(NULL
);
235 SdchManager
* SdchManager::Global() {
240 void SdchManager::SdchErrorRecovery(ProblemCodes problem
) {
241 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem
, MAX_PROBLEM_CODE
);
244 void SdchManager::set_sdch_fetcher(SdchFetcher
* fetcher
) {
245 DCHECK(CalledOnValidThread());
246 fetcher_
.reset(fetcher
);
250 void SdchManager::EnableSdchSupport(bool enabled
) {
251 g_sdch_enabled_
= enabled
;
// Temporarily blacklists the (lower-cased) host of |url| on the global
// manager. The latency experiment is switched off for |url| first. The new
// blacklist count is derived from the domain's previous exponential count as
// 1 + 2 * previous, so each successive blacklisting lasts longer; the count
// is then written to blacklisted_domains_.
// NOTE(review): several lines are missing from this definition here (the
// checks guarding the early return and the store into
// exponential_blacklist_count, and any global_ NULL check) -- confirm against
// the complete file.
// NOTE(review): reading blacklisted_domains_[domain] with operator[] inserts
// a zero entry for a domain that was not present.
255 void SdchManager::BlacklistDomain(const GURL
& url
) {
258 global_
->SetAllowLatencyExperiment(url
, false);
260 std::string
domain(StringToLowerASCII(url
.host()));
261 int count
= global_
->blacklisted_domains_
[domain
];
263 return; // Domain is already blacklisted.
265 count
= 1 + 2 * global_
->exponential_blacklist_count
[domain
];
267 global_
->exponential_blacklist_count
[domain
] = count
;
271 global_
->blacklisted_domains_
[domain
] = count
;
// Blacklists the (lower-cased) host of |url| on the global manager with the
// largest possible count: both the exponential count and the active blacklist
// count are set to INT_MAX. The latency experiment is switched off for |url|
// first.
// NOTE(review): two lines are missing ahead of the first statement below;
// presumably a global_ NULL check -- confirm against the complete file.
275 void SdchManager::BlacklistDomainForever(const GURL
& url
) {
278 global_
->SetAllowLatencyExperiment(url
, false);
280 std::string
domain(StringToLowerASCII(url
.host()));
281 global_
->exponential_blacklist_count
[domain
] = INT_MAX
;
282 global_
->blacklisted_domains_
[domain
] = INT_MAX
;
286 void SdchManager::ClearBlacklistings() {
287 Global()->blacklisted_domains_
.clear();
288 Global()->exponential_blacklist_count
.clear();
292 void SdchManager::ClearDomainBlacklisting(const std::string
& domain
) {
293 Global()->blacklisted_domains_
.erase(StringToLowerASCII(domain
));
297 int SdchManager::BlackListDomainCount(const std::string
& domain
) {
298 if (Global()->blacklisted_domains_
.end() ==
299 Global()->blacklisted_domains_
.find(domain
))
301 return Global()->blacklisted_domains_
[StringToLowerASCII(domain
)];
305 int SdchManager::BlacklistDomainExponential(const std::string
& domain
) {
306 if (Global()->exponential_blacklist_count
.end() ==
307 Global()->exponential_blacklist_count
.find(domain
))
309 return Global()->exponential_blacklist_count
[StringToLowerASCII(domain
)];
// Decides whether SDCH may be used for |url|, consulting the global enable
// switch and then the blacklist, which is keyed by lower-cased host. Note the
// side effect: when the host is found in the blacklist its stored count is
// reduced by one (the entry is either rewritten with the new count or erased)
// and DOMAIN_BLACKLIST_INCLUDES_TARGET is reported.
// NOTE(review): lines are missing from this definition here (every return
// statement and the condition choosing between the rewrite and the erase) --
// confirm against the complete file.
312 bool SdchManager::IsInSupportedDomain(const GURL
& url
) {
313 DCHECK(CalledOnValidThread());
314 if (!g_sdch_enabled_
)
317 if (blacklisted_domains_
.empty())
320 std::string
domain(StringToLowerASCII(url
.host()));
321 DomainCounter::iterator it
= blacklisted_domains_
.find(domain
);
322 if (blacklisted_domains_
.end() == it
)
325 int count
= it
->second
- 1;
327 blacklisted_domains_
[domain
] = count
;
329 blacklisted_domains_
.erase(domain
);
330 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET
);
// Schedules a download of the dictionary at |dictionary_url| through fetcher_
// when the global manager's CanFetchDictionary() allows it for a response
// obtained from |request_url|.
// NOTE(review): the second operand of the && condition is missing here;
// presumably it checks that a fetcher is installed before fetcher_ is
// dereferenced -- confirm against the complete file.
334 void SdchManager::FetchDictionary(const GURL
& request_url
,
335 const GURL
& dictionary_url
) {
336 DCHECK(CalledOnValidThread());
337 if (SdchManager::Global()->CanFetchDictionary(request_url
, dictionary_url
) &&
339 fetcher_
->Schedule(dictionary_url
);
342 bool SdchManager::CanFetchDictionary(const GURL
& referring_url
,
343 const GURL
& dictionary_url
) const {
344 DCHECK(CalledOnValidThread());
345 /* The user agent may retrieve a dictionary from the dictionary URL if all of
346 the following are true:
347 1 The dictionary URL host name matches the referrer URL host name
348 2 The dictionary URL host name domain matches the parent domain of the
349 referrer URL host name
350 3 The parent domain of the referrer URL host name is not a top level
352 4 The dictionary URL is not an HTTPS URL.
354 // Item (1) above implies item (2). Spec should be updated.
355 // I take "host name match" to be "is identical to"
356 if (referring_url
.host() != dictionary_url
.host()) {
357 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST
);
360 if (referring_url
.SchemeIs("https")) {
361 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL
);
365 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
366 // than current SDCH spec when needed, and justified by security audit.
367 if (!referring_url
.SchemeIs("http")) {
368 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP
);
// Parses |dictionary_text| (header lines, a blank line, then the payload)
// that was downloaded from |dictionary_url| and, if acceptable, stores it.
//
// Steps visible here:
//   * Hash the text with GenerateHash(); a dictionary whose server hash is
//     already in dictionaries_ is rejected (DICTIONARY_ALREADY_LOADED).
//   * Reject empty text and text without a "\n\n" header terminator.
//   * Walk the header line by line. Each line is "name: value"; the name is
//     lower-cased and the names "domain", "path", "format-version", "max-age"
//     and "port" are recognized. "max-age" replaces the default expiration of
//     30 days from now with now + the given number of seconds.
//   * Validate the attributes with Dictionary::CanSet().
//   * Enforce kMaxDictionarySize and kMaxDictionaryCount.
//   * Create the Dictionary with its payload starting at header_end + 2 (just
//     past the "\n\n"), AddRef() it and store it under the server hash.
// NOTE(review): many lines are missing from this definition here (the loop
// header, the bodies of several branches, local declarations such as |ports|,
// |seconds| and |port|, and the return statements) -- confirm against the
// complete file before editing.
375 bool SdchManager::AddSdchDictionary(const std::string
& dictionary_text
,
376 const GURL
& dictionary_url
) {
377 DCHECK(CalledOnValidThread());
378 std::string client_hash
;
379 std::string server_hash
;
380 GenerateHash(dictionary_text
, &client_hash
, &server_hash
);
381 if (dictionaries_
.find(server_hash
) != dictionaries_
.end()) {
382 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED
);
383 return false; // Already loaded.
386 std::string domain
, path
;
388 base::Time
expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
390 if (dictionary_text
.empty()) {
391 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT
);
392 return false; // Missing header.
395 size_t header_end
= dictionary_text
.find("\n\n");
396 if (std::string::npos
== header_end
) {
397 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER
);
398 return false; // Missing header.
400 size_t line_start
= 0; // Start of line being parsed.
402 size_t line_end
= dictionary_text
.find('\n', line_start
);
403 DCHECK(std::string::npos
!= line_end
);
404 DCHECK_LE(line_end
, header_end
);
406 size_t colon_index
= dictionary_text
.find(':', line_start
);
407 if (std::string::npos
== colon_index
) {
408 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON
);
409 return false; // Illegal line missing a colon.
412 if (colon_index
> line_end
)
415 size_t value_start
= dictionary_text
.find_first_not_of(" \t",
417 if (std::string::npos
!= value_start
) {
418 if (value_start
>= line_end
)
420 std::string
name(dictionary_text
, line_start
, colon_index
- line_start
);
421 std::string
value(dictionary_text
, value_start
, line_end
- value_start
);
422 name
= StringToLowerASCII(name
);
423 if (name
== "domain") {
425 } else if (name
== "path") {
427 } else if (name
== "format-version") {
430 } else if (name
== "max-age") {
// NOTE(review): the result of StringToInt64() is not checked on the next
// line; confirm |seconds| holds a defined value when |value| fails to parse.
432 base::StringToInt64(value
, &seconds
);
433 expiration
= base::Time::Now() + base::TimeDelta::FromSeconds(seconds
);
434 } else if (name
== "port") {
// NOTE(review): the result of StringToInt() is likewise not checked here.
436 base::StringToInt(value
, &port
);
442 if (line_end
>= header_end
)
444 line_start
= line_end
+ 1;
447 if (!Dictionary::CanSet(domain
, path
, ports
, dictionary_url
))
450 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
451 // useless dictionaries. We should probably have a cache eviction plan,
452 // instead of just blocking additions. For now, with the spec in flux, it
453 // is probably not worth doing eviction handling.
454 if (kMaxDictionarySize
< dictionary_text
.size()) {
455 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE
);
458 if (kMaxDictionaryCount
<= dictionaries_
.size()) {
459 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED
);
463 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text
.size());
464 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
465 << " and server hash " << server_hash
;
466 Dictionary
* dictionary
=
467 new Dictionary(dictionary_text
, header_end
+ 2, client_hash
,
468 dictionary_url
, domain
, path
, expiration
, ports
);
469 dictionary
->AddRef();
470 dictionaries_
[server_hash
] = dictionary
;
// Looks up the stored dictionary whose server hash is |server_hash| and, when
// that dictionary's CanUse() accepts |referring_url|, hands it back through
// |dictionary|.
// NOTE(review): lines are missing from this definition here (what
// |*dictionary| is set to on the not-found / not-usable paths, and the early
// returns) -- confirm against the complete file.
474 void SdchManager::GetVcdiffDictionary(const std::string
& server_hash
,
475 const GURL
& referring_url
, Dictionary
** dictionary
) {
476 DCHECK(CalledOnValidThread());
478 DictionaryMap::iterator it
= dictionaries_
.find(server_hash
);
479 if (it
== dictionaries_
.end()) {
482 Dictionary
* matching_dictionary
= it
->second
;
483 if (!matching_dictionary
->CanUse(referring_url
))
485 *dictionary
= matching_dictionary
;
488 // TODO(jar): If we have evictions from the dictionaries_, then we need to
489 // change this interface to return a list of reference counted Dictionary
490 // instances that can be used if/when a server specifies one.
// Walks every stored dictionary and appends the client hash of each one whose
// CanAdvertise() accepts |target_url| to |list|. A count is reported to the
// "Sdch3.Advertisement_Count" histogram.
// NOTE(review): lines are missing from this definition here (the declaration
// of the |list| parameter, how |count| is maintained, any separator written
// between hashes, and the condition guarding the histogram) -- confirm
// against the complete file.
491 void SdchManager::GetAvailDictionaryList(const GURL
& target_url
,
493 DCHECK(CalledOnValidThread());
495 for (DictionaryMap::iterator it
= dictionaries_
.begin();
496 it
!= dictionaries_
.end(); ++it
) {
497 if (!it
->second
->CanAdvertise(target_url
))
502 list
->append(it
->second
->client_hash());
504 // Watch to see if we have corrupt or numerous dictionaries.
506 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count
);
510 void SdchManager::GenerateHash(const std::string
& dictionary_text
,
511 std::string
* client_hash
, std::string
* server_hash
) {
512 char binary_hash
[32];
513 crypto::SHA256HashString(dictionary_text
, binary_hash
, sizeof(binary_hash
));
515 std::string
first_48_bits(&binary_hash
[0], 6);
516 std::string
second_48_bits(&binary_hash
[6], 6);
517 UrlSafeBase64Encode(first_48_bits
, client_hash
);
518 UrlSafeBase64Encode(second_48_bits
, server_hash
);
520 DCHECK_EQ(server_hash
->length(), 8u);
521 DCHECK_EQ(client_hash
->length(), 8u);
524 //------------------------------------------------------------------------------
525 // Methods for supporting latency experiments.
527 bool SdchManager::AllowLatencyExperiment(const GURL
& url
) const {
528 DCHECK(CalledOnValidThread());
529 return allow_latency_experiment_
.end() !=
530 allow_latency_experiment_
.find(url
.host());
// Adds the host of |url| to, or removes it from, the set of hosts for which
// the latency experiment is allowed. On the removal path shown below,
// LATENCY_TEST_DISALLOWED is reported only when the host was actually present
// in the set.
// NOTE(review): lines are missing around the insert() below; presumably the
// insert and an early return are guarded by |enable| -- confirm against the
// complete file.
533 void SdchManager::SetAllowLatencyExperiment(const GURL
& url
, bool enable
) {
534 DCHECK(CalledOnValidThread());
536 allow_latency_experiment_
.insert(url
.host());
539 ExperimentSet::iterator it
= allow_latency_experiment_
.find(url
.host());
540 if (allow_latency_experiment_
.end() == it
)
541 return; // It was already erased, or never allowed.
542 SdchErrorRecovery(LATENCY_TEST_DISALLOWED
);
543 allow_latency_experiment_
.erase(it
);
547 void SdchManager::UrlSafeBase64Encode(const std::string
& input
,
548 std::string
* output
) {
549 // Since this is only done during a dictionary load, and hashes are only 8
550 // characters, we just do the simple fixup, rather than rewriting the encoder.
551 base::Base64Encode(input
, output
);
552 for (size_t i
= 0; i
< output
->size(); ++i
) {
553 switch (output
->data()[i
]) {