include <limits> for std::numeric_limits
[chromium-blink-merge.git] / net / base / sdch_manager.cc
blob8b7481b50b582b80829ac0f2b1b928333c1b1a38
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
16 namespace net {
18 //------------------------------------------------------------------------------
19 // static
21 // Adjust SDCH limits downwards for mobile.
22 #if defined(OS_ANDROID) || defined(OS_IOS)
23 // static
24 const size_t SdchManager::kMaxDictionaryCount = 1;
25 const size_t SdchManager::kMaxDictionarySize = 150 * 1000;
26 #else
27 // static
28 const size_t SdchManager::kMaxDictionaryCount = 20;
29 const size_t SdchManager::kMaxDictionarySize = 1000 * 1000;
30 #endif
32 // static
33 bool SdchManager::g_sdch_enabled_ = true;
35 // static
36 bool SdchManager::g_secure_scheme_supported_ = false;
38 //------------------------------------------------------------------------------
39 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
40 size_t offset,
41 const std::string& client_hash,
42 const GURL& gurl,
43 const std::string& domain,
44 const std::string& path,
45 const base::Time& expiration,
46 const std::set<int>& ports)
47 : text_(dictionary_text, offset),
48 client_hash_(client_hash),
49 url_(gurl),
50 domain_(domain),
51 path_(path),
52 expiration_(expiration),
53 ports_(ports) {
56 SdchManager::Dictionary::~Dictionary() {
59 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
60 /* The specific rules of when a dictionary should be advertised in an
61 Avail-Dictionary header are modeled after the rules for cookie scoping. The
62 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
63 dictionary may be advertised in the Avail-Dictionaries header exactly when
64 all of the following are true:
65 1. The server's effective host name domain-matches the Domain attribute of
66 the dictionary.
67 2. If the dictionary has a Port attribute, the request port is one of the
68 ports listed in the Port attribute.
69 3. The request URI path-matches the path header of the dictionary.
70 4. The request is not an HTTPS request.
71 We can override (ignore) item (4) only when we have explicitly enabled
72 HTTPS support AND the dictionary acquisition scheme matches the target
73 url scheme.
75 if (!DomainMatch(target_url, domain_))
76 return false;
77 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
78 return false;
79 if (path_.size() && !PathMatch(target_url.path(), path_))
80 return false;
81 if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
82 return false;
83 if (target_url.SchemeIsSecure() != url_.SchemeIsSecure())
84 return false;
85 if (base::Time::Now() > expiration_)
86 return false;
87 return true;
90 //------------------------------------------------------------------------------
91 // Security functions restricting loads and use of dictionaries.
93 // static
94 bool SdchManager::Dictionary::CanSet(const std::string& domain,
95 const std::string& path,
96 const std::set<int>& ports,
97 const GURL& dictionary_url) {
99 A dictionary is invalid and must not be stored if any of the following are
100 true:
101 1. The dictionary has no Domain attribute.
102 2. The effective host name that derives from the referer URL host name does
103 not domain-match the Domain attribute.
104 3. The Domain attribute is a top level domain.
105 4. The referer URL host is a host domain name (not IP address) and has the
106 form HD, where D is the value of the Domain attribute, and H is a string
107 that contains one or more dots.
108 5. If the dictionary has a Port attribute and the referer URL's port was not
109 in the list.
112 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
113 // and hence the conservative approach is to not allow any redirects (if there
114 // were any... then don't allow the dictionary to be set).
116 if (domain.empty()) {
117 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
118 return false; // Domain is required.
120 if (registry_controlled_domains::GetDomainAndRegistry(
121 domain,
122 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
123 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
124 return false; // domain was a TLD.
126 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
127 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
128 return false;
131 std::string referrer_url_host = dictionary_url.host();
132 size_t postfix_domain_index = referrer_url_host.rfind(domain);
133 // See if it is indeed a postfix, or just an internal string.
134 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
135 // It is a postfix... so check to see if there's a dot in the prefix.
136 size_t end_of_host_index = referrer_url_host.find_first_of('.');
137 if (referrer_url_host.npos != end_of_host_index &&
138 end_of_host_index < postfix_domain_index) {
139 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
140 return false;
144 if (!ports.empty()
145 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
146 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
147 return false;
149 return true;
152 // static
153 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
155 1. The request URL's host name domain-matches the Domain attribute of the
156 dictionary.
157 2. If the dictionary has a Port attribute, the request port is one of the
158 ports listed in the Port attribute.
159 3. The request URL path-matches the path attribute of the dictionary.
160 4. The request is not an HTTPS request.
161 We can override (ignore) item (4) only when we have explicitly enabled
162 HTTPS support AND the dictionary acquisition scheme matches the target
163 url scheme.
165 if (!DomainMatch(referring_url, domain_)) {
166 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
167 return false;
169 if (!ports_.empty()
170 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
171 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
172 return false;
174 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
175 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
176 return false;
178 if (!SdchManager::secure_scheme_supported() &&
179 referring_url.SchemeIsSecure()) {
180 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
181 return false;
183 if (referring_url.SchemeIsSecure() != url_.SchemeIsSecure()) {
184 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
185 return false;
188 // TODO(jar): Remove overly restrictive failsafe test (added per security
189 // review) when we have a need to be more general.
190 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
191 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
192 return false;
195 return true;
198 bool SdchManager::Dictionary::PathMatch(const std::string& path,
199 const std::string& restriction) {
200 /* Must be either:
201 1. P2 is equal to P1
202 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
203 character following P2 in P1 is "/".
205 if (path == restriction)
206 return true;
207 size_t prefix_length = restriction.size();
208 if (prefix_length > path.size())
209 return false; // Can't be a prefix.
210 if (0 != path.compare(0, prefix_length, restriction))
211 return false;
212 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
215 // static
216 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
217 const std::string& restriction) {
218 // TODO(jar): This is not precisely a domain match definition.
219 return gurl.DomainIs(restriction.data(), restriction.size());
222 //------------------------------------------------------------------------------
223 SdchManager::SdchManager() {
224 DCHECK(CalledOnValidThread());
227 SdchManager::~SdchManager() {
228 DCHECK(CalledOnValidThread());
229 while (!dictionaries_.empty()) {
230 DictionaryMap::iterator it = dictionaries_.begin();
231 dictionaries_.erase(it->first);
235 void SdchManager::ClearData() {
236 blacklisted_domains_.clear();
237 exponential_blacklist_count_.clear();
238 allow_latency_experiment_.clear();
239 if (fetcher_.get())
240 fetcher_->Cancel();
242 // Note that this may result in not having dictionaries we've advertised
243 // for incoming responses. The window is relatively small (as ClearData()
244 // is not expected to be called frequently), so we rely on meta-refresh
245 // to handle this case.
246 dictionaries_.clear();
249 // static
250 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
251 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
254 void SdchManager::set_sdch_fetcher(SdchFetcher* fetcher) {
255 DCHECK(CalledOnValidThread());
256 fetcher_.reset(fetcher);
259 // static
260 void SdchManager::EnableSdchSupport(bool enabled) {
261 g_sdch_enabled_ = enabled;
264 // static
265 void SdchManager::EnableSecureSchemeSupport(bool enabled) {
266 g_secure_scheme_supported_ = enabled;
269 void SdchManager::BlacklistDomain(const GURL& url) {
270 SetAllowLatencyExperiment(url, false);
272 std::string domain(base::StringToLowerASCII(url.host()));
273 int count = blacklisted_domains_[domain];
274 if (count > 0)
275 return; // Domain is already blacklisted.
277 count = 1 + 2 * exponential_blacklist_count_[domain];
278 if (count > 0)
279 exponential_blacklist_count_[domain] = count;
280 else
281 count = INT_MAX;
283 blacklisted_domains_[domain] = count;
286 void SdchManager::BlacklistDomainForever(const GURL& url) {
287 SetAllowLatencyExperiment(url, false);
289 std::string domain(base::StringToLowerASCII(url.host()));
290 exponential_blacklist_count_[domain] = INT_MAX;
291 blacklisted_domains_[domain] = INT_MAX;
294 void SdchManager::ClearBlacklistings() {
295 blacklisted_domains_.clear();
296 exponential_blacklist_count_.clear();
299 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
300 blacklisted_domains_.erase(base::StringToLowerASCII(domain));
303 int SdchManager::BlackListDomainCount(const std::string& domain) {
304 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain))
305 return 0;
306 return blacklisted_domains_[base::StringToLowerASCII(domain)];
309 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
310 if (exponential_blacklist_count_.end() ==
311 exponential_blacklist_count_.find(domain))
312 return 0;
313 return exponential_blacklist_count_[base::StringToLowerASCII(domain)];
316 bool SdchManager::IsInSupportedDomain(const GURL& url) {
317 DCHECK(CalledOnValidThread());
318 if (!g_sdch_enabled_ )
319 return false;
321 if (!secure_scheme_supported() && url.SchemeIsSecure())
322 return false;
324 if (blacklisted_domains_.empty())
325 return true;
327 std::string domain(base::StringToLowerASCII(url.host()));
328 DomainCounter::iterator it = blacklisted_domains_.find(domain);
329 if (blacklisted_domains_.end() == it)
330 return true;
332 int count = it->second - 1;
333 if (count > 0)
334 blacklisted_domains_[domain] = count;
335 else
336 blacklisted_domains_.erase(domain);
337 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
338 return false;
341 void SdchManager::FetchDictionary(const GURL& request_url,
342 const GURL& dictionary_url) {
343 DCHECK(CalledOnValidThread());
344 if (CanFetchDictionary(request_url, dictionary_url) && fetcher_.get())
345 fetcher_->Schedule(dictionary_url);
348 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
349 const GURL& dictionary_url) const {
350 DCHECK(CalledOnValidThread());
351 /* The user agent may retrieve a dictionary from the dictionary URL if all of
352 the following are true:
353 1 The dictionary URL host name matches the referrer URL host name and
354 scheme.
355 2 The dictionary URL host name domain matches the parent domain of the
356 referrer URL host name
357 3 The parent domain of the referrer URL host name is not a top level
358 domain
359 4 The dictionary URL is not an HTTPS URL.
361 // Item (1) above implies item (2). Spec should be updated.
362 // I take "host name match" to be "is identical to"
363 if (referring_url.host() != dictionary_url.host() ||
364 referring_url.scheme() != dictionary_url.scheme()) {
365 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
366 return false;
368 if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
369 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
370 return false;
373 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
374 // than current SDCH spec when needed, and justified by security audit.
375 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
376 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
377 return false;
380 return true;
383 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
384 const GURL& dictionary_url) {
385 DCHECK(CalledOnValidThread());
386 std::string client_hash;
387 std::string server_hash;
388 GenerateHash(dictionary_text, &client_hash, &server_hash);
389 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
390 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
391 return false; // Already loaded.
394 std::string domain, path;
395 std::set<int> ports;
396 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
398 if (dictionary_text.empty()) {
399 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
400 return false; // Missing header.
403 size_t header_end = dictionary_text.find("\n\n");
404 if (std::string::npos == header_end) {
405 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
406 return false; // Missing header.
408 size_t line_start = 0; // Start of line being parsed.
409 while (1) {
410 size_t line_end = dictionary_text.find('\n', line_start);
411 DCHECK(std::string::npos != line_end);
412 DCHECK_LE(line_end, header_end);
414 size_t colon_index = dictionary_text.find(':', line_start);
415 if (std::string::npos == colon_index) {
416 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
417 return false; // Illegal line missing a colon.
420 if (colon_index > line_end)
421 break;
423 size_t value_start = dictionary_text.find_first_not_of(" \t",
424 colon_index + 1);
425 if (std::string::npos != value_start) {
426 if (value_start >= line_end)
427 break;
428 std::string name(dictionary_text, line_start, colon_index - line_start);
429 std::string value(dictionary_text, value_start, line_end - value_start);
430 name = base::StringToLowerASCII(name);
431 if (name == "domain") {
432 domain = value;
433 } else if (name == "path") {
434 path = value;
435 } else if (name == "format-version") {
436 if (value != "1.0")
437 return false;
438 } else if (name == "max-age") {
439 int64 seconds;
440 base::StringToInt64(value, &seconds);
441 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
442 } else if (name == "port") {
443 int port;
444 base::StringToInt(value, &port);
445 if (port >= 0)
446 ports.insert(port);
450 if (line_end >= header_end)
451 break;
452 line_start = line_end + 1;
455 if (!IsInSupportedDomain(dictionary_url))
456 return false;
458 if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
459 return false;
461 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
462 // useless dictionaries. We should probably have a cache eviction plan,
463 // instead of just blocking additions. For now, with the spec in flux, it
464 // is probably not worth doing eviction handling.
465 if (kMaxDictionarySize < dictionary_text.size()) {
466 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
467 return false;
469 if (kMaxDictionaryCount <= dictionaries_.size()) {
470 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
471 return false;
474 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
475 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
476 << " and server hash " << server_hash;
477 Dictionary* dictionary =
478 new Dictionary(dictionary_text, header_end + 2, client_hash,
479 dictionary_url, domain, path, expiration, ports);
480 dictionaries_[server_hash] = dictionary;
481 return true;
484 void SdchManager::GetVcdiffDictionary(
485 const std::string& server_hash,
486 const GURL& referring_url,
487 scoped_refptr<Dictionary>* dictionary) {
488 DCHECK(CalledOnValidThread());
489 *dictionary = NULL;
490 DictionaryMap::iterator it = dictionaries_.find(server_hash);
491 if (it == dictionaries_.end()) {
492 return;
494 scoped_refptr<Dictionary> matching_dictionary = it->second;
495 if (!IsInSupportedDomain(referring_url))
496 return;
497 if (!matching_dictionary->CanUse(referring_url))
498 return;
499 *dictionary = matching_dictionary;
502 // TODO(jar): If we have evictions from the dictionaries_, then we need to
503 // change this interface to return a list of reference counted Dictionary
504 // instances that can be used if/when a server specifies one.
505 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
506 std::string* list) {
507 DCHECK(CalledOnValidThread());
508 int count = 0;
509 for (DictionaryMap::iterator it = dictionaries_.begin();
510 it != dictionaries_.end(); ++it) {
511 if (!IsInSupportedDomain(target_url))
512 continue;
513 if (!it->second->CanAdvertise(target_url))
514 continue;
515 ++count;
516 if (!list->empty())
517 list->append(",");
518 list->append(it->second->client_hash());
520 // Watch to see if we have corrupt or numerous dictionaries.
521 if (count > 0)
522 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
525 // static
526 void SdchManager::GenerateHash(const std::string& dictionary_text,
527 std::string* client_hash, std::string* server_hash) {
528 char binary_hash[32];
529 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
531 std::string first_48_bits(&binary_hash[0], 6);
532 std::string second_48_bits(&binary_hash[6], 6);
533 UrlSafeBase64Encode(first_48_bits, client_hash);
534 UrlSafeBase64Encode(second_48_bits, server_hash);
536 DCHECK_EQ(server_hash->length(), 8u);
537 DCHECK_EQ(client_hash->length(), 8u);
540 //------------------------------------------------------------------------------
541 // Methods for supporting latency experiments.
543 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
544 DCHECK(CalledOnValidThread());
545 return allow_latency_experiment_.end() !=
546 allow_latency_experiment_.find(url.host());
549 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
550 DCHECK(CalledOnValidThread());
551 if (enable) {
552 allow_latency_experiment_.insert(url.host());
553 return;
555 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
556 if (allow_latency_experiment_.end() == it)
557 return; // It was already erased, or never allowed.
558 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
559 allow_latency_experiment_.erase(it);
562 // static
563 void SdchManager::UrlSafeBase64Encode(const std::string& input,
564 std::string* output) {
565 // Since this is only done during a dictionary load, and hashes are only 8
566 // characters, we just do the simple fixup, rather than rewriting the encoder.
567 base::Base64Encode(input, output);
568 for (size_t i = 0; i < output->size(); ++i) {
569 switch (output->data()[i]) {
570 case '+':
571 (*output)[i] = '-';
572 continue;
573 case '/':
574 (*output)[i] = '_';
575 continue;
576 default:
577 continue;
582 } // namespace net