Valgrind Mac: Remove many obsolete suppressions.
[chromium-blink-merge.git] / net / base / sdch_manager.cc
blob0f31d5cee8ad84e4ea2acacd8c06a52b786847b2
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
16 namespace {
18 void StripTrailingDot(GURL* gurl) {
19 std::string host(gurl->host());
21 if (host.empty())
22 return;
24 if (*host.rbegin() != '.')
25 return;
27 host.resize(host.size() - 1);
29 GURL::Replacements replacements;
30 replacements.SetHostStr(host);
31 *gurl = gurl->ReplaceComponents(replacements);
32 return;
35 } // namespace
37 namespace net {
39 //------------------------------------------------------------------------------
40 // static
42 // Adjust SDCH limits downwards for mobile.
43 #if defined(OS_ANDROID) || defined(OS_IOS)
44 // static
45 const size_t SdchManager::kMaxDictionaryCount = 1;
46 const size_t SdchManager::kMaxDictionarySize = 500 * 1000;
47 #else
48 // static
49 const size_t SdchManager::kMaxDictionaryCount = 20;
50 const size_t SdchManager::kMaxDictionarySize = 1000 * 1000;
51 #endif
53 // static
54 bool SdchManager::g_sdch_enabled_ = true;
56 // static
57 bool SdchManager::g_secure_scheme_supported_ = true;
59 //------------------------------------------------------------------------------
60 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
61 size_t offset,
62 const std::string& client_hash,
63 const GURL& gurl,
64 const std::string& domain,
65 const std::string& path,
66 const base::Time& expiration,
67 const std::set<int>& ports)
68 : text_(dictionary_text, offset),
69 client_hash_(client_hash),
70 url_(gurl),
71 domain_(domain),
72 path_(path),
73 expiration_(expiration),
74 ports_(ports) {
77 SdchManager::Dictionary::~Dictionary() {
80 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
81 /* The specific rules of when a dictionary should be advertised in an
82 Avail-Dictionary header are modeled after the rules for cookie scoping. The
83 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
84 dictionary may be advertised in the Avail-Dictionaries header exactly when
85 all of the following are true:
86 1. The server's effective host name domain-matches the Domain attribute of
87 the dictionary.
88 2. If the dictionary has a Port attribute, the request port is one of the
89 ports listed in the Port attribute.
90 3. The request URI path-matches the path header of the dictionary.
91 4. The request is not an HTTPS request.
92 We can override (ignore) item (4) only when we have explicitly enabled
93 HTTPS support AND the dictionary acquisition scheme matches the target
94 url scheme.
96 if (!DomainMatch(target_url, domain_))
97 return false;
98 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
99 return false;
100 if (path_.size() && !PathMatch(target_url.path(), path_))
101 return false;
102 if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
103 return false;
104 if (target_url.SchemeIsSecure() != url_.SchemeIsSecure())
105 return false;
106 if (base::Time::Now() > expiration_)
107 return false;
108 return true;
111 //------------------------------------------------------------------------------
112 // Security functions restricting loads and use of dictionaries.
114 // static
115 bool SdchManager::Dictionary::CanSet(const std::string& domain,
116 const std::string& path,
117 const std::set<int>& ports,
118 const GURL& dictionary_url) {
120 A dictionary is invalid and must not be stored if any of the following are
121 true:
122 1. The dictionary has no Domain attribute.
123 2. The effective host name that derives from the referer URL host name does
124 not domain-match the Domain attribute.
125 3. The Domain attribute is a top level domain.
126 4. The referer URL host is a host domain name (not IP address) and has the
127 form HD, where D is the value of the Domain attribute, and H is a string
128 that contains one or more dots.
129 5. If the dictionary has a Port attribute and the referer URL's port was not
130 in the list.
133 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
134 // and hence the conservative approach is to not allow any redirects (if there
135 // were any... then don't allow the dictionary to be set).
137 if (domain.empty()) {
138 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
139 return false; // Domain is required.
141 if (registry_controlled_domains::GetDomainAndRegistry(
142 domain,
143 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
144 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
145 return false; // domain was a TLD.
147 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
148 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
149 return false;
152 std::string referrer_url_host = dictionary_url.host();
153 size_t postfix_domain_index = referrer_url_host.rfind(domain);
154 // See if it is indeed a postfix, or just an internal string.
155 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
156 // It is a postfix... so check to see if there's a dot in the prefix.
157 size_t end_of_host_index = referrer_url_host.find_first_of('.');
158 if (referrer_url_host.npos != end_of_host_index &&
159 end_of_host_index < postfix_domain_index) {
160 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
161 return false;
165 if (!ports.empty()
166 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
167 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
168 return false;
170 return true;
173 // static
174 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
176 1. The request URL's host name domain-matches the Domain attribute of the
177 dictionary.
178 2. If the dictionary has a Port attribute, the request port is one of the
179 ports listed in the Port attribute.
180 3. The request URL path-matches the path attribute of the dictionary.
181 4. The request is not an HTTPS request.
182 We can override (ignore) item (4) only when we have explicitly enabled
183 HTTPS support AND the dictionary acquisition scheme matches the target
184 url scheme.
186 if (!DomainMatch(referring_url, domain_)) {
187 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
188 return false;
190 if (!ports_.empty()
191 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
192 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
193 return false;
195 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
196 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
197 return false;
199 if (!SdchManager::secure_scheme_supported() &&
200 referring_url.SchemeIsSecure()) {
201 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
202 return false;
204 if (referring_url.SchemeIsSecure() != url_.SchemeIsSecure()) {
205 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
206 return false;
209 // TODO(jar): Remove overly restrictive failsafe test (added per security
210 // review) when we have a need to be more general.
211 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
212 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
213 return false;
216 return true;
// Returns true when |path| (P1) path-matches |restriction| (P2), i.e. when
// either:
//   1. P2 is equal to P1, or
//   2. P2 is a prefix of P1 and either the final character of P2 is "/" or
//      the character following P2 in P1 is "/".
// An empty |restriction| has no final character, so for it only the second
// half of rule 2 can apply (P1 must start with "/").
bool SdchManager::Dictionary::PathMatch(const std::string& path,
                                        const std::string& restriction) {
  if (path == restriction)
    return true;

  const size_t prefix_length = restriction.size();
  if (prefix_length > path.size())
    return false;  // Can't be a prefix.
  if (0 != path.compare(0, prefix_length, restriction))
    return false;

  // |restriction| is a proper prefix of |path| here (equal strings returned
  // above), so path[prefix_length] is in range. restriction[prefix_length - 1]
  // is only in range when |restriction| is non-empty, hence the guard.
  const bool restriction_ends_with_slash =
      prefix_length > 0 && restriction[prefix_length - 1] == '/';
  return restriction_ends_with_slash || path[prefix_length] == '/';
}
236 // static
237 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
238 const std::string& restriction) {
239 // TODO(jar): This is not precisely a domain match definition.
240 return gurl.DomainIs(restriction.data(), restriction.size());
243 //------------------------------------------------------------------------------
244 SdchManager::SdchManager()
245 : fetches_count_for_testing_(0) {
246 DCHECK(CalledOnValidThread());
249 SdchManager::~SdchManager() {
250 DCHECK(CalledOnValidThread());
251 while (!dictionaries_.empty()) {
252 DictionaryMap::iterator it = dictionaries_.begin();
253 dictionaries_.erase(it->first);
257 void SdchManager::ClearData() {
258 blacklisted_domains_.clear();
259 allow_latency_experiment_.clear();
260 if (fetcher_.get())
261 fetcher_->Cancel();
263 // Note that this may result in not having dictionaries we've advertised
264 // for incoming responses. The window is relatively small (as ClearData()
265 // is not expected to be called frequently), so we rely on meta-refresh
266 // to handle this case.
267 dictionaries_.clear();
270 // static
271 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
272 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
275 void SdchManager::set_sdch_fetcher(scoped_ptr<SdchFetcher> fetcher) {
276 DCHECK(CalledOnValidThread());
277 fetcher_ = fetcher.Pass();
280 // static
281 void SdchManager::EnableSdchSupport(bool enabled) {
282 g_sdch_enabled_ = enabled;
285 // static
286 void SdchManager::EnableSecureSchemeSupport(bool enabled) {
287 g_secure_scheme_supported_ = enabled;
290 void SdchManager::BlacklistDomain(const GURL& url,
291 ProblemCodes blacklist_reason) {
292 SetAllowLatencyExperiment(url, false);
294 BlacklistInfo* blacklist_info =
295 &blacklisted_domains_[base::StringToLowerASCII(url.host())];
297 if (blacklist_info->count > 0)
298 return; // Domain is already blacklisted.
300 if (blacklist_info->exponential_count > (INT_MAX - 1) / 2) {
301 blacklist_info->exponential_count = INT_MAX;
302 } else {
303 blacklist_info->exponential_count =
304 blacklist_info->exponential_count * 2 + 1;
307 blacklist_info->count = blacklist_info->exponential_count;
308 blacklist_info->reason = blacklist_reason;
311 void SdchManager::BlacklistDomainForever(const GURL& url,
312 ProblemCodes blacklist_reason) {
313 SetAllowLatencyExperiment(url, false);
315 BlacklistInfo* blacklist_info =
316 &blacklisted_domains_[base::StringToLowerASCII(url.host())];
317 blacklist_info->count = INT_MAX;
318 blacklist_info->exponential_count = INT_MAX;
319 blacklist_info->reason = blacklist_reason;
322 void SdchManager::ClearBlacklistings() {
323 blacklisted_domains_.clear();
326 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
327 BlacklistInfo* blacklist_info = &blacklisted_domains_[
328 base::StringToLowerASCII(domain)];
329 blacklist_info->count = 0;
330 blacklist_info->reason = MIN_PROBLEM_CODE;
333 int SdchManager::BlackListDomainCount(const std::string& domain) {
334 std::string domain_lower(base::StringToLowerASCII(domain));
336 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
337 return 0;
338 return blacklisted_domains_[domain_lower].count;
341 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
342 std::string domain_lower(base::StringToLowerASCII(domain));
344 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
345 return 0;
346 return blacklisted_domains_[domain_lower].exponential_count;
349 bool SdchManager::IsInSupportedDomain(const GURL& url) {
350 DCHECK(CalledOnValidThread());
351 if (!g_sdch_enabled_ )
352 return false;
354 if (!secure_scheme_supported() && url.SchemeIsSecure())
355 return false;
357 if (blacklisted_domains_.empty())
358 return true;
360 DomainBlacklistInfo::iterator it =
361 blacklisted_domains_.find(base::StringToLowerASCII(url.host()));
362 if (blacklisted_domains_.end() == it || it->second.count == 0)
363 return true;
365 UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it->second.reason,
366 MAX_PROBLEM_CODE);
367 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
369 int count = it->second.count - 1;
370 if (count > 0) {
371 it->second.count = count;
372 } else {
373 it->second.count = 0;
374 it->second.reason = MIN_PROBLEM_CODE;
377 return false;
380 void SdchManager::FetchDictionary(const GURL& request_url,
381 const GURL& dictionary_url) {
382 DCHECK(CalledOnValidThread());
383 if (CanFetchDictionary(request_url, dictionary_url) && fetcher_.get()) {
384 ++fetches_count_for_testing_;
385 fetcher_->Schedule(dictionary_url);
389 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
390 const GURL& dictionary_url) const {
391 DCHECK(CalledOnValidThread());
392 /* The user agent may retrieve a dictionary from the dictionary URL if all of
393 the following are true:
394 1 The dictionary URL host name matches the referrer URL host name and
395 scheme.
396 2 The dictionary URL host name domain matches the parent domain of the
397 referrer URL host name
398 3 The parent domain of the referrer URL host name is not a top level
399 domain
400 4 The dictionary URL is not an HTTPS URL.
402 // Item (1) above implies item (2). Spec should be updated.
403 // I take "host name match" to be "is identical to"
404 if (referring_url.host() != dictionary_url.host() ||
405 referring_url.scheme() != dictionary_url.scheme()) {
406 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
407 return false;
409 if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
410 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
411 return false;
414 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
415 // than current SDCH spec when needed, and justified by security audit.
416 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
417 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
418 return false;
421 return true;
424 void SdchManager::GetVcdiffDictionary(
425 const std::string& server_hash,
426 const GURL& referring_url,
427 scoped_refptr<Dictionary>* dictionary) {
428 DCHECK(CalledOnValidThread());
429 *dictionary = NULL;
430 DictionaryMap::iterator it = dictionaries_.find(server_hash);
431 if (it == dictionaries_.end()) {
432 return;
434 scoped_refptr<Dictionary> matching_dictionary = it->second;
435 if (!IsInSupportedDomain(referring_url))
436 return;
437 if (!matching_dictionary->CanUse(referring_url))
438 return;
439 *dictionary = matching_dictionary;
442 // TODO(jar): If we have evictions from the dictionaries_, then we need to
443 // change this interface to return a list of reference counted Dictionary
444 // instances that can be used if/when a server specifies one.
445 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
446 std::string* list) {
447 DCHECK(CalledOnValidThread());
448 int count = 0;
449 for (DictionaryMap::iterator it = dictionaries_.begin();
450 it != dictionaries_.end(); ++it) {
451 if (!IsInSupportedDomain(target_url))
452 continue;
453 if (!it->second->CanAdvertise(target_url))
454 continue;
455 ++count;
456 if (!list->empty())
457 list->append(",");
458 list->append(it->second->client_hash());
460 // Watch to see if we have corrupt or numerous dictionaries.
461 if (count > 0)
462 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
465 // static
466 void SdchManager::GenerateHash(const std::string& dictionary_text,
467 std::string* client_hash, std::string* server_hash) {
468 char binary_hash[32];
469 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
471 std::string first_48_bits(&binary_hash[0], 6);
472 std::string second_48_bits(&binary_hash[6], 6);
473 UrlSafeBase64Encode(first_48_bits, client_hash);
474 UrlSafeBase64Encode(second_48_bits, server_hash);
476 DCHECK_EQ(server_hash->length(), 8u);
477 DCHECK_EQ(client_hash->length(), 8u);
480 //------------------------------------------------------------------------------
481 // Methods for supporting latency experiments.
483 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
484 DCHECK(CalledOnValidThread());
485 return allow_latency_experiment_.end() !=
486 allow_latency_experiment_.find(url.host());
489 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
490 DCHECK(CalledOnValidThread());
491 if (enable) {
492 allow_latency_experiment_.insert(url.host());
493 return;
495 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
496 if (allow_latency_experiment_.end() == it)
497 return; // It was already erased, or never allowed.
498 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
499 allow_latency_experiment_.erase(it);
502 void SdchManager::AddSdchDictionary(const std::string& dictionary_text,
503 const GURL& dictionary_url) {
504 DCHECK(CalledOnValidThread());
505 std::string client_hash;
506 std::string server_hash;
507 GenerateHash(dictionary_text, &client_hash, &server_hash);
508 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
509 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
510 return; // Already loaded.
513 std::string domain, path;
514 std::set<int> ports;
515 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
517 if (dictionary_text.empty()) {
518 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
519 return; // Missing header.
522 size_t header_end = dictionary_text.find("\n\n");
523 if (std::string::npos == header_end) {
524 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
525 return; // Missing header.
527 size_t line_start = 0; // Start of line being parsed.
528 while (1) {
529 size_t line_end = dictionary_text.find('\n', line_start);
530 DCHECK(std::string::npos != line_end);
531 DCHECK_LE(line_end, header_end);
533 size_t colon_index = dictionary_text.find(':', line_start);
534 if (std::string::npos == colon_index) {
535 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
536 return; // Illegal line missing a colon.
539 if (colon_index > line_end)
540 break;
542 size_t value_start = dictionary_text.find_first_not_of(" \t",
543 colon_index + 1);
544 if (std::string::npos != value_start) {
545 if (value_start >= line_end)
546 break;
547 std::string name(dictionary_text, line_start, colon_index - line_start);
548 std::string value(dictionary_text, value_start, line_end - value_start);
549 name = base::StringToLowerASCII(name);
550 if (name == "domain") {
551 domain = value;
552 } else if (name == "path") {
553 path = value;
554 } else if (name == "format-version") {
555 if (value != "1.0")
556 return;
557 } else if (name == "max-age") {
558 int64 seconds;
559 base::StringToInt64(value, &seconds);
560 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
561 } else if (name == "port") {
562 int port;
563 base::StringToInt(value, &port);
564 if (port >= 0)
565 ports.insert(port);
569 if (line_end >= header_end)
570 break;
571 line_start = line_end + 1;
574 // Narrow fix for http://crbug.com/389451.
575 GURL dictionary_url_normalized(dictionary_url);
576 StripTrailingDot(&dictionary_url_normalized);
578 if (!IsInSupportedDomain(dictionary_url_normalized))
579 return;
581 if (!Dictionary::CanSet(domain, path, ports, dictionary_url_normalized))
582 return;
584 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
585 // useless dictionaries. We should probably have a cache eviction plan,
586 // instead of just blocking additions. For now, with the spec in flux, it
587 // is probably not worth doing eviction handling.
588 if (kMaxDictionarySize < dictionary_text.size()) {
589 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
590 return;
592 if (kMaxDictionaryCount <= dictionaries_.size()) {
593 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
594 return;
597 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
598 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
599 << " and server hash " << server_hash;
600 Dictionary* dictionary =
601 new Dictionary(dictionary_text, header_end + 2, client_hash,
602 dictionary_url_normalized, domain,
603 path, expiration, ports);
604 dictionaries_[server_hash] = dictionary;
605 return;
608 // static
609 void SdchManager::UrlSafeBase64Encode(const std::string& input,
610 std::string* output) {
611 // Since this is only done during a dictionary load, and hashes are only 8
612 // characters, we just do the simple fixup, rather than rewriting the encoder.
613 base::Base64Encode(input, output);
614 std::replace(output->begin(), output->end(), '+', '-');
615 std::replace(output->begin(), output->end(), '/', '_');
618 } // namespace net