Clean up MFYI by adding suppressions for new bugs
[chromium-blink-merge.git] / net / base / sdch_manager.cc
blob9e8fd98097865dc8ea08fcf99c69ad6975ec18b6
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
16 namespace net {
18 //------------------------------------------------------------------------------
19 // static
20 const size_t SdchManager::kMaxDictionarySize = 1000000;
22 // static
23 const size_t SdchManager::kMaxDictionaryCount = 20;
25 // static
26 SdchManager* SdchManager::global_ = NULL;
28 // static
29 bool SdchManager::g_sdch_enabled_ = true;
31 // static
32 bool SdchManager::g_secure_scheme_supported_ = false;
34 //------------------------------------------------------------------------------
35 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
36 size_t offset,
37 const std::string& client_hash,
38 const GURL& gurl,
39 const std::string& domain,
40 const std::string& path,
41 const base::Time& expiration,
42 const std::set<int>& ports)
43 : text_(dictionary_text, offset),
44 client_hash_(client_hash),
45 url_(gurl),
46 domain_(domain),
47 path_(path),
48 expiration_(expiration),
49 ports_(ports) {
52 SdchManager::Dictionary::~Dictionary() {
55 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
56 if (!SdchManager::Global()->IsInSupportedDomain(target_url))
57 return false;
58 /* The specific rules of when a dictionary should be advertised in an
59 Avail-Dictionary header are modeled after the rules for cookie scoping. The
60 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
61 dictionary may be advertised in the Avail-Dictionaries header exactly when
62 all of the following are true:
63 1. The server's effective host name domain-matches the Domain attribute of
64 the dictionary.
65 2. If the dictionary has a Port attribute, the request port is one of the
66 ports listed in the Port attribute.
67 3. The request URI path-matches the path header of the dictionary.
68 4. The request is not an HTTPS request.
69 We can override (ignore) item (4) only when we have explicitly enabled
70 HTTPS support AND dictionary has been acquired over HTTPS.
72 if (!DomainMatch(target_url, domain_))
73 return false;
74 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
75 return false;
76 if (path_.size() && !PathMatch(target_url.path(), path_))
77 return false;
78 if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
79 return false;
80 if (target_url.SchemeIsSecure() && !url_.SchemeIsSecure())
81 return false;
82 if (base::Time::Now() > expiration_)
83 return false;
84 return true;
87 //------------------------------------------------------------------------------
88 // Security functions restricting loads and use of dictionaries.
90 // static
91 bool SdchManager::Dictionary::CanSet(const std::string& domain,
92 const std::string& path,
93 const std::set<int>& ports,
94 const GURL& dictionary_url) {
95 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
96 return false;
98 A dictionary is invalid and must not be stored if any of the following are
99 true:
100 1. The dictionary has no Domain attribute.
101 2. The effective host name that derives from the referer URL host name does
102 not domain-match the Domain attribute.
103 3. The Domain attribute is a top level domain.
104 4. The referer URL host is a host domain name (not IP address) and has the
105 form HD, where D is the value of the Domain attribute, and H is a string
106 that contains one or more dots.
107 5. If the dictionary has a Port attribute and the referer URL's port was not
108 in the list.
111 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
112 // and hence the conservative approach is to not allow any redirects (if there
113 // were any... then don't allow the dictionary to be set).
115 if (domain.empty()) {
116 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
117 return false; // Domain is required.
119 if (registry_controlled_domains::GetDomainAndRegistry(
120 domain,
121 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
122 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
123 return false; // domain was a TLD.
125 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
126 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
127 return false;
130 std::string referrer_url_host = dictionary_url.host();
131 size_t postfix_domain_index = referrer_url_host.rfind(domain);
132 // See if it is indeed a postfix, or just an internal string.
133 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
134 // It is a postfix... so check to see if there's a dot in the prefix.
135 size_t end_of_host_index = referrer_url_host.find_first_of('.');
136 if (referrer_url_host.npos != end_of_host_index &&
137 end_of_host_index < postfix_domain_index) {
138 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
139 return false;
143 if (!ports.empty()
144 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
145 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
146 return false;
148 return true;
151 // static
152 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
153 if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
154 return false;
156 1. The request URL's host name domain-matches the Domain attribute of the
157 dictionary.
158 2. If the dictionary has a Port attribute, the request port is one of the
159 ports listed in the Port attribute.
160 3. The request URL path-matches the path attribute of the dictionary.
161 4. The request is not an HTTPS request.
162 We can override (ignore) item (4) only when we have explicitly enabled
163 HTTPS support AND dictionary has been acquired over HTTPS.
165 if (!DomainMatch(referring_url, domain_)) {
166 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
167 return false;
169 if (!ports_.empty()
170 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
171 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
172 return false;
174 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
175 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
176 return false;
178 if (!SdchManager::secure_scheme_supported() &&
179 referring_url.SchemeIsSecure()) {
180 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
181 return false;
183 if (referring_url.SchemeIsSecure() && !url_.SchemeIsSecure()) {
184 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
185 return false;
188 // TODO(jar): Remove overly restrictive failsafe test (added per security
189 // review) when we have a need to be more general.
190 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
191 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
192 return false;
195 return true;
198 bool SdchManager::Dictionary::PathMatch(const std::string& path,
199 const std::string& restriction) {
200 /* Must be either:
201 1. P2 is equal to P1
202 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
203 character following P2 in P1 is "/".
205 if (path == restriction)
206 return true;
207 size_t prefix_length = restriction.size();
208 if (prefix_length > path.size())
209 return false; // Can't be a prefix.
210 if (0 != path.compare(0, prefix_length, restriction))
211 return false;
212 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
215 // static
216 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
217 const std::string& restriction) {
218 // TODO(jar): This is not precisely a domain match definition.
219 return gurl.DomainIs(restriction.data(), restriction.size());
222 //------------------------------------------------------------------------------
223 SdchManager::SdchManager() {
224 DCHECK(!global_);
225 DCHECK(CalledOnValidThread());
226 global_ = this;
229 SdchManager::~SdchManager() {
230 DCHECK_EQ(this, global_);
231 DCHECK(CalledOnValidThread());
232 while (!dictionaries_.empty()) {
233 DictionaryMap::iterator it = dictionaries_.begin();
234 it->second->Release();
235 dictionaries_.erase(it->first);
237 global_ = NULL;
240 // static
241 void SdchManager::Shutdown() {
242 EnableSdchSupport(false);
243 if (!global_ )
244 return;
245 global_->set_sdch_fetcher(NULL);
248 // static
249 SdchManager* SdchManager::Global() {
250 return global_;
253 // static
254 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
255 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
258 void SdchManager::set_sdch_fetcher(SdchFetcher* fetcher) {
259 DCHECK(CalledOnValidThread());
260 fetcher_.reset(fetcher);
263 // static
264 void SdchManager::EnableSdchSupport(bool enabled) {
265 g_sdch_enabled_ = enabled;
268 // static
269 void SdchManager::EnableSecureSchemeSupport(bool enabled) {
270 g_secure_scheme_supported_ = enabled;
273 // static
274 void SdchManager::BlacklistDomain(const GURL& url) {
275 if (!global_ )
276 return;
277 global_->SetAllowLatencyExperiment(url, false);
279 std::string domain(StringToLowerASCII(url.host()));
280 int count = global_->blacklisted_domains_[domain];
281 if (count > 0)
282 return; // Domain is already blacklisted.
284 count = 1 + 2 * global_->exponential_blacklist_count[domain];
285 if (count > 0)
286 global_->exponential_blacklist_count[domain] = count;
287 else
288 count = INT_MAX;
290 global_->blacklisted_domains_[domain] = count;
293 // static
294 void SdchManager::BlacklistDomainForever(const GURL& url) {
295 if (!global_ )
296 return;
297 global_->SetAllowLatencyExperiment(url, false);
299 std::string domain(StringToLowerASCII(url.host()));
300 global_->exponential_blacklist_count[domain] = INT_MAX;
301 global_->blacklisted_domains_[domain] = INT_MAX;
304 // static
305 void SdchManager::ClearBlacklistings() {
306 Global()->blacklisted_domains_.clear();
307 Global()->exponential_blacklist_count.clear();
310 // static
311 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
312 Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
315 // static
316 int SdchManager::BlackListDomainCount(const std::string& domain) {
317 if (Global()->blacklisted_domains_.end() ==
318 Global()->blacklisted_domains_.find(domain))
319 return 0;
320 return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
323 // static
324 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
325 if (Global()->exponential_blacklist_count.end() ==
326 Global()->exponential_blacklist_count.find(domain))
327 return 0;
328 return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
331 bool SdchManager::IsInSupportedDomain(const GURL& url) {
332 DCHECK(CalledOnValidThread());
333 if (!g_sdch_enabled_ )
334 return false;
336 if (blacklisted_domains_.empty())
337 return true;
339 std::string domain(StringToLowerASCII(url.host()));
340 DomainCounter::iterator it = blacklisted_domains_.find(domain);
341 if (blacklisted_domains_.end() == it)
342 return true;
344 int count = it->second - 1;
345 if (count > 0)
346 blacklisted_domains_[domain] = count;
347 else
348 blacklisted_domains_.erase(domain);
349 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
350 return false;
353 void SdchManager::FetchDictionary(const GURL& request_url,
354 const GURL& dictionary_url) {
355 DCHECK(CalledOnValidThread());
356 if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
357 fetcher_.get())
358 fetcher_->Schedule(dictionary_url);
361 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
362 const GURL& dictionary_url) const {
363 DCHECK(CalledOnValidThread());
364 /* The user agent may retrieve a dictionary from the dictionary URL if all of
365 the following are true:
366 1 The dictionary URL host name matches the referrer URL host name and
367 scheme.
368 2 The dictionary URL host name domain matches the parent domain of the
369 referrer URL host name
370 3 The parent domain of the referrer URL host name is not a top level
371 domain
372 4 The dictionary URL is not an HTTPS URL.
374 // Item (1) above implies item (2). Spec should be updated.
375 // I take "host name match" to be "is identical to"
376 if (referring_url.host() != dictionary_url.host() ||
377 referring_url.scheme() != dictionary_url.scheme()) {
378 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
379 return false;
381 if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
382 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
383 return false;
386 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
387 // than current SDCH spec when needed, and justified by security audit.
388 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
389 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
390 return false;
393 return true;
396 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
397 const GURL& dictionary_url) {
398 DCHECK(CalledOnValidThread());
399 std::string client_hash;
400 std::string server_hash;
401 GenerateHash(dictionary_text, &client_hash, &server_hash);
402 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
403 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
404 return false; // Already loaded.
407 std::string domain, path;
408 std::set<int> ports;
409 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
411 if (dictionary_text.empty()) {
412 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
413 return false; // Missing header.
416 size_t header_end = dictionary_text.find("\n\n");
417 if (std::string::npos == header_end) {
418 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
419 return false; // Missing header.
421 size_t line_start = 0; // Start of line being parsed.
422 while (1) {
423 size_t line_end = dictionary_text.find('\n', line_start);
424 DCHECK(std::string::npos != line_end);
425 DCHECK_LE(line_end, header_end);
427 size_t colon_index = dictionary_text.find(':', line_start);
428 if (std::string::npos == colon_index) {
429 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
430 return false; // Illegal line missing a colon.
433 if (colon_index > line_end)
434 break;
436 size_t value_start = dictionary_text.find_first_not_of(" \t",
437 colon_index + 1);
438 if (std::string::npos != value_start) {
439 if (value_start >= line_end)
440 break;
441 std::string name(dictionary_text, line_start, colon_index - line_start);
442 std::string value(dictionary_text, value_start, line_end - value_start);
443 name = StringToLowerASCII(name);
444 if (name == "domain") {
445 domain = value;
446 } else if (name == "path") {
447 path = value;
448 } else if (name == "format-version") {
449 if (value != "1.0")
450 return false;
451 } else if (name == "max-age") {
452 int64 seconds;
453 base::StringToInt64(value, &seconds);
454 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
455 } else if (name == "port") {
456 int port;
457 base::StringToInt(value, &port);
458 if (port >= 0)
459 ports.insert(port);
463 if (line_end >= header_end)
464 break;
465 line_start = line_end + 1;
468 if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
469 return false;
471 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
472 // useless dictionaries. We should probably have a cache eviction plan,
473 // instead of just blocking additions. For now, with the spec in flux, it
474 // is probably not worth doing eviction handling.
475 if (kMaxDictionarySize < dictionary_text.size()) {
476 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
477 return false;
479 if (kMaxDictionaryCount <= dictionaries_.size()) {
480 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
481 return false;
484 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
485 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
486 << " and server hash " << server_hash;
487 Dictionary* dictionary =
488 new Dictionary(dictionary_text, header_end + 2, client_hash,
489 dictionary_url, domain, path, expiration, ports);
490 dictionary->AddRef();
491 dictionaries_[server_hash] = dictionary;
492 return true;
495 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
496 const GURL& referring_url, Dictionary** dictionary) {
497 DCHECK(CalledOnValidThread());
498 *dictionary = NULL;
499 DictionaryMap::iterator it = dictionaries_.find(server_hash);
500 if (it == dictionaries_.end()) {
501 return;
503 Dictionary* matching_dictionary = it->second;
504 if (!matching_dictionary->CanUse(referring_url))
505 return;
506 *dictionary = matching_dictionary;
509 // TODO(jar): If we have evictions from the dictionaries_, then we need to
510 // change this interface to return a list of reference counted Dictionary
511 // instances that can be used if/when a server specifies one.
512 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
513 std::string* list) {
514 DCHECK(CalledOnValidThread());
515 int count = 0;
516 for (DictionaryMap::iterator it = dictionaries_.begin();
517 it != dictionaries_.end(); ++it) {
518 if (!it->second->CanAdvertise(target_url))
519 continue;
520 ++count;
521 if (!list->empty())
522 list->append(",");
523 list->append(it->second->client_hash());
525 // Watch to see if we have corrupt or numerous dictionaries.
526 if (count > 0)
527 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
530 // static
531 void SdchManager::GenerateHash(const std::string& dictionary_text,
532 std::string* client_hash, std::string* server_hash) {
533 char binary_hash[32];
534 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
536 std::string first_48_bits(&binary_hash[0], 6);
537 std::string second_48_bits(&binary_hash[6], 6);
538 UrlSafeBase64Encode(first_48_bits, client_hash);
539 UrlSafeBase64Encode(second_48_bits, server_hash);
541 DCHECK_EQ(server_hash->length(), 8u);
542 DCHECK_EQ(client_hash->length(), 8u);
545 //------------------------------------------------------------------------------
546 // Methods for supporting latency experiments.
548 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
549 DCHECK(CalledOnValidThread());
550 return allow_latency_experiment_.end() !=
551 allow_latency_experiment_.find(url.host());
554 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
555 DCHECK(CalledOnValidThread());
556 if (enable) {
557 allow_latency_experiment_.insert(url.host());
558 return;
560 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
561 if (allow_latency_experiment_.end() == it)
562 return; // It was already erased, or never allowed.
563 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
564 allow_latency_experiment_.erase(it);
567 // static
568 void SdchManager::UrlSafeBase64Encode(const std::string& input,
569 std::string* output) {
570 // Since this is only done during a dictionary load, and hashes are only 8
571 // characters, we just do the simple fixup, rather than rewriting the encoder.
572 base::Base64Encode(input, output);
573 for (size_t i = 0; i < output->size(); ++i) {
574 switch (output->data()[i]) {
575 case '+':
576 (*output)[i] = '-';
577 continue;
578 case '/':
579 (*output)[i] = '_';
580 continue;
581 default:
582 continue;
587 } // namespace net