file_manager: Fix a bug where hosted documents could not be opened without active...
[chromium-blink-merge.git] / net / base / sdch_manager.cc
blob17883b11613e339f557e588729916cdaa65311fa
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
16 namespace net {
18 //------------------------------------------------------------------------------
19 // static
20 const size_t SdchManager::kMaxDictionarySize = 1000000;
22 // static
23 const size_t SdchManager::kMaxDictionaryCount = 20;
25 // static
26 SdchManager* SdchManager::global_ = NULL;
28 // static
29 bool SdchManager::g_sdch_enabled_ = true;
31 //------------------------------------------------------------------------------
32 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
33 size_t offset,
34 const std::string& client_hash,
35 const GURL& gurl,
36 const std::string& domain,
37 const std::string& path,
38 const base::Time& expiration,
39 const std::set<int>& ports)
40 : text_(dictionary_text, offset),
41 client_hash_(client_hash),
42 url_(gurl),
43 domain_(domain),
44 path_(path),
45 expiration_(expiration),
46 ports_(ports) {
49 SdchManager::Dictionary::~Dictionary() {
52 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
53 if (!SdchManager::Global()->IsInSupportedDomain(target_url))
54 return false;
55 /* The specific rules of when a dictionary should be advertised in an
56 Avail-Dictionary header are modeled after the rules for cookie scoping. The
57 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
58 dictionary may be advertised in the Avail-Dictionaries header exactly when
59 all of the following are true:
60 1. The server's effective host name domain-matches the Domain attribute of
61 the dictionary.
62 2. If the dictionary has a Port attribute, the request port is one of the
63 ports listed in the Port attribute.
64 3. The request URI path-matches the path header of the dictionary.
65 4. The request is not an HTTPS request.
67 if (!DomainMatch(target_url, domain_))
68 return false;
69 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
70 return false;
71 if (path_.size() && !PathMatch(target_url.path(), path_))
72 return false;
73 if (target_url.SchemeIsSecure())
74 return false;
75 if (base::Time::Now() > expiration_)
76 return false;
77 return true;
80 //------------------------------------------------------------------------------
81 // Security functions restricting loads and use of dictionaries.
83 // static
84 bool SdchManager::Dictionary::CanSet(const std::string& domain,
85 const std::string& path,
86 const std::set<int>& ports,
87 const GURL& dictionary_url) {
88 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
89 return false;
91 A dictionary is invalid and must not be stored if any of the following are
92 true:
93 1. The dictionary has no Domain attribute.
94 2. The effective host name that derives from the referer URL host name does
95 not domain-match the Domain attribute.
96 3. The Domain attribute is a top level domain.
97 4. The referer URL host is a host domain name (not IP address) and has the
98 form HD, where D is the value of the Domain attribute, and H is a string
99 that contains one or more dots.
100 5. If the dictionary has a Port attribute and the referer URL's port was not
101 in the list.
104 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
105 // and hence the conservative approach is to not allow any redirects (if there
106 // were any... then don't allow the dictionary to be set).
108 if (domain.empty()) {
109 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
110 return false; // Domain is required.
112 if (registry_controlled_domains::GetDomainAndRegistry(
113 domain,
114 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES).empty()) {
115 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
116 return false; // domain was a TLD.
118 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
119 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
120 return false;
123 std::string referrer_url_host = dictionary_url.host();
124 size_t postfix_domain_index = referrer_url_host.rfind(domain);
125 // See if it is indeed a postfix, or just an internal string.
126 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
127 // It is a postfix... so check to see if there's a dot in the prefix.
128 size_t end_of_host_index = referrer_url_host.find_first_of('.');
129 if (referrer_url_host.npos != end_of_host_index &&
130 end_of_host_index < postfix_domain_index) {
131 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
132 return false;
136 if (!ports.empty()
137 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
138 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
139 return false;
141 return true;
144 // static
145 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
146 if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
147 return false;
149 1. The request URL's host name domain-matches the Domain attribute of the
150 dictionary.
151 2. If the dictionary has a Port attribute, the request port is one of the
152 ports listed in the Port attribute.
153 3. The request URL path-matches the path attribute of the dictionary.
154 4. The request is not an HTTPS request.
156 if (!DomainMatch(referring_url, domain_)) {
157 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
158 return false;
160 if (!ports_.empty()
161 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
162 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
163 return false;
165 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
166 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
167 return false;
169 if (referring_url.SchemeIsSecure()) {
170 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
171 return false;
174 // TODO(jar): Remove overly restrictive failsafe test (added per security
175 // review) when we have a need to be more general.
176 if (!referring_url.SchemeIs("http")) {
177 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
178 return false;
181 return true;
184 bool SdchManager::Dictionary::PathMatch(const std::string& path,
185 const std::string& restriction) {
186 /* Must be either:
187 1. P2 is equal to P1
188 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
189 character following P2 in P1 is "/".
191 if (path == restriction)
192 return true;
193 size_t prefix_length = restriction.size();
194 if (prefix_length > path.size())
195 return false; // Can't be a prefix.
196 if (0 != path.compare(0, prefix_length, restriction))
197 return false;
198 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
201 // static
202 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
203 const std::string& restriction) {
204 // TODO(jar): This is not precisely a domain match definition.
205 return gurl.DomainIs(restriction.data(), restriction.size());
208 //------------------------------------------------------------------------------
209 SdchManager::SdchManager() {
210 DCHECK(!global_);
211 DCHECK(CalledOnValidThread());
212 global_ = this;
215 SdchManager::~SdchManager() {
216 DCHECK_EQ(this, global_);
217 DCHECK(CalledOnValidThread());
218 while (!dictionaries_.empty()) {
219 DictionaryMap::iterator it = dictionaries_.begin();
220 it->second->Release();
221 dictionaries_.erase(it->first);
223 global_ = NULL;
226 // static
227 void SdchManager::Shutdown() {
228 EnableSdchSupport(false);
229 if (!global_ )
230 return;
231 global_->set_sdch_fetcher(NULL);
234 // static
235 SdchManager* SdchManager::Global() {
236 return global_;
239 // static
240 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
241 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
244 void SdchManager::set_sdch_fetcher(SdchFetcher* fetcher) {
245 DCHECK(CalledOnValidThread());
246 fetcher_.reset(fetcher);
249 // static
250 void SdchManager::EnableSdchSupport(bool enabled) {
251 g_sdch_enabled_ = enabled;
254 // static
255 void SdchManager::BlacklistDomain(const GURL& url) {
256 if (!global_ )
257 return;
258 global_->SetAllowLatencyExperiment(url, false);
260 std::string domain(StringToLowerASCII(url.host()));
261 int count = global_->blacklisted_domains_[domain];
262 if (count > 0)
263 return; // Domain is already blacklisted.
265 count = 1 + 2 * global_->exponential_blacklist_count[domain];
266 if (count > 0)
267 global_->exponential_blacklist_count[domain] = count;
268 else
269 count = INT_MAX;
271 global_->blacklisted_domains_[domain] = count;
274 // static
275 void SdchManager::BlacklistDomainForever(const GURL& url) {
276 if (!global_ )
277 return;
278 global_->SetAllowLatencyExperiment(url, false);
280 std::string domain(StringToLowerASCII(url.host()));
281 global_->exponential_blacklist_count[domain] = INT_MAX;
282 global_->blacklisted_domains_[domain] = INT_MAX;
285 // static
286 void SdchManager::ClearBlacklistings() {
287 Global()->blacklisted_domains_.clear();
288 Global()->exponential_blacklist_count.clear();
291 // static
292 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
293 Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
296 // static
297 int SdchManager::BlackListDomainCount(const std::string& domain) {
298 if (Global()->blacklisted_domains_.end() ==
299 Global()->blacklisted_domains_.find(domain))
300 return 0;
301 return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
304 // static
305 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
306 if (Global()->exponential_blacklist_count.end() ==
307 Global()->exponential_blacklist_count.find(domain))
308 return 0;
309 return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
312 bool SdchManager::IsInSupportedDomain(const GURL& url) {
313 DCHECK(CalledOnValidThread());
314 if (!g_sdch_enabled_ )
315 return false;
317 if (blacklisted_domains_.empty())
318 return true;
320 std::string domain(StringToLowerASCII(url.host()));
321 DomainCounter::iterator it = blacklisted_domains_.find(domain);
322 if (blacklisted_domains_.end() == it)
323 return true;
325 int count = it->second - 1;
326 if (count > 0)
327 blacklisted_domains_[domain] = count;
328 else
329 blacklisted_domains_.erase(domain);
330 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
331 return false;
334 void SdchManager::FetchDictionary(const GURL& request_url,
335 const GURL& dictionary_url) {
336 DCHECK(CalledOnValidThread());
337 if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
338 fetcher_.get())
339 fetcher_->Schedule(dictionary_url);
342 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
343 const GURL& dictionary_url) const {
344 DCHECK(CalledOnValidThread());
345 /* The user agent may retrieve a dictionary from the dictionary URL if all of
346 the following are true:
347 1 The dictionary URL host name matches the referrer URL host name
348 2 The dictionary URL host name domain matches the parent domain of the
349 referrer URL host name
350 3 The parent domain of the referrer URL host name is not a top level
351 domain
352 4 The dictionary URL is not an HTTPS URL.
354 // Item (1) above implies item (2). Spec should be updated.
355 // I take "host name match" to be "is identical to"
356 if (referring_url.host() != dictionary_url.host()) {
357 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
358 return false;
360 if (referring_url.SchemeIs("https")) {
361 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
362 return false;
365 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
366 // than current SDCH spec when needed, and justified by security audit.
367 if (!referring_url.SchemeIs("http")) {
368 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
369 return false;
372 return true;
375 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
376 const GURL& dictionary_url) {
377 DCHECK(CalledOnValidThread());
378 std::string client_hash;
379 std::string server_hash;
380 GenerateHash(dictionary_text, &client_hash, &server_hash);
381 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
382 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
383 return false; // Already loaded.
386 std::string domain, path;
387 std::set<int> ports;
388 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
390 if (dictionary_text.empty()) {
391 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
392 return false; // Missing header.
395 size_t header_end = dictionary_text.find("\n\n");
396 if (std::string::npos == header_end) {
397 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
398 return false; // Missing header.
400 size_t line_start = 0; // Start of line being parsed.
401 while (1) {
402 size_t line_end = dictionary_text.find('\n', line_start);
403 DCHECK(std::string::npos != line_end);
404 DCHECK_LE(line_end, header_end);
406 size_t colon_index = dictionary_text.find(':', line_start);
407 if (std::string::npos == colon_index) {
408 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
409 return false; // Illegal line missing a colon.
412 if (colon_index > line_end)
413 break;
415 size_t value_start = dictionary_text.find_first_not_of(" \t",
416 colon_index + 1);
417 if (std::string::npos != value_start) {
418 if (value_start >= line_end)
419 break;
420 std::string name(dictionary_text, line_start, colon_index - line_start);
421 std::string value(dictionary_text, value_start, line_end - value_start);
422 name = StringToLowerASCII(name);
423 if (name == "domain") {
424 domain = value;
425 } else if (name == "path") {
426 path = value;
427 } else if (name == "format-version") {
428 if (value != "1.0")
429 return false;
430 } else if (name == "max-age") {
431 int64 seconds;
432 base::StringToInt64(value, &seconds);
433 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
434 } else if (name == "port") {
435 int port;
436 base::StringToInt(value, &port);
437 if (port >= 0)
438 ports.insert(port);
442 if (line_end >= header_end)
443 break;
444 line_start = line_end + 1;
447 if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
448 return false;
450 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
451 // useless dictionaries. We should probably have a cache eviction plan,
452 // instead of just blocking additions. For now, with the spec in flux, it
453 // is probably not worth doing eviction handling.
454 if (kMaxDictionarySize < dictionary_text.size()) {
455 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
456 return false;
458 if (kMaxDictionaryCount <= dictionaries_.size()) {
459 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
460 return false;
463 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
464 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
465 << " and server hash " << server_hash;
466 Dictionary* dictionary =
467 new Dictionary(dictionary_text, header_end + 2, client_hash,
468 dictionary_url, domain, path, expiration, ports);
469 dictionary->AddRef();
470 dictionaries_[server_hash] = dictionary;
471 return true;
474 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
475 const GURL& referring_url, Dictionary** dictionary) {
476 DCHECK(CalledOnValidThread());
477 *dictionary = NULL;
478 DictionaryMap::iterator it = dictionaries_.find(server_hash);
479 if (it == dictionaries_.end()) {
480 return;
482 Dictionary* matching_dictionary = it->second;
483 if (!matching_dictionary->CanUse(referring_url))
484 return;
485 *dictionary = matching_dictionary;
488 // TODO(jar): If we have evictions from the dictionaries_, then we need to
489 // change this interface to return a list of reference counted Dictionary
490 // instances that can be used if/when a server specifies one.
491 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
492 std::string* list) {
493 DCHECK(CalledOnValidThread());
494 int count = 0;
495 for (DictionaryMap::iterator it = dictionaries_.begin();
496 it != dictionaries_.end(); ++it) {
497 if (!it->second->CanAdvertise(target_url))
498 continue;
499 ++count;
500 if (!list->empty())
501 list->append(",");
502 list->append(it->second->client_hash());
504 // Watch to see if we have corrupt or numerous dictionaries.
505 if (count > 0)
506 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
509 // static
510 void SdchManager::GenerateHash(const std::string& dictionary_text,
511 std::string* client_hash, std::string* server_hash) {
512 char binary_hash[32];
513 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
515 std::string first_48_bits(&binary_hash[0], 6);
516 std::string second_48_bits(&binary_hash[6], 6);
517 UrlSafeBase64Encode(first_48_bits, client_hash);
518 UrlSafeBase64Encode(second_48_bits, server_hash);
520 DCHECK_EQ(server_hash->length(), 8u);
521 DCHECK_EQ(client_hash->length(), 8u);
524 //------------------------------------------------------------------------------
525 // Methods for supporting latency experiments.
527 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
528 DCHECK(CalledOnValidThread());
529 return allow_latency_experiment_.end() !=
530 allow_latency_experiment_.find(url.host());
533 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
534 DCHECK(CalledOnValidThread());
535 if (enable) {
536 allow_latency_experiment_.insert(url.host());
537 return;
539 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
540 if (allow_latency_experiment_.end() == it)
541 return; // It was already erased, or never allowed.
542 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
543 allow_latency_experiment_.erase(it);
546 // static
547 void SdchManager::UrlSafeBase64Encode(const std::string& input,
548 std::string* output) {
549 // Since this is only done during a dictionary load, and hashes are only 8
550 // characters, we just do the simple fixup, rather than rewriting the encoder.
551 base::Base64Encode(input, output);
552 for (size_t i = 0; i < output->size(); ++i) {
553 switch (output->data()[i]) {
554 case '+':
555 (*output)[i] = '-';
556 continue;
557 case '/':
558 (*output)[i] = '_';
559 continue;
560 default:
561 continue;
566 } // namespace net